From 85c52f1fd67f540b5defa671fb324de0b175c868 Mon Sep 17 00:00:00 2001 From: Chas Emerick Date: Thu, 13 Oct 2016 07:47:17 -0400 Subject: [PATCH] Fix getTextContent evaluation to only apply TJ horizontal offsets using numeric items/args While the array argument to TJ should only contain strings and numbers, other unfortunate items are found in PDFs in the wild, e.g.: [(Grandes) 0.0 Tc -250.0 (Client\350les,) 0.0 Tc -250.0 (Financements) 0.0 Tc -250.0 (et) 0.0 Tc -250.0 (March\351s) ] TJ getOperatorList already properly ignores any non-string, non-numeric values in TJ arrays; without this patch to getTextContent, returned text items can have NaN widths due to calculations being applied to those non-numeric values. --- src/core/evaluator.js | 2 +- test/pdfs/.gitignore | 1 + test/pdfs/operator-in-TJ-array.pdf | 70 ++++++++++++++++++++++++++++++ test/test_manifest.json | 7 +++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/operator-in-TJ-array.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 5a0450efe..3f05f820b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1531,7 +1531,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { buildTextContentItem(items[j]); - } else { + } else if (isNum(items[j])) { ensureTextContentItem(); // PDF Specification 5.3.2 states: diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 173e86f1f..792230050 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -258,3 +258,4 @@ !annotation-text-widget.pdf !annotation-choice-widget.pdf !zero_descent.pdf +!operator-in-TJ-array.pdf diff --git a/test/pdfs/operator-in-TJ-array.pdf b/test/pdfs/operator-in-TJ-array.pdf new file mode 100644 index 000000000..09b2655d8 --- /dev/null +++ b/test/pdfs/operator-in-TJ-array.pdf @@ -0,0 +1,70 @@ +%PDF-1.3 +1 0 obj +<> +endobj +2 0 obj +<> +endobj +3 0 obj +<>>> +endobj +4 0 obj +<> +endobj +5 0 obj +<< >> +stream +BT +/F7 10 Tf +0 g +0.0 Tc +1 0 0 1 22.677 732.083 Tm [(Grandes) 0.0 Tc +-250.0 (Client\350les,) 0.0 Tc +-250.0 (Financements) 0.0 Tc +-250.0 (et) 0.0 Tc +-250.0 (March\351s) ] TJ +0.0 Tc +ET + +endstream +endobj +8 0 obj +<> +endobj +10 0 obj +<> +xref +0 11 +0000000000 65535 f +0000000009 00000 n +0000000070 00000 n +0000000120 00000 n +0000000195 00000 n +0000000249 00000 n +0000000000 65535 f +0000000000 65535 f +0000000470 00000 n +0000000000 65535 f +0000000590 00000 n +trailer +<< +/Size 13 +/Root 2 0 R +/Info 4 0 R +>> +startxref +707 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index 231921876..8b4a6064a 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3267,5 +3267,12 @@ "rounds": 1, "lastPage": 1, "type": "text" + }, + { "id": "operator-in-TJ-array", + "file": "pdfs/operator-in-TJ-array.pdf", + "md5": "dfe0f15a45be18eca142adaf760984ee", + "link": false, + "rounds": 1, + "type": "text" } ]