Fix getTextContent evaluation to only apply TJ horizontal offsets using numeric items/args

While the array argument to TJ should only contain strings and numbers, other
items (such as operators) are unfortunately found in PDFs in the wild, e.g.:

[(Grandes) 0.0 Tc
-250.0 (Client\350les,) 0.0 Tc
-250.0 (Financements) 0.0 Tc
-250.0 (et) 0.0 Tc
-250.0 (March\351s) ] TJ

getOperatorList already properly ignores any non-string, non-numeric values in
TJ arrays; without this patch to getTextContent, returned text items can have
NaN widths due to calculations being applied to those non-numeric values.
This commit is contained in:
Chas Emerick 2016-10-13 07:47:17 -04:00
parent 8c5b925547
commit 85c52f1fd6
4 changed files with 79 additions and 1 deletions

View File

@ -1531,7 +1531,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
for (var j = 0, jj = items.length; j < jj; j++) { for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') { if (typeof items[j] === 'string') {
buildTextContentItem(items[j]); buildTextContentItem(items[j]);
} else { } else if (isNum(items[j])) {
ensureTextContentItem(); ensureTextContentItem();
// PDF Specification 5.3.2 states: // PDF Specification 5.3.2 states:

View File

@ -258,3 +258,4 @@
!annotation-text-widget.pdf !annotation-text-widget.pdf
!annotation-choice-widget.pdf !annotation-choice-widget.pdf
!zero_descent.pdf !zero_descent.pdf
!operator-in-TJ-array.pdf

View File

@ -0,0 +1,70 @@
%PDF-1.3
1 0 obj
<</Kids [8 0 R]
/Type /Pages
/Count 1>>
endobj
2 0 obj
<</Type /Catalog
/Pages 1 0 R>>
endobj
3 0 obj
<</ProcSet [/PDF /ImageC /Text]
/Font <</F7 10 0 R>>>>
endobj
4 0 obj
<</Type /Info
/Producer (null)>>
endobj
5 0 obj
<< >>
stream
BT
/F7 10 Tf
0 g
0.0 Tc
1 0 0 1 22.677 732.083 Tm [(Grandes) 0.0 Tc
-250.0 (Client\350les,) 0.0 Tc
-250.0 (Financements) 0.0 Tc
-250.0 (et) 0.0 Tc
-250.0 (March\351s) ] TJ
0.0 Tc
ET
endstream
endobj
8 0 obj
<</Contents 5 0 R
/Type /Page
/Resources 3 0 R
/Parent 1 0 R
/MediaBox [0 0 595 839]>>
endobj
10 0 obj
<</Name /F7
/Subtype /Type1
/Type /Font
/BaseFont /Times-Bold
/Encoding /WinAnsiEncoding>>
xref
0 11
0000000000 65535 f
0000000009 00000 n
0000000070 00000 n
0000000120 00000 n
0000000195 00000 n
0000000249 00000 n
0000000000 65535 f
0000000000 65535 f
0000000470 00000 n
0000000000 65535 f
0000000590 00000 n
trailer
<<
/Size 13
/Root 2 0 R
/Info 4 0 R
>>
startxref
707
%%EOF

View File

@ -3267,5 +3267,12 @@
"rounds": 1, "rounds": 1,
"lastPage": 1, "lastPage": 1,
"type": "text" "type": "text"
},
{ "id": "operator-in-TJ-array",
"file": "pdfs/operator-in-TJ-array.pdf",
"md5": "dfe0f15a45be18eca142adaf760984ee",
"link": false,
"rounds": 1,
"type": "text"
} }
] ]