Merge pull request #10756 from Snuffleupagus/issue-10542

Attempt to handle corrupt PDF documents that contain path operators inside of text objects (issue 10542)
This commit is contained in:
Tim van der Meij 2019-05-02 22:29:24 +02:00 committed by GitHub
commit 155304a0c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 4 deletions

View File

@ -822,14 +822,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return fontCapability.promise;
},
buildPath: function PartialEvaluator_buildPath(operatorList, fn, args) {
buildPath(operatorList, fn, args, parsingText = false) {
var lastIndex = operatorList.length - 1;
if (!args) {
args = [];
}
if (lastIndex < 0 ||
operatorList.fnArray[lastIndex] !== OPS.constructPath) {
// Handle corrupt PDF documents that contain path operators inside of
// text objects, which may shift subsequent text, by enclosing the path
// operator in save/restore operators (fixes issue10542_reduced.pdf).
//
// Note that this will effectively disable the optimization in the
// `else` branch below, but given that this type of corruption is
// *extremely* rare that shouldn't really matter much in practice.
if (parsingText) {
warn(`Encountered path operator "${fn}" inside of a text object.`);
operatorList.addOp(OPS.save, null);
}
operatorList.addOp(OPS.constructPath, [[fn], args]);
if (parsingText) {
operatorList.addOp(OPS.restore, null);
}
} else {
var opArgs = operatorList.argsArray[lastIndex];
opArgs[0].push(fn);
@ -881,6 +897,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var self = this;
var xref = this.xref;
let parsingText = false;
var imageCache = Object.create(null);
var xobjs = (resources.get('XObject') || Dict.empty);
@ -999,6 +1016,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.setFont, [loadedName, fontSize]);
}));
return;
case OPS.beginText:
parsingText = true;
break;
case OPS.endText:
parsingText = false;
break;
case OPS.endInlineImage:
var cacheKey = args[0].cacheKey;
if (cacheKey) {
@ -1158,10 +1181,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case OPS.curveTo2:
case OPS.curveTo3:
case OPS.closePath:
self.buildPath(operatorList, fn, args);
continue;
case OPS.rectangle:
self.buildPath(operatorList, fn, args);
self.buildPath(operatorList, fn, args, parsingText);
continue;
case OPS.markPoint:
case OPS.markPointProps:

View File

@ -76,6 +76,7 @@
!issue10388_reduced.pdf
!issue10438_reduced.pdf
!issue10529.pdf
!issue10542_reduced.pdf
!issue10665_reduced.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf

View File

@ -0,0 +1,81 @@
%PDF-1.5
%âãÏÓ
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [3 0 R]
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/Contents 6 0 R
/MediaBox [0 0 350 100]
/Resources 4 0 R
>>
endobj
4 0 obj
<<
/Font << /F1 5 0 R >>
>>
endobj
5 0 obj
<<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
6 0 obj
<< /Length 165 >>
stream
BT
1 0 0 1 25 44 Tm
/F1 25 Tf
0 0 0 rg
(Abc ) Tj
0 0 1 RG
74 40 m
265 40 l
S
0 0 1 rg
(www.google.com ) Tj
0 0 0 rg
(test) Tj
ET
endstream
endobj
xref
0 7
0000000000 65535 f
0000000017 00000 n
0000000074 00000 n
0000000140 00000 n
0000000255 00000 n
0000000307 00000 n
0000000414 00000 n
trailer
<<
/Size 7
/Root 1 0 R
/ID [<281dda44e224156a5143dc0ac9d261ed> <281dda44e224156a5143dc0ac9d261ed>]
>>
startxref
638
%%EOF

View File

@ -848,6 +848,13 @@
"firstPage": 2,
"type": "eq"
},
{ "id": "issue10542",
"file": "pdfs/issue10542_reduced.pdf",
"md5": "92406cb903be6c7a63221ba61fcb8eaf",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue6289",
"file": "pdfs/issue6289.pdf",
"md5": "0869f3d147c734ec484ffd492104095d",