From 5335285cda1a92ca4f966bad3c1b61a4a580c4d1 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 21 Apr 2019 17:03:38 +0200 Subject: [PATCH] Attempt to handle corrupt PDF documents that contain path operators inside of text objects (issue 10542) First of all, while this simple approach appears to work OK in practice, I'm not sure if it's the best way of addressing the problem (assuming that you even want to). Second of all, while the solution implemented here only requires tracking/checking one new boolean in order for this to work, I'm nonetheless not entirely happy about this since it adds overhead (albeit *very* small) to the parsing of path operators in PDF documents just for a handful of *corrupt* ones. --- src/core/evaluator.js | 29 ++++++++++-- test/pdfs/.gitignore | 1 + test/pdfs/issue10542_reduced.pdf | 81 ++++++++++++++++++++++++++++++++ test/test_manifest.json | 7 +++ 4 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 test/pdfs/issue10542_reduced.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 8f7917f77..dc88db6ca 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -822,14 +822,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return fontCapability.promise; }, - buildPath: function PartialEvaluator_buildPath(operatorList, fn, args) { + buildPath(operatorList, fn, args, parsingText = false) { var lastIndex = operatorList.length - 1; if (!args) { args = []; } if (lastIndex < 0 || operatorList.fnArray[lastIndex] !== OPS.constructPath) { + // Handle corrupt PDF documents that contains path operators inside of + // text objects, which may shift subsequent text, by enclosing the path + // operator in save/restore operators (fixes issue10542_reduced.pdf). + // + // Note that this will effectively disable the optimization in the + // `else` branch below, but given that this type of corruption is + // *extremely* rare that shouldn't really matter much in practice. 
+ if (parsingText) { + warn(`Encountered path operator "${fn}" inside of a text object.`); + operatorList.addOp(OPS.save, null); + } + operatorList.addOp(OPS.constructPath, [[fn], args]); + + if (parsingText) { + operatorList.addOp(OPS.restore, null); + } } else { var opArgs = operatorList.argsArray[lastIndex]; opArgs[0].push(fn); @@ -881,6 +897,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var self = this; var xref = this.xref; + let parsingText = false; var imageCache = Object.create(null); var xobjs = (resources.get('XObject') || Dict.empty); @@ -999,6 +1016,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addOp(OPS.setFont, [loadedName, fontSize]); })); return; + case OPS.beginText: + parsingText = true; + break; + case OPS.endText: + parsingText = false; + break; case OPS.endInlineImage: var cacheKey = args[0].cacheKey; if (cacheKey) { @@ -1158,10 +1181,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { case OPS.curveTo2: case OPS.curveTo3: case OPS.closePath: - self.buildPath(operatorList, fn, args); - continue; case OPS.rectangle: - self.buildPath(operatorList, fn, args); + self.buildPath(operatorList, fn, args, parsingText); continue; case OPS.markPoint: case OPS.markPointProps: diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index fd0935f1a..343eeef41 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -76,6 +76,7 @@ !issue10388_reduced.pdf !issue10438_reduced.pdf !issue10529.pdf +!issue10542_reduced.pdf !issue10665_reduced.pdf !bad-PageLabels.pdf !decodeACSuccessive.pdf diff --git a/test/pdfs/issue10542_reduced.pdf b/test/pdfs/issue10542_reduced.pdf new file mode 100644 index 000000000..ffc868895 --- /dev/null +++ b/test/pdfs/issue10542_reduced.pdf @@ -0,0 +1,81 @@ +%PDF-1.5 +%âãÏÓ +1 0 obj +<< +/Type /Catalog +/Pages 2 0 R +>> +endobj + +2 0 obj +<< +/Type /Pages +/Count 1 +/Kids [3 0 R] +>> +endobj + +3 0 obj +<< +/Type /Page +/Parent 2 0 R +/Contents 6 0 R +/MediaBox 
[0 0 350 100] +/Resources 4 0 R +>> +endobj + +4 0 obj +<< + /Font << /F1 5 0 R >> +>> +endobj + +5 0 obj +<< +/Type /Font +/Subtype /Type1 +/BaseFont /Helvetica +/Encoding /WinAnsiEncoding +>> +endobj + +6 0 obj +<< /Length 165 >> +stream +BT + 1 0 0 1 25 44 Tm + /F1 25 Tf + 0 0 0 rg + (Abc ) Tj + 0 0 1 RG + 74 40 m + 265 40 l + S + 0 0 1 rg + (www.google.com ) Tj + 0 0 0 rg + (test) Tj +ET +endstream +endobj + +xref +0 7 +0000000000 65535 f +0000000017 00000 n +0000000074 00000 n +0000000140 00000 n +0000000255 00000 n +0000000307 00000 n +0000000414 00000 n + +trailer +<< +/Size 7 +/Root 1 0 R +/ID [<281dda44e224156a5143dc0ac9d261ed> <281dda44e224156a5143dc0ac9d261ed>] +>> +startxref +638 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index 201cafb42..20605a9b2 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -848,6 +848,13 @@ "firstPage": 2, "type": "eq" }, + { "id": "issue10542", + "file": "pdfs/issue10542_reduced.pdf", + "md5": "92406cb903be6c7a63221ba61fcb8eaf", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "issue6289", "file": "pdfs/issue6289.pdf", "md5": "0869f3d147c734ec484ffd492104095d",