diff --git a/src/core/annotation.js b/src/core/annotation.js index 20b7b7aa9..7ba31b017 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -454,16 +454,15 @@ var Annotation = (function AnnotationClosure() { var self = this; return resourcesPromise.then(function(resources) { - var opList = new OperatorList(); - opList.addOp(OPS.beginAnnotation, [data.rect, transform, matrix]); - return evaluator.getOperatorList(self.appearance, task, - resources, opList). - then(function () { - opList.addOp(OPS.endAnnotation, []); - self.appearance.reset(); - return opList; - }); + var opList = new OperatorList(); + opList.addOp(OPS.beginAnnotation, [data.rect, transform, matrix]); + return evaluator.getOperatorList(self.appearance, task, + resources, opList).then(function () { + opList.addOp(OPS.endAnnotation, []); + self.appearance.reset(); + return opList; }); + }); } }; @@ -758,10 +757,9 @@ var TextWidgetAnnotation = (function TextWidgetAnnotationClosure() { var stream = new Stream(stringToBytes(this.data.defaultAppearance)); return evaluator.getOperatorList(stream, task, this.fieldResources, - operatorList). - then(function () { - return operatorList; - }); + operatorList).then(function () { + return operatorList; + }); } }); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index a19f1cdcc..f1e49c80f 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -114,6 +114,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { maxImageSize: -1, disableFontFace: false, disableNativeImageDecoder: false, + ignoreErrors: false, }; function NativeImageDecoder(xref, resources, handler, forceDataSchema) { @@ -342,9 +343,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList, task, initialState) { - var matrix = xobj.dict.getArray('Matrix'); - var bbox = xobj.dict.getArray('BBox'); - var group = xobj.dict.get('Group'); + var dict = xobj.dict; + var matrix = dict.getArray('Matrix'); + var bbox = dict.getArray('BBox'); + var group = dict.get('Group'); if (group) { var groupOptions = { matrix: matrix, @@ -374,8 +376,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]); return this.getOperatorList(xobj, task, - (xobj.dict.get('Resources') || resources), operatorList, initialState). - then(function () { + (dict.get('Resources') || resources), + operatorList, initialState).then(function () { operatorList.addOp(OPS.paintFormXObjectEnd, []); if (group) { @@ -522,7 +524,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } return this.buildFormXObject(resources, smaskContent, smaskOptions, - operatorList, task, stateManager.state.clone()); + operatorList, task, + stateManager.state.clone()); }, handleTilingType: @@ -538,14 +541,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return this.getOperatorList(pattern, task, patternResources, tilingOpList).then(function () { - // Add the dependencies to the parent operator list so they are - // resolved before sub operator list is executed synchronously. - operatorList.addDependencies(tilingOpList.dependencies); - operatorList.addOp(fn, getTilingPatternIR({ - fnArray: tilingOpList.fnArray, - argsArray: tilingOpList.argsArray - }, patternDict, args)); - }); + // Add the dependencies to the parent operator list so they are + // resolved before sub operator list is executed synchronously. + operatorList.addDependencies(tilingOpList.dependencies); + operatorList.addOp(fn, getTilingPatternIR({ + fnArray: tilingOpList.fnArray, + argsArray: tilingOpList.argsArray + }, patternDict, args)); + }); }, handleSetFont: @@ -899,7 +902,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { resources, operatorList, initialState) { - var self = this; var xref = this.xref; var imageCache = Object.create(null); @@ -913,6 +915,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager); var timeSlotManager = new TimeSlotManager(); + function closePendingRestoreOPS(argument) { + for (var i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { + operatorList.addOp(OPS.restore, []); + } + } + return new Promise(function promiseBody(resolve, reject) { var next = function (promise) { promise.then(function () { @@ -1187,11 +1195,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } // Some PDFs don't close all restores inside object/form. // Closing those for them. - for (i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) { - operatorList.addOp(OPS.restore, []); - } + closePendingRestoreOPS(); resolve(); - }); + }).catch(function(reason) { + if (this.options.ignoreErrors) { + // Error(s) in the OperatorList -- sending unsupported feature + // notification and allow rendering to continue. + this.handler.send('UnsupportedFeature', + { featureId: UNSUPPORTED_FEATURES.unknown }); + warn('getOperatorList - ignoring errors during task: ' + task.name); + + closePendingRestoreOPS(); + return; + } + throw reason; + }.bind(this)); }, getTextContent: @@ -1660,19 +1678,24 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; } - stateManager.save(); + // Use a new `StateManager` to prevent incorrect positioning of + // textItems *after* the Form XObject, since errors in the data + // can otherwise prevent `restore` operators from being executed. + // NOTE: This is only an issue when `options.ignoreErrors = true`. + var currentState = stateManager.state.clone(); + var xObjStateManager = new StateManager(currentState); + var matrix = xobj.dict.getArray('Matrix'); if (isArray(matrix) && matrix.length === 6) { - stateManager.transform(matrix); + xObjStateManager.transform(matrix); } next(self.getTextContent(xobj, task, - xobj.dict.get('Resources') || resources, stateManager, + xobj.dict.get('Resources') || resources, xObjStateManager, normalizeWhitespace, combineTextItems).then( function (formTextContent) { Util.appendToArray(textContent.items, formTextContent.items); Util.extendObj(textContent.styles, formTextContent.styles); - stateManager.restore(); xobjsCache.key = name; xobjsCache.texts = formTextContent; @@ -1706,7 +1729,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } flushTextContentItem(); resolve(textContent); - }); + }).catch(function(reason) { + if (this.options.ignoreErrors) { + // Error(s) in the TextContent -- allow text-extraction to continue. + warn('getTextContent - ignoring errors during task: ' + task.name); + + flushTextContentItem(); + return textContent; + } + throw reason; + }.bind(this)); }, extractDataStructures: diff --git a/src/core/worker.js b/src/core/worker.js index ec8aa4b1c..2aa367a66 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -732,6 +732,7 @@ var WorkerMessageHandler = { maxImageSize: data.maxImageSize === undefined ? -1 : data.maxImageSize, disableFontFace: data.disableFontFace, disableNativeImageDecoder: data.disableNativeImageDecoder, + ignoreErrors: data.ignoreErrors, }; getPdfManager(data, evaluatorOptions).then(function (newPdfManager) { @@ -899,15 +900,14 @@ var WorkerMessageHandler = { handler.on('GetTextContent', function wphExtractText(data) { var pageIndex = data.pageIndex; - var normalizeWhitespace = data.normalizeWhitespace; - var combineTextItems = data.combineTextItems; return pdfManager.getPage(pageIndex).then(function(page) { var task = new WorkerTask('GetTextContent: page ' + pageIndex); startWorkerTask(task); + var pageNum = pageIndex + 1; var start = Date.now(); - return page.extractTextContent(handler, task, normalizeWhitespace, - combineTextItems).then( + return page.extractTextContent(handler, task, data.normalizeWhitespace, + data.combineTextItems).then( function(textContent) { finishWorkerTask(task); info('text indexing: page=' + pageNum + ' - time=' + diff --git a/src/display/api.js b/src/display/api.js index 984b27b72..89d76eadb 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -148,6 +148,10 @@ if (typeof PDFJSDev !== 'undefined' && * used when reading built-in CMap files. Providing a custom factory is useful * for environments without `XMLHttpRequest` support, such as e.g. Node.js. * The default value is {DOMCMapReaderFactory}. + * @property {boolean} stopAtErrors - (optional) Reject certain promises, e.g. + * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated + * PDF data cannot be successfully parsed, instead of attempting to recover + * whatever possible of the data. The default value is `false`. */ /** @@ -262,6 +266,7 @@ function getDocument(src, pdfDataRangeTransport, params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE; params.disableNativeImageDecoder = params.disableNativeImageDecoder === true; + params.ignoreErrors = params.stopAtErrors !== true; var CMapReaderFactory = params.CMapReaderFactory || DOMCMapReaderFactory; if (!worker) { @@ -325,6 +330,7 @@ function _fetchDocument(worker, source, pdfDataRangeTransport, docId) { !isPostMessageTransfersDisabled, docBaseUrl: source.docBaseUrl, disableNativeImageDecoder: source.disableNativeImageDecoder, + ignoreErrors: source.ignoreErrors, }).then(function (workerId) { if (worker.destroyed) { throw new Error('Worker was destroyed'); @@ -826,8 +832,6 @@ var PDFPageProxy = (function PDFPageProxyClosure() { this.pendingCleanup = false; var renderingIntent = (params.intent === 'print' ? 'print' : 'display'); - var renderInteractiveForms = (params.renderInteractiveForms === true ? - true : /* Default */ false); var canvasFactory = params.canvasFactory || new DOMCanvasFactory(); if (!this.intentStates[renderingIntent]) { @@ -850,7 +854,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() { this.transport.messageHandler.send('RenderPageRequest', { pageIndex: this.pageNumber - 1, intent: renderingIntent, - renderInteractiveForms: renderInteractiveForms, + renderInteractiveForms: (params.renderInteractiveForms === true), }); } @@ -914,7 +918,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() { /** * @return {Promise} A promise resolved with an {@link PDFOperatorList} - * object that represents page's operator list. + * object that represents page's operator list. */ getOperatorList: function PDFPageProxy_getOperatorList() { function operatorListChanged() { @@ -950,7 +954,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() { this.transport.messageHandler.send('RenderPageRequest', { pageIndex: this.pageIndex, - intent: renderingIntent + intent: renderingIntent, }); } return intentState.opListReadCapability.promise; @@ -962,12 +966,11 @@ var PDFPageProxy = (function PDFPageProxyClosure() { * object that represent the page text content. */ getTextContent: function PDFPageProxy_getTextContent(params) { + params = params || {}; return this.transport.messageHandler.sendWithPromise('GetTextContent', { pageIndex: this.pageNumber - 1, - normalizeWhitespace: (params && params.normalizeWhitespace === true ? - true : /* Default */ false), - combineTextItems: (params && params.disableCombineTextItems === true ? - false : /* Default */ true), + normalizeWhitespace: (params.normalizeWhitespace === true), + combineTextItems: (params.disableCombineTextItems !== true), }); }, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 037c4f5ab..0a4dc2b43 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -21,6 +21,7 @@ !issue5874.pdf !issue5808.pdf !issue6204.pdf +!issue6342.pdf !issue6652.pdf !issue6782.pdf !issue6901.pdf diff --git a/test/pdfs/bug1130815.pdf.link b/test/pdfs/bug1130815.pdf.link new file mode 100644 index 000000000..38b52e222 --- /dev/null +++ b/test/pdfs/bug1130815.pdf.link @@ -0,0 +1 @@ +https://bug1130815.bmoattachments.org/attachment.cgi?id=8560958 diff --git a/test/pdfs/issue6342.pdf b/test/pdfs/issue6342.pdf new file mode 100644 index 000000000..9a462a188 --- /dev/null +++ b/test/pdfs/issue6342.pdf @@ -0,0 +1,142 @@ +%PDF-1.7 +%âãÏÓ +1 0 obj +<< +/Kids [2 0 R] +/Count 1 +/Type /Pages +>> +endobj +2 0 obj +<< +/Group 3 0 R +/Parent 1 0 R +/Resources 4 0 R +/MediaBox [0 0 300 100] +/Type /Page +/Contents 5 0 R +>> +endobj +3 0 obj +<< +/CS /DeviceRGB +/Type /Group +/S /Transparency +>> +endobj +4 0 obj +<< +/Font +<< +/F1 6 0 R +>> +/XObject +<< +/Im1 7 0 R +>> +>> +endobj +5 0 obj +<< +/Length 193 +>> +stream +q +1 0 0 1 10 80 cm +0 0 0 rg 0 0 0 RG +1 w +0 0 m +280 0 l S +Q +q +1 0 0 1 25 45 cm +/Im1 Do +1 0 0 1 100 0 cm +/Im1 Do +Q +q +1 0 0 1 10 20 cm +BT +/F1 18 Tf +(Issue 6342 - Form XObject with errors) Tj +ET +Q + +endstream +endobj +7 0 obj +<< +/Group 3 0 R +/Subtype /Form +/Length 1050 +/Resources +<< +/ExtGState +<< +/a0 +<< +/ca 1 +/CA 1 +>> +>> +>> +/FormType 1 +/BBox [0 0 45 25] +/Type /XObject +>> +stream +q +0.2 0.8 0.2 rg /a0 gs +13.117 22.651 m 11.281 22.651 9.809 21.163 9.809 19.327 c 9.809 18.733 +9.961 18.174 10.234 17.69 c 11.34 18.315 12.621 18.678 13.98 18.678 c +14.113 18.678 14.238 18.674 14.367 18.666 c 14.352 18.85 14.344 19.038 +14.344 19.229 c 14.344 20.252 14.566 21.225 14.957 22.1 c 14.43 22.455 +13.801 22.651 13.117 22.651 c h +13.117 22.651 m f +6.383 12.92 m 2.859 12.92 0 10.084 0 6.561 c 0 3.034 2.859 0.174 6.383 +0.174 c 7.727 0.174 8.969 0.592 9.996 1.299 c 9.57 1.959 9.32 2.748 +9.32 3.584 .020.594 6. c 499 c08. c830.174 586.17 21.17436 +8.4 6.17436 9 c030.1717436 9 c 18.6.418.85930784 07.859 05.1717c08. +859 09.6.442.859 12m f +6.383 12651 m f +6.383 12.9f +678 13.757 5.651727563.757 5.7.0 2.8 c858.7.0 2.8 c030.177.0 2.09 8 +8.136.1778899 797 5.521265172496.17873.8 c90674 c95.65153174 c95.c.455 c 7.4 c95.651918.7.770.252105.7.74522.1 c047 61 18.67802 61623.67802 748069.229 c02 1 m 13.9 c719.651202 15c90678 c809.3215c195.654 18.6746 3. +117768.674469..75728.229 c 09..7578.6741452.757 5.678 13.757 5.651 c h 13.757 5.65f +Q + +endstream +endobj +6 0 obj +<< +/BaseFont /Times-Roman +/Subtype /Type1 +/Encoding /WinAnsiEncoding +/Type /Font +>> +endobj +8 0 obj +<< +/Pages 1 0 R +/Type /Catalog +>> +endobj xref +0 9 +0000000000 65535 f +0000000015 00000 n +0000000074 00000 n +0000000193 00000 n +0000000261 00000 n +0000000334 00000 n +0000001818 00000 n +0000000581 00000 n +0000001919 00000 n +trailer + +<< +/Root 8 0 R +/Size 9 +>> +startxref +1969 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index e512cad94..67cbc9e1e 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1500,6 +1500,20 @@ "lastPage": 1, "type": "load" }, + { "id": "bug1130815-eq", + "file": "pdfs/bug1130815.pdf", + "md5": "3ff3b550c3af766991b2a1b11d00de85", + "rounds": 1, + "link": true, + "type": "eq" + }, + { "id": "bug1130815-text", + "file": "pdfs/bug1130815.pdf", + "md5": "3ff3b550c3af766991b2a1b11d00de85", + "rounds": 1, + "link": true, + "type": "text" + }, { "id": "issue3248", "file": "pdfs/issue3248.pdf", "md5": "970767ed68de46c316d74de67965999b", @@ -1532,6 +1546,20 @@ "lastPage": 1, "type": "load" }, + { "id": "issue6342-eq", + "file": "pdfs/issue6342.pdf", + "md5": "2ea85ca8d17117798f105be88bdb2bfd", + "rounds": 1, + "link": false, + "type": "eq" + }, + { "id": "issue6342-text", + "file": "pdfs/issue6342.pdf", + "md5": "2ea85ca8d17117798f105be88bdb2bfd", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "issue7020", "file": "pdfs/issue7020.pdf", "md5": "93b464e21c649e64ae92eeafe99fc31b",