Merge pull request #8240 from Snuffleupagus/api-stopAtErrors

[api-minor] Always allow e.g. rendering to continue even if there are errors, and add a `stopAtErrors` parameter to `getDocument` to opt out of this behaviour (issue 6342, issue 3795, bug 1130815)
This commit is contained in:
Yury Delendik 2017-04-13 10:58:49 -05:00 committed by GitHub
commit c4c44c1bbe
8 changed files with 272 additions and 52 deletions

View File

@ -457,16 +457,15 @@ var Annotation = (function AnnotationClosure() {
var self = this;
return resourcesPromise.then(function(resources) {
var opList = new OperatorList();
opList.addOp(OPS.beginAnnotation, [data.rect, transform, matrix]);
return evaluator.getOperatorList(self.appearance, task,
resources, opList).
then(function () {
opList.addOp(OPS.endAnnotation, []);
self.appearance.reset();
return opList;
});
var opList = new OperatorList();
opList.addOp(OPS.beginAnnotation, [data.rect, transform, matrix]);
return evaluator.getOperatorList(self.appearance, task,
resources, opList).then(function () {
opList.addOp(OPS.endAnnotation, []);
self.appearance.reset();
return opList;
});
});
}
};
@ -761,10 +760,9 @@ var TextWidgetAnnotation = (function TextWidgetAnnotationClosure() {
var stream = new Stream(stringToBytes(this.data.defaultAppearance));
return evaluator.getOperatorList(stream, task, this.fieldResources,
operatorList).
then(function () {
return operatorList;
});
operatorList).then(function () {
return operatorList;
});
}
});

View File

@ -114,6 +114,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
maxImageSize: -1,
disableFontFace: false,
disableNativeImageDecoder: false,
ignoreErrors: false,
};
function NativeImageDecoder(xref, resources, handler, forceDataSchema) {
@ -266,6 +267,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var TILING_PATTERN = 1, SHADING_PATTERN = 2;
PartialEvaluator.prototype = {
clone: function(newOptions) {
newOptions = newOptions || DefaultPartialEvaluatorOptions;
var newEvaluator = Object.create(this);
newEvaluator.options = newOptions;
return newEvaluator;
},
hasBlendModes: function PartialEvaluator_hasBlendModes(resources) {
if (!isDict(resources)) {
return false;
@ -342,9 +350,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList,
task,
initialState) {
var matrix = xobj.dict.getArray('Matrix');
var bbox = xobj.dict.getArray('BBox');
var group = xobj.dict.get('Group');
var dict = xobj.dict;
var matrix = dict.getArray('Matrix');
var bbox = dict.getArray('BBox');
var group = dict.get('Group');
if (group) {
var groupOptions = {
matrix: matrix,
@ -374,8 +383,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]);
return this.getOperatorList(xobj, task,
(xobj.dict.get('Resources') || resources), operatorList, initialState).
then(function () {
(dict.get('Resources') || resources),
operatorList, initialState).then(function () {
operatorList.addOp(OPS.paintFormXObjectEnd, []);
if (group) {
@ -522,7 +531,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
return this.buildFormXObject(resources, smaskContent, smaskOptions,
operatorList, task, stateManager.state.clone());
operatorList, task,
stateManager.state.clone());
},
handleTilingType:
@ -538,14 +548,14 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return this.getOperatorList(pattern, task, patternResources,
tilingOpList).then(function () {
// Add the dependencies to the parent operator list so they are
// resolved before sub operator list is executed synchronously.
operatorList.addDependencies(tilingOpList.dependencies);
operatorList.addOp(fn, getTilingPatternIR({
fnArray: tilingOpList.fnArray,
argsArray: tilingOpList.argsArray
}, patternDict, args));
});
// Add the dependencies to the parent operator list so they are
// resolved before sub operator list is executed synchronously.
operatorList.addDependencies(tilingOpList.dependencies);
operatorList.addOp(fn, getTilingPatternIR({
fnArray: tilingOpList.fnArray,
argsArray: tilingOpList.argsArray
}, patternDict, args));
});
},
handleSetFont:
@ -899,7 +909,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
resources,
operatorList,
initialState) {
var self = this;
var xref = this.xref;
var imageCache = Object.create(null);
@ -913,6 +922,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
var timeSlotManager = new TimeSlotManager();
// Appends one `OPS.restore` operator (with an empty argument list) to
// `operatorList` for every level counted by `preprocessor.savedStatesDepth`.
// The depth is read once, before any operator is added.
// NOTE: the `argument` parameter is not used by this function.
function closePendingRestoreOPS(argument) {
  var pendingRestores = preprocessor.savedStatesDepth;
  while (pendingRestores > 0) {
    operatorList.addOp(OPS.restore, []);
    pendingRestores--;
  }
}
return new Promise(function promiseBody(resolve, reject) {
var next = function (promise) {
promise.then(function () {
@ -1187,11 +1202,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
// Some PDFs don't close all restores inside object/form.
// Closing those for them.
for (i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
operatorList.addOp(OPS.restore, []);
}
closePendingRestoreOPS();
resolve();
});
}).catch(function(reason) {
if (this.options.ignoreErrors) {
// Error(s) in the OperatorList -- send an unsupported feature
// notification and allow rendering to continue.
this.handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown });
warn('getOperatorList - ignoring errors during task: ' + task.name);
closePendingRestoreOPS();
return;
}
throw reason;
}.bind(this));
},
getTextContent:
@ -1660,19 +1685,24 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
break;
}
stateManager.save();
// Use a new `StateManager` to prevent incorrect positioning of
// textItems *after* the Form XObject, since errors in the data
// can otherwise prevent `restore` operators from being executed.
// NOTE: This is only an issue when `options.ignoreErrors = true`.
var currentState = stateManager.state.clone();
var xObjStateManager = new StateManager(currentState);
var matrix = xobj.dict.getArray('Matrix');
if (isArray(matrix) && matrix.length === 6) {
stateManager.transform(matrix);
xObjStateManager.transform(matrix);
}
next(self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager,
xobj.dict.get('Resources') || resources, xObjStateManager,
normalizeWhitespace, combineTextItems).then(
function (formTextContent) {
Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore();
xobjsCache.key = name;
xobjsCache.texts = formTextContent;
@ -1706,7 +1736,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
flushTextContentItem();
resolve(textContent);
});
}).catch(function(reason) {
if (this.options.ignoreErrors) {
// Error(s) in the TextContent -- allow text-extraction to continue.
warn('getTextContent - ignoring errors during task: ' + task.name);
flushTextContentItem();
return textContent;
}
throw reason;
}.bind(this));
},
extractDataStructures:
@ -2468,6 +2507,12 @@ var TranslatedFont = (function TranslatedFontClosure() {
if (this.type3Loaded) {
return this.type3Loaded;
}
// When parsing Type3 glyphs, always ignore the glyphs themselves if there
// are errors, i.e. never attempt to recover from errors in their data.
// Compared to the parsing of e.g. an entire page, it doesn't really
// make sense to only be able to render a Type3 glyph partially.
var type3Options = Object.create(evaluator.options);
type3Options.ignoreErrors = false;
var type3Evaluator = evaluator.clone(type3Options);
var translatedFont = this.font;
var loadCharProcsPromise = Promise.resolve();
@ -2475,12 +2520,14 @@ var TranslatedFont = (function TranslatedFontClosure() {
var fontResources = this.dict.get('Resources') || resources;
var charProcKeys = charProcs.getKeys();
var charProcOperatorList = Object.create(null);
for (var i = 0, n = charProcKeys.length; i < n; ++i) {
loadCharProcsPromise = loadCharProcsPromise.then(function (key) {
var glyphStream = charProcs.get(key);
var operatorList = new OperatorList();
return evaluator.getOperatorList(glyphStream, task, fontResources,
operatorList).then(function () {
return type3Evaluator.getOperatorList(glyphStream, task,
fontResources, operatorList).
then(function () {
charProcOperatorList[key] = operatorList.getIR();
// Add the dependencies to the parent operator list so they are

View File

@ -732,6 +732,7 @@ var WorkerMessageHandler = {
maxImageSize: data.maxImageSize === undefined ? -1 : data.maxImageSize,
disableFontFace: data.disableFontFace,
disableNativeImageDecoder: data.disableNativeImageDecoder,
ignoreErrors: data.ignoreErrors,
};
getPdfManager(data, evaluatorOptions).then(function (newPdfManager) {
@ -899,15 +900,14 @@ var WorkerMessageHandler = {
handler.on('GetTextContent', function wphExtractText(data) {
var pageIndex = data.pageIndex;
var normalizeWhitespace = data.normalizeWhitespace;
var combineTextItems = data.combineTextItems;
return pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
return page.extractTextContent(handler, task, normalizeWhitespace,
combineTextItems).then(
return page.extractTextContent(handler, task, data.normalizeWhitespace,
data.combineTextItems).then(
function(textContent) {
finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' +

View File

@ -148,6 +148,10 @@ if (typeof PDFJSDev !== 'undefined' &&
* used when reading built-in CMap files. Providing a custom factory is useful
* for environments without `XMLHttpRequest` support, such as e.g. Node.js.
* The default value is {DOMCMapReaderFactory}.
* @property {boolean} stopAtErrors - (optional) Reject certain promises, e.g.
* `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated
* PDF data cannot be successfully parsed, instead of attempting to recover
* as much of the data as possible. The default value is `false`.
*/
/**
@ -262,6 +266,7 @@ function getDocument(src, pdfDataRangeTransport,
params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE;
params.disableNativeImageDecoder = params.disableNativeImageDecoder === true;
params.ignoreErrors = params.stopAtErrors !== true;
var CMapReaderFactory = params.CMapReaderFactory || DOMCMapReaderFactory;
if (!worker) {
@ -325,6 +330,7 @@ function _fetchDocument(worker, source, pdfDataRangeTransport, docId) {
!isPostMessageTransfersDisabled,
docBaseUrl: source.docBaseUrl,
disableNativeImageDecoder: source.disableNativeImageDecoder,
ignoreErrors: source.ignoreErrors,
}).then(function (workerId) {
if (worker.destroyed) {
throw new Error('Worker was destroyed');
@ -826,8 +832,6 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
this.pendingCleanup = false;
var renderingIntent = (params.intent === 'print' ? 'print' : 'display');
var renderInteractiveForms = (params.renderInteractiveForms === true ?
true : /* Default */ false);
var canvasFactory = params.canvasFactory || new DOMCanvasFactory();
if (!this.intentStates[renderingIntent]) {
@ -850,7 +854,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
this.transport.messageHandler.send('RenderPageRequest', {
pageIndex: this.pageNumber - 1,
intent: renderingIntent,
renderInteractiveForms: renderInteractiveForms,
renderInteractiveForms: (params.renderInteractiveForms === true),
});
}
@ -914,7 +918,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
/**
* @return {Promise} A promise resolved with an {@link PDFOperatorList}
* object that represents page's operator list.
* object that represents page's operator list.
*/
getOperatorList: function PDFPageProxy_getOperatorList() {
function operatorListChanged() {
@ -950,7 +954,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
this.transport.messageHandler.send('RenderPageRequest', {
pageIndex: this.pageIndex,
intent: renderingIntent
intent: renderingIntent,
});
}
return intentState.opListReadCapability.promise;
@ -962,12 +966,11 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
* object that represents the page text content.
*/
getTextContent: function PDFPageProxy_getTextContent(params) {
params = params || {};
return this.transport.messageHandler.sendWithPromise('GetTextContent', {
pageIndex: this.pageNumber - 1,
normalizeWhitespace: (params && params.normalizeWhitespace === true ?
true : /* Default */ false),
combineTextItems: (params && params.disableCombineTextItems === true ?
false : /* Default */ true),
normalizeWhitespace: (params.normalizeWhitespace === true),
combineTextItems: (params.disableCombineTextItems !== true),
});
},

View File

@ -21,6 +21,7 @@
!issue5874.pdf
!issue5808.pdf
!issue6204.pdf
!issue6342.pdf
!issue6652.pdf
!issue6782.pdf
!issue6901.pdf

View File

@ -0,0 +1 @@
https://bug1130815.bmoattachments.org/attachment.cgi?id=8560958

142
test/pdfs/issue6342.pdf Normal file
View File

@ -0,0 +1,142 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Kids [2 0 R]
/Count 1
/Type /Pages
>>
endobj
2 0 obj
<<
/Group 3 0 R
/Parent 1 0 R
/Resources 4 0 R
/MediaBox [0 0 300 100]
/Type /Page
/Contents 5 0 R
>>
endobj
3 0 obj
<<
/CS /DeviceRGB
/Type /Group
/S /Transparency
>>
endobj
4 0 obj
<<
/Font
<<
/F1 6 0 R
>>
/XObject
<<
/Im1 7 0 R
>>
>>
endobj
5 0 obj
<<
/Length 193
>>
stream
q
1 0 0 1 10 80 cm
0 0 0 rg 0 0 0 RG
1 w
0 0 m
280 0 l S
Q
q
1 0 0 1 25 45 cm
/Im1 Do
1 0 0 1 100 0 cm
/Im1 Do
Q
q
1 0 0 1 10 20 cm
BT
/F1 18 Tf
(Issue 6342 - Form XObject with errors) Tj
ET
Q
endstream
endobj
7 0 obj
<<
/Group 3 0 R
/Subtype /Form
/Length 1050
/Resources
<<
/ExtGState
<<
/a0
<<
/ca 1
/CA 1
>>
>>
>>
/FormType 1
/BBox [0 0 45 25]
/Type /XObject
>>
stream
q
0.2 0.8 0.2 rg /a0 gs
13.117 22.651 m 11.281 22.651 9.809 21.163 9.809 19.327 c 9.809 18.733
9.961 18.174 10.234 17.69 c 11.34 18.315 12.621 18.678 13.98 18.678 c
14.113 18.678 14.238 18.674 14.367 18.666 c 14.352 18.85 14.344 19.038
14.344 19.229 c 14.344 20.252 14.566 21.225 14.957 22.1 c 14.43 22.455
13.801 22.651 13.117 22.651 c h
13.117 22.651 m f
6.383 12.92 m 2.859 12.92 0 10.084 0 6.561 c 0 3.034 2.859 0.174 6.383
0.174 c 7.727 0.174 8.969 0.592 9.996 1.299 c 9.57 1.959 9.32 2.748
9.32 3.584 .020.594 6. c 499 c08. c830.174 586.17 21.17436
8.4 6.17436 9 c030.1717436 9 c 18.6.418.85930784 07.859 05.1717c08.
859 09.6.442.859 12m f
6.383 12651 m f
6.383 12.9f
678 13.757 5.651727563.757 5.7.0 2.8 c858.7.0 2.8 c030.177.0 2.09 8
8.136.1778899 797 5.521265172496.17873.8 c90674 c95.65153174 c95.c.455 c 7.4 c95.651918.7.770.252105.7.74522.1 c047 61 18.67802 61623.67802 748069.229 c02 1 m 13.9 c719.651202 15c90678 c809.3215c195.654 18.6746 3.
117768.674469..75728.229 c 09..7578.6741452.757 5.678 13.757 5.651 c h 13.757 5.65f
Q
endstream
endobj
6 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Encoding /WinAnsiEncoding
/Type /Font
>>
endobj
8 0 obj
<<
/Pages 1 0 R
/Type /Catalog
>>
endobj xref
0 9
0000000000 65535 f
0000000015 00000 n
0000000074 00000 n
0000000193 00000 n
0000000261 00000 n
0000000334 00000 n
0000001818 00000 n
0000000581 00000 n
0000001919 00000 n
trailer
<<
/Root 8 0 R
/Size 9
>>
startxref
1969
%%EOF

View File

@ -1500,6 +1500,20 @@
"lastPage": 1,
"type": "load"
},
{ "id": "bug1130815-eq",
"file": "pdfs/bug1130815.pdf",
"md5": "3ff3b550c3af766991b2a1b11d00de85",
"rounds": 1,
"link": true,
"type": "eq"
},
{ "id": "bug1130815-text",
"file": "pdfs/bug1130815.pdf",
"md5": "3ff3b550c3af766991b2a1b11d00de85",
"rounds": 1,
"link": true,
"type": "text"
},
{ "id": "issue3248",
"file": "pdfs/issue3248.pdf",
"md5": "970767ed68de46c316d74de67965999b",
@ -1532,6 +1546,20 @@
"lastPage": 1,
"type": "load"
},
{ "id": "issue6342-eq",
"file": "pdfs/issue6342.pdf",
"md5": "2ea85ca8d17117798f105be88bdb2bfd",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue6342-text",
"file": "pdfs/issue6342.pdf",
"md5": "2ea85ca8d17117798f105be88bdb2bfd",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "issue7020",
"file": "pdfs/issue7020.pdf",
"md5": "93b464e21c649e64ae92eeafe99fc31b",