Allow getOperatorList/getTextContent to skip errors when parsing broken XObjects (issue 8702, issue 8704)

This patch makes use of the existing `ignoreErrors` property in `src/core/evaluator.js`, see PRs 8240 and 8441, thus allowing us to attempt to recover as much of a page as possible even when it contains broken XObjects.

Fixes 8702.
Fixes 8704.
This commit is contained in:
Jonas Jenwald 2017-09-17 13:35:18 +02:00
parent b3f8411264
commit b1472cddbb
5 changed files with 138 additions and 86 deletions

View File

@ -948,52 +948,65 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case OPS.paintXObject: case OPS.paintXObject:
// eagerly compile XForm objects // eagerly compile XForm objects
var name = args[0].name; var name = args[0].name;
if (!name) { if (name && imageCache[name] !== undefined) {
warn('XObject must be referred to by name.');
continue;
}
if (imageCache[name] !== undefined) {
operatorList.addOp(imageCache[name].fn, imageCache[name].args); operatorList.addOp(imageCache[name].fn, imageCache[name].args);
args = null; args = null;
continue; continue;
} }
var xobj = xobjs.get(name); next(new Promise(function(resolveXObject, rejectXObject) {
if (xobj) { if (!name) {
throw new FormatError('XObject must be referred to by name.');
}
let xobj = xobjs.get(name);
if (!xobj) {
operatorList.addOp(fn, args);
resolveXObject();
return;
}
if (!isStream(xobj)) { if (!isStream(xobj)) {
throw new FormatError('XObject should be a stream'); throw new FormatError('XObject should be a stream');
} }
var type = xobj.dict.get('Subtype'); let type = xobj.dict.get('Subtype');
if (!isName(type)) { if (!isName(type)) {
throw new FormatError('XObject should have a Name subtype'); throw new FormatError('XObject should have a Name subtype');
} }
if (type.name === 'Form') { if (type.name === 'Form') {
stateManager.save(); stateManager.save();
next(self.buildFormXObject(resources, xobj, null, self.buildFormXObject(resources, xobj, null, operatorList,
operatorList, task, task, stateManager.state.clone()).
stateManager.state.clone()). then(function() {
then(function () {
stateManager.restore(); stateManager.restore();
})); resolveXObject();
}, rejectXObject);
return; return;
} else if (type.name === 'Image') { } else if (type.name === 'Image') {
self.buildPaintImageXObject(resources, xobj, false, self.buildPaintImageXObject(resources, xobj, false,
operatorList, name, imageCache); operatorList, name, imageCache);
args = null;
continue;
} else if (type.name === 'PS') { } else if (type.name === 'PS') {
// PostScript XObjects are unused when viewing documents. // PostScript XObjects are unused when viewing documents.
// See section 4.7.1 of Adobe's PDF reference. // See section 4.7.1 of Adobe's PDF reference.
info('Ignored XObject subtype PS'); info('Ignored XObject subtype PS');
continue;
} else { } else {
throw new FormatError( throw new FormatError(
`Unhandled XObject subtype ${type.name}`); `Unhandled XObject subtype ${type.name}`);
} }
resolveXObject();
}).catch(function(reason) {
if (self.options.ignoreErrors) {
// Error(s) in the XObject -- sending unsupported feature
// notification and allow rendering to continue.
self.handler.send('UnsupportedFeature',
{ featureId: UNSUPPORTED_FEATURES.unknown, });
warn(`getOperatorList - ignoring XObject: "${reason}".`);
return;
} }
break; throw reason;
}));
return;
case OPS.setFont: case OPS.setFont:
var fontSize = args[1]; var fontSize = args[1];
// eagerly collect all fonts // eagerly collect all fonts
@ -1666,36 +1679,43 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
var name = args[0].name; var name = args[0].name;
if (name in skipEmptyXObjs) { if (name && skipEmptyXObjs[name] !== undefined) {
break; break;
} }
var xobj = xobjs.get(name); next(new Promise(function(resolveXObject, rejectXObject) {
if (!name) {
throw new FormatError('XObject must be referred to by name.');
}
let xobj = xobjs.get(name);
if (!xobj) { if (!xobj) {
break; resolveXObject();
return;
} }
if (!isStream(xobj)) { if (!isStream(xobj)) {
throw new FormatError('XObject should be a stream'); throw new FormatError('XObject should be a stream');
} }
var type = xobj.dict.get('Subtype'); let type = xobj.dict.get('Subtype');
if (!isName(type)) { if (!isName(type)) {
throw new FormatError('XObject should have a Name subtype'); throw new FormatError('XObject should have a Name subtype');
} }
if (type.name !== 'Form') { if (type.name !== 'Form') {
skipEmptyXObjs[name] = true; skipEmptyXObjs[name] = true;
break; resolveXObject();
return;
} }
// Use a new `StateManager` to prevent incorrect positioning of // Use a new `StateManager` to prevent incorrect positioning of
// textItems *after* the Form XObject, since errors in the data // textItems *after* the Form XObject, since errors in the data
// can otherwise prevent `restore` operators from being executed. // can otherwise prevent `restore` operators from executing.
// NOTE: This is only an issue when `options.ignoreErrors = true`. // NOTE: Only an issue when `options.ignoreErrors === true`.
var currentState = stateManager.state.clone(); let currentState = stateManager.state.clone();
var xObjStateManager = new StateManager(currentState); let xObjStateManager = new StateManager(currentState);
var matrix = xobj.dict.getArray('Matrix'); let matrix = xobj.dict.getArray('Matrix');
if (Array.isArray(matrix) && matrix.length === 6) { if (Array.isArray(matrix) && matrix.length === 6) {
xObjStateManager.transform(matrix); xObjStateManager.transform(matrix);
} }
@ -1720,7 +1740,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, },
}; };
next(self.getTextContent({ self.getTextContent({
stream: xobj, stream: xobj,
task, task,
resources: xobj.dict.get('Resources') || resources, resources: xobj.dict.get('Resources') || resources,
@ -1733,6 +1753,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (!sinkWrapper.enqueueInvoked) { if (!sinkWrapper.enqueueInvoked) {
skipEmptyXObjs[name] = true; skipEmptyXObjs[name] = true;
} }
resolveXObject();
}, rejectXObject);
}).catch(function(reason) {
if (reason instanceof AbortException) {
return;
}
if (self.options.ignoreErrors) {
// Error(s) in the XObject -- allow text-extraction to
// continue.
warn(`getTextContent - ignoring XObject: "${reason}".`);
return;
}
throw reason;
})); }));
return; return;
case OPS.setGState: case OPS.setGState:

View File

@ -57,6 +57,7 @@
!issue8480.pdf !issue8480.pdf
!issue8570.pdf !issue8570.pdf
!issue8697.pdf !issue8697.pdf
!issue8702.pdf
!issue8707.pdf !issue8707.pdf
!issue8798r.pdf !issue8798r.pdf
!issue8823.pdf !issue8823.pdf

BIN
test/pdfs/issue8702.pdf Normal file

Binary file not shown.

View File

@ -1640,6 +1640,22 @@
"lastPage": 1, "lastPage": 1,
"type": "load" "type": "load"
}, },
{ "id": "issue8702-eq",
"file": "pdfs/issue8702.pdf",
"md5": "59d501ed1518d78ef6ee442cf824b0f6",
"rounds": 1,
"link": false,
"lastPage": 1,
"type": "eq"
},
{ "id": "issue8702-text",
"file": "pdfs/issue8702.pdf",
"md5": "59d501ed1518d78ef6ee442cf824b0f6",
"rounds": 1,
"link": false,
"lastPage": 1,
"type": "text"
},
{ "id": "pr4897", { "id": "pr4897",
"file": "pdfs/pr4897.pdf", "file": "pdfs/pr4897.pdf",
"md5": "26897633eea5e6d10345a130b1c1777c", "md5": "26897633eea5e6d10345a130b1c1777c",

View File

@ -14,9 +14,9 @@
*/ */
import { Dict, Name } from '../../src/core/primitives'; import { Dict, Name } from '../../src/core/primitives';
import { FormatError, OPS } from '../../src/shared/util';
import { OperatorList, PartialEvaluator } from '../../src/core/evaluator'; import { OperatorList, PartialEvaluator } from '../../src/core/evaluator';
import { Stream, StringStream } from '../../src/core/stream'; import { Stream, StringStream } from '../../src/core/stream';
import { OPS } from '../../src/shared/util';
import { WorkerTask } from '../../src/core/worker'; import { WorkerTask } from '../../src/core/worker';
import { XRefMock } from './test_utils'; import { XRefMock } from './test_utils';
@ -48,6 +48,8 @@ describe('evaluator', function() {
operatorList: result, operatorList: result,
}).then(function() { }).then(function() {
callback(result); callback(result);
}, function(reason) {
callback(reason);
}); });
} }
@ -229,9 +231,9 @@ describe('evaluator', function() {
it('should skip paintXObject if name is missing', function(done) { it('should skip paintXObject if name is missing', function(done) {
var stream = new StringStream('/ Do'); var stream = new StringStream('/ Do');
runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(), runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(),
function (result) { function(result) {
expect(result.argsArray).toEqual([]); expect(result instanceof FormatError).toEqual(true);
expect(result.fnArray).toEqual([]); expect(result.message).toEqual('XObject must be referred to by name.');
done(); done();
}); });
}); });