Allow `getOperatorList`/`getTextContent` to skip errors when parsing broken XObjects (issue 8702, issue 8704)
This patch makes use of the existing `ignoreErrors` property in `src/core/evaluator.js`, see PRs 8240 and 8441, thus allowing us to attempt to recover as much as possible of a page even when it contains broken XObjects. Fixes 8702. Fixes 8704.
This commit is contained in:
parent
b3f8411264
commit
b1472cddbb
@ -948,52 +948,65 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
case OPS.paintXObject:
|
case OPS.paintXObject:
|
||||||
// eagerly compile XForm objects
|
// eagerly compile XForm objects
|
||||||
var name = args[0].name;
|
var name = args[0].name;
|
||||||
if (!name) {
|
if (name && imageCache[name] !== undefined) {
|
||||||
warn('XObject must be referred to by name.');
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (imageCache[name] !== undefined) {
|
|
||||||
operatorList.addOp(imageCache[name].fn, imageCache[name].args);
|
operatorList.addOp(imageCache[name].fn, imageCache[name].args);
|
||||||
args = null;
|
args = null;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
var xobj = xobjs.get(name);
|
next(new Promise(function(resolveXObject, rejectXObject) {
|
||||||
if (xobj) {
|
if (!name) {
|
||||||
|
throw new FormatError('XObject must be referred to by name.');
|
||||||
|
}
|
||||||
|
|
||||||
|
let xobj = xobjs.get(name);
|
||||||
|
if (!xobj) {
|
||||||
|
operatorList.addOp(fn, args);
|
||||||
|
resolveXObject();
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (!isStream(xobj)) {
|
if (!isStream(xobj)) {
|
||||||
throw new FormatError('XObject should be a stream');
|
throw new FormatError('XObject should be a stream');
|
||||||
}
|
}
|
||||||
|
|
||||||
var type = xobj.dict.get('Subtype');
|
let type = xobj.dict.get('Subtype');
|
||||||
if (!isName(type)) {
|
if (!isName(type)) {
|
||||||
throw new FormatError('XObject should have a Name subtype');
|
throw new FormatError('XObject should have a Name subtype');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type.name === 'Form') {
|
if (type.name === 'Form') {
|
||||||
stateManager.save();
|
stateManager.save();
|
||||||
next(self.buildFormXObject(resources, xobj, null,
|
self.buildFormXObject(resources, xobj, null, operatorList,
|
||||||
operatorList, task,
|
task, stateManager.state.clone()).
|
||||||
stateManager.state.clone()).
|
then(function() {
|
||||||
then(function () {
|
|
||||||
stateManager.restore();
|
stateManager.restore();
|
||||||
}));
|
resolveXObject();
|
||||||
|
}, rejectXObject);
|
||||||
return;
|
return;
|
||||||
} else if (type.name === 'Image') {
|
} else if (type.name === 'Image') {
|
||||||
self.buildPaintImageXObject(resources, xobj, false,
|
self.buildPaintImageXObject(resources, xobj, false,
|
||||||
operatorList, name, imageCache);
|
operatorList, name, imageCache);
|
||||||
args = null;
|
|
||||||
continue;
|
|
||||||
} else if (type.name === 'PS') {
|
} else if (type.name === 'PS') {
|
||||||
// PostScript XObjects are unused when viewing documents.
|
// PostScript XObjects are unused when viewing documents.
|
||||||
// See section 4.7.1 of Adobe's PDF reference.
|
// See section 4.7.1 of Adobe's PDF reference.
|
||||||
info('Ignored XObject subtype PS');
|
info('Ignored XObject subtype PS');
|
||||||
continue;
|
|
||||||
} else {
|
} else {
|
||||||
throw new FormatError(
|
throw new FormatError(
|
||||||
`Unhandled XObject subtype ${type.name}`);
|
`Unhandled XObject subtype ${type.name}`);
|
||||||
}
|
}
|
||||||
|
resolveXObject();
|
||||||
|
}).catch(function(reason) {
|
||||||
|
if (self.options.ignoreErrors) {
|
||||||
|
// Error(s) in the XObject -- sending unsupported feature
|
||||||
|
// notification and allow rendering to continue.
|
||||||
|
self.handler.send('UnsupportedFeature',
|
||||||
|
{ featureId: UNSUPPORTED_FEATURES.unknown, });
|
||||||
|
warn(`getOperatorList - ignoring XObject: "${reason}".`);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
break;
|
throw reason;
|
||||||
|
}));
|
||||||
|
return;
|
||||||
case OPS.setFont:
|
case OPS.setFont:
|
||||||
var fontSize = args[1];
|
var fontSize = args[1];
|
||||||
// eagerly collect all fonts
|
// eagerly collect all fonts
|
||||||
@ -1666,36 +1679,43 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var name = args[0].name;
|
var name = args[0].name;
|
||||||
if (name in skipEmptyXObjs) {
|
if (name && skipEmptyXObjs[name] !== undefined) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
var xobj = xobjs.get(name);
|
next(new Promise(function(resolveXObject, rejectXObject) {
|
||||||
|
if (!name) {
|
||||||
|
throw new FormatError('XObject must be referred to by name.');
|
||||||
|
}
|
||||||
|
|
||||||
|
let xobj = xobjs.get(name);
|
||||||
if (!xobj) {
|
if (!xobj) {
|
||||||
break;
|
resolveXObject();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (!isStream(xobj)) {
|
if (!isStream(xobj)) {
|
||||||
throw new FormatError('XObject should be a stream');
|
throw new FormatError('XObject should be a stream');
|
||||||
}
|
}
|
||||||
|
|
||||||
var type = xobj.dict.get('Subtype');
|
let type = xobj.dict.get('Subtype');
|
||||||
if (!isName(type)) {
|
if (!isName(type)) {
|
||||||
throw new FormatError('XObject should have a Name subtype');
|
throw new FormatError('XObject should have a Name subtype');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type.name !== 'Form') {
|
if (type.name !== 'Form') {
|
||||||
skipEmptyXObjs[name] = true;
|
skipEmptyXObjs[name] = true;
|
||||||
break;
|
resolveXObject();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use a new `StateManager` to prevent incorrect positioning of
|
// Use a new `StateManager` to prevent incorrect positioning of
|
||||||
// textItems *after* the Form XObject, since errors in the data
|
// textItems *after* the Form XObject, since errors in the data
|
||||||
// can otherwise prevent `restore` operators from being executed.
|
// can otherwise prevent `restore` operators from executing.
|
||||||
// NOTE: This is only an issue when `options.ignoreErrors = true`.
|
// NOTE: Only an issue when `options.ignoreErrors === true`.
|
||||||
var currentState = stateManager.state.clone();
|
let currentState = stateManager.state.clone();
|
||||||
var xObjStateManager = new StateManager(currentState);
|
let xObjStateManager = new StateManager(currentState);
|
||||||
|
|
||||||
var matrix = xobj.dict.getArray('Matrix');
|
let matrix = xobj.dict.getArray('Matrix');
|
||||||
if (Array.isArray(matrix) && matrix.length === 6) {
|
if (Array.isArray(matrix) && matrix.length === 6) {
|
||||||
xObjStateManager.transform(matrix);
|
xObjStateManager.transform(matrix);
|
||||||
}
|
}
|
||||||
@ -1720,7 +1740,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
next(self.getTextContent({
|
self.getTextContent({
|
||||||
stream: xobj,
|
stream: xobj,
|
||||||
task,
|
task,
|
||||||
resources: xobj.dict.get('Resources') || resources,
|
resources: xobj.dict.get('Resources') || resources,
|
||||||
@ -1733,6 +1753,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
if (!sinkWrapper.enqueueInvoked) {
|
if (!sinkWrapper.enqueueInvoked) {
|
||||||
skipEmptyXObjs[name] = true;
|
skipEmptyXObjs[name] = true;
|
||||||
}
|
}
|
||||||
|
resolveXObject();
|
||||||
|
}, rejectXObject);
|
||||||
|
}).catch(function(reason) {
|
||||||
|
if (reason instanceof AbortException) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (self.options.ignoreErrors) {
|
||||||
|
// Error(s) in the XObject -- allow text-extraction to
|
||||||
|
// continue.
|
||||||
|
warn(`getTextContent - ignoring XObject: "${reason}".`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw reason;
|
||||||
}));
|
}));
|
||||||
return;
|
return;
|
||||||
case OPS.setGState:
|
case OPS.setGState:
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -57,6 +57,7 @@
|
|||||||
!issue8480.pdf
|
!issue8480.pdf
|
||||||
!issue8570.pdf
|
!issue8570.pdf
|
||||||
!issue8697.pdf
|
!issue8697.pdf
|
||||||
|
!issue8702.pdf
|
||||||
!issue8707.pdf
|
!issue8707.pdf
|
||||||
!issue8798r.pdf
|
!issue8798r.pdf
|
||||||
!issue8823.pdf
|
!issue8823.pdf
|
||||||
|
BIN
test/pdfs/issue8702.pdf
Normal file
BIN
test/pdfs/issue8702.pdf
Normal file
Binary file not shown.
@ -1640,6 +1640,22 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "load"
|
"type": "load"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue8702-eq",
|
||||||
|
"file": "pdfs/issue8702.pdf",
|
||||||
|
"md5": "59d501ed1518d78ef6ee442cf824b0f6",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"lastPage": 1,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{ "id": "issue8702-text",
|
||||||
|
"file": "pdfs/issue8702.pdf",
|
||||||
|
"md5": "59d501ed1518d78ef6ee442cf824b0f6",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"lastPage": 1,
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
{ "id": "pr4897",
|
{ "id": "pr4897",
|
||||||
"file": "pdfs/pr4897.pdf",
|
"file": "pdfs/pr4897.pdf",
|
||||||
"md5": "26897633eea5e6d10345a130b1c1777c",
|
"md5": "26897633eea5e6d10345a130b1c1777c",
|
||||||
|
@ -14,9 +14,9 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { Dict, Name } from '../../src/core/primitives';
|
import { Dict, Name } from '../../src/core/primitives';
|
||||||
|
import { FormatError, OPS } from '../../src/shared/util';
|
||||||
import { OperatorList, PartialEvaluator } from '../../src/core/evaluator';
|
import { OperatorList, PartialEvaluator } from '../../src/core/evaluator';
|
||||||
import { Stream, StringStream } from '../../src/core/stream';
|
import { Stream, StringStream } from '../../src/core/stream';
|
||||||
import { OPS } from '../../src/shared/util';
|
|
||||||
import { WorkerTask } from '../../src/core/worker';
|
import { WorkerTask } from '../../src/core/worker';
|
||||||
import { XRefMock } from './test_utils';
|
import { XRefMock } from './test_utils';
|
||||||
|
|
||||||
@ -48,6 +48,8 @@ describe('evaluator', function() {
|
|||||||
operatorList: result,
|
operatorList: result,
|
||||||
}).then(function() {
|
}).then(function() {
|
||||||
callback(result);
|
callback(result);
|
||||||
|
}, function(reason) {
|
||||||
|
callback(reason);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,9 +231,9 @@ describe('evaluator', function() {
|
|||||||
it('should skip paintXObject if name is missing', function(done) {
|
it('should skip paintXObject if name is missing', function(done) {
|
||||||
var stream = new StringStream('/ Do');
|
var stream = new StringStream('/ Do');
|
||||||
runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(),
|
runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(),
|
||||||
function (result) {
|
function(result) {
|
||||||
expect(result.argsArray).toEqual([]);
|
expect(result instanceof FormatError).toEqual(true);
|
||||||
expect(result.fnArray).toEqual([]);
|
expect(result.message).toEqual('XObject must be referred to by name.');
|
||||||
done();
|
done();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user