Merge pull request #8922 from Snuffleupagus/paintXObject-errors
Allow `getOperatorList`/`getTextContent` to skip errors when parsing broken XObjects (issue 8702, issue 8704)
This commit is contained in:
		
						commit
						509d3728f1
					
				@ -948,52 +948,65 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 | 
				
			|||||||
            case OPS.paintXObject:
 | 
					            case OPS.paintXObject:
 | 
				
			||||||
              // eagerly compile XForm objects
 | 
					              // eagerly compile XForm objects
 | 
				
			||||||
              var name = args[0].name;
 | 
					              var name = args[0].name;
 | 
				
			||||||
              if (!name) {
 | 
					              if (name && imageCache[name] !== undefined) {
 | 
				
			||||||
                warn('XObject must be referred to by name.');
 | 
					 | 
				
			||||||
                continue;
 | 
					 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
              if (imageCache[name] !== undefined) {
 | 
					 | 
				
			||||||
                operatorList.addOp(imageCache[name].fn, imageCache[name].args);
 | 
					                operatorList.addOp(imageCache[name].fn, imageCache[name].args);
 | 
				
			||||||
                args = null;
 | 
					                args = null;
 | 
				
			||||||
                continue;
 | 
					                continue;
 | 
				
			||||||
              }
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
              var xobj = xobjs.get(name);
 | 
					              next(new Promise(function(resolveXObject, rejectXObject) {
 | 
				
			||||||
              if (xobj) {
 | 
					                if (!name) {
 | 
				
			||||||
 | 
					                  throw new FormatError('XObject must be referred to by name.');
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let xobj = xobjs.get(name);
 | 
				
			||||||
 | 
					                if (!xobj) {
 | 
				
			||||||
 | 
					                  operatorList.addOp(fn, args);
 | 
				
			||||||
 | 
					                  resolveXObject();
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
                if (!isStream(xobj)) {
 | 
					                if (!isStream(xobj)) {
 | 
				
			||||||
                  throw new FormatError('XObject should be a stream');
 | 
					                  throw new FormatError('XObject should be a stream');
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                var type = xobj.dict.get('Subtype');
 | 
					                let type = xobj.dict.get('Subtype');
 | 
				
			||||||
                if (!isName(type)) {
 | 
					                if (!isName(type)) {
 | 
				
			||||||
                  throw new FormatError('XObject should have a Name subtype');
 | 
					                  throw new FormatError('XObject should have a Name subtype');
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (type.name === 'Form') {
 | 
					                if (type.name === 'Form') {
 | 
				
			||||||
                  stateManager.save();
 | 
					                  stateManager.save();
 | 
				
			||||||
                  next(self.buildFormXObject(resources, xobj, null,
 | 
					                  self.buildFormXObject(resources, xobj, null, operatorList,
 | 
				
			||||||
                                             operatorList, task,
 | 
					                                        task, stateManager.state.clone()).
 | 
				
			||||||
                                             stateManager.state.clone()).
 | 
					                    then(function() {
 | 
				
			||||||
                    then(function () {
 | 
					 | 
				
			||||||
                      stateManager.restore();
 | 
					                      stateManager.restore();
 | 
				
			||||||
                    }));
 | 
					                      resolveXObject();
 | 
				
			||||||
 | 
					                    }, rejectXObject);
 | 
				
			||||||
                  return;
 | 
					                  return;
 | 
				
			||||||
                } else if (type.name === 'Image') {
 | 
					                } else if (type.name === 'Image') {
 | 
				
			||||||
                  self.buildPaintImageXObject(resources, xobj, false,
 | 
					                  self.buildPaintImageXObject(resources, xobj, false,
 | 
				
			||||||
                    operatorList, name, imageCache);
 | 
					                                              operatorList, name, imageCache);
 | 
				
			||||||
                  args = null;
 | 
					 | 
				
			||||||
                  continue;
 | 
					 | 
				
			||||||
                } else if (type.name === 'PS') {
 | 
					                } else if (type.name === 'PS') {
 | 
				
			||||||
                  // PostScript XObjects are unused when viewing documents.
 | 
					                  // PostScript XObjects are unused when viewing documents.
 | 
				
			||||||
                  // See section 4.7.1 of Adobe's PDF reference.
 | 
					                  // See section 4.7.1 of Adobe's PDF reference.
 | 
				
			||||||
                  info('Ignored XObject subtype PS');
 | 
					                  info('Ignored XObject subtype PS');
 | 
				
			||||||
                  continue;
 | 
					 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                  throw new FormatError(
 | 
					                  throw new FormatError(
 | 
				
			||||||
                    `Unhandled XObject subtype ${type.name}`);
 | 
					                    `Unhandled XObject subtype ${type.name}`);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
              }
 | 
					                resolveXObject();
 | 
				
			||||||
              break;
 | 
					              }).catch(function(reason) {
 | 
				
			||||||
 | 
					                if (self.options.ignoreErrors) {
 | 
				
			||||||
 | 
					                  // Error(s) in the XObject -- sending unsupported feature
 | 
				
			||||||
 | 
					                  // notification and allow rendering to continue.
 | 
				
			||||||
 | 
					                  self.handler.send('UnsupportedFeature',
 | 
				
			||||||
 | 
					                    { featureId: UNSUPPORTED_FEATURES.unknown, });
 | 
				
			||||||
 | 
					                  warn(`getOperatorList - ignoring XObject: "${reason}".`);
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                throw reason;
 | 
				
			||||||
 | 
					              }));
 | 
				
			||||||
 | 
					              return;
 | 
				
			||||||
            case OPS.setFont:
 | 
					            case OPS.setFont:
 | 
				
			||||||
              var fontSize = args[1];
 | 
					              var fontSize = args[1];
 | 
				
			||||||
              // eagerly collect all fonts
 | 
					              // eagerly collect all fonts
 | 
				
			||||||
@ -1666,73 +1679,93 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 | 
				
			|||||||
              }
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
              var name = args[0].name;
 | 
					              var name = args[0].name;
 | 
				
			||||||
              if (name in skipEmptyXObjs) {
 | 
					              if (name && skipEmptyXObjs[name] !== undefined) {
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
              }
 | 
					              }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
              var xobj = xobjs.get(name);
 | 
					              next(new Promise(function(resolveXObject, rejectXObject) {
 | 
				
			||||||
              if (!xobj) {
 | 
					                if (!name) {
 | 
				
			||||||
                break;
 | 
					                  throw new FormatError('XObject must be referred to by name.');
 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
              if (!isStream(xobj)) {
 | 
					 | 
				
			||||||
                throw new FormatError('XObject should be a stream');
 | 
					 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              var type = xobj.dict.get('Subtype');
 | 
					 | 
				
			||||||
              if (!isName(type)) {
 | 
					 | 
				
			||||||
                throw new FormatError('XObject should have a Name subtype');
 | 
					 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              if (type.name !== 'Form') {
 | 
					 | 
				
			||||||
                skipEmptyXObjs[name] = true;
 | 
					 | 
				
			||||||
                break;
 | 
					 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              // Use a new `StateManager` to prevent incorrect positioning of
 | 
					 | 
				
			||||||
              // textItems *after* the Form XObject, since errors in the data
 | 
					 | 
				
			||||||
              // can otherwise prevent `restore` operators from being executed.
 | 
					 | 
				
			||||||
              // NOTE: This is only an issue when `options.ignoreErrors = true`.
 | 
					 | 
				
			||||||
              var currentState = stateManager.state.clone();
 | 
					 | 
				
			||||||
              var xObjStateManager = new StateManager(currentState);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              var matrix = xobj.dict.getArray('Matrix');
 | 
					 | 
				
			||||||
              if (Array.isArray(matrix) && matrix.length === 6) {
 | 
					 | 
				
			||||||
                xObjStateManager.transform(matrix);
 | 
					 | 
				
			||||||
              }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              // Enqueue the `textContent` chunk before parsing the /Form
 | 
					 | 
				
			||||||
              // XObject.
 | 
					 | 
				
			||||||
              enqueueChunk();
 | 
					 | 
				
			||||||
              let sinkWrapper = {
 | 
					 | 
				
			||||||
                enqueueInvoked: false,
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                enqueue(chunk, size) {
 | 
					 | 
				
			||||||
                  this.enqueueInvoked = true;
 | 
					 | 
				
			||||||
                  sink.enqueue(chunk, size);
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                get desiredSize() {
 | 
					 | 
				
			||||||
                  return sink.desiredSize;
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                get ready() {
 | 
					 | 
				
			||||||
                  return sink.ready;
 | 
					 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
              };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
              next(self.getTextContent({
 | 
					 | 
				
			||||||
                stream: xobj,
 | 
					 | 
				
			||||||
                task,
 | 
					 | 
				
			||||||
                resources: xobj.dict.get('Resources') || resources,
 | 
					 | 
				
			||||||
                stateManager: xObjStateManager,
 | 
					 | 
				
			||||||
                normalizeWhitespace,
 | 
					 | 
				
			||||||
                combineTextItems,
 | 
					 | 
				
			||||||
                sink: sinkWrapper,
 | 
					 | 
				
			||||||
                seenStyles,
 | 
					 | 
				
			||||||
              }).then(function() {
 | 
					 | 
				
			||||||
                if (!sinkWrapper.enqueueInvoked) {
 | 
					 | 
				
			||||||
                  skipEmptyXObjs[name] = true;
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let xobj = xobjs.get(name);
 | 
				
			||||||
 | 
					                if (!xobj) {
 | 
				
			||||||
 | 
					                  resolveXObject();
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                if (!isStream(xobj)) {
 | 
				
			||||||
 | 
					                  throw new FormatError('XObject should be a stream');
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let type = xobj.dict.get('Subtype');
 | 
				
			||||||
 | 
					                if (!isName(type)) {
 | 
				
			||||||
 | 
					                  throw new FormatError('XObject should have a Name subtype');
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if (type.name !== 'Form') {
 | 
				
			||||||
 | 
					                  skipEmptyXObjs[name] = true;
 | 
				
			||||||
 | 
					                  resolveXObject();
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                // Use a new `StateManager` to prevent incorrect positioning of
 | 
				
			||||||
 | 
					                // textItems *after* the Form XObject, since errors in the data
 | 
				
			||||||
 | 
					                // can otherwise prevent `restore` operators from executing.
 | 
				
			||||||
 | 
					                // NOTE: Only an issue when `options.ignoreErrors === true`.
 | 
				
			||||||
 | 
					                let currentState = stateManager.state.clone();
 | 
				
			||||||
 | 
					                let xObjStateManager = new StateManager(currentState);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let matrix = xobj.dict.getArray('Matrix');
 | 
				
			||||||
 | 
					                if (Array.isArray(matrix) && matrix.length === 6) {
 | 
				
			||||||
 | 
					                  xObjStateManager.transform(matrix);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                // Enqueue the `textContent` chunk before parsing the /Form
 | 
				
			||||||
 | 
					                // XObject.
 | 
				
			||||||
 | 
					                enqueueChunk();
 | 
				
			||||||
 | 
					                let sinkWrapper = {
 | 
				
			||||||
 | 
					                  enqueueInvoked: false,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                  enqueue(chunk, size) {
 | 
				
			||||||
 | 
					                    this.enqueueInvoked = true;
 | 
				
			||||||
 | 
					                    sink.enqueue(chunk, size);
 | 
				
			||||||
 | 
					                  },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                  get desiredSize() {
 | 
				
			||||||
 | 
					                    return sink.desiredSize;
 | 
				
			||||||
 | 
					                  },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                  get ready() {
 | 
				
			||||||
 | 
					                    return sink.ready;
 | 
				
			||||||
 | 
					                  },
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                self.getTextContent({
 | 
				
			||||||
 | 
					                  stream: xobj,
 | 
				
			||||||
 | 
					                  task,
 | 
				
			||||||
 | 
					                  resources: xobj.dict.get('Resources') || resources,
 | 
				
			||||||
 | 
					                  stateManager: xObjStateManager,
 | 
				
			||||||
 | 
					                  normalizeWhitespace,
 | 
				
			||||||
 | 
					                  combineTextItems,
 | 
				
			||||||
 | 
					                  sink: sinkWrapper,
 | 
				
			||||||
 | 
					                  seenStyles,
 | 
				
			||||||
 | 
					                }).then(function() {
 | 
				
			||||||
 | 
					                  if (!sinkWrapper.enqueueInvoked) {
 | 
				
			||||||
 | 
					                    skipEmptyXObjs[name] = true;
 | 
				
			||||||
 | 
					                  }
 | 
				
			||||||
 | 
					                  resolveXObject();
 | 
				
			||||||
 | 
					                }, rejectXObject);
 | 
				
			||||||
 | 
					              }).catch(function(reason) {
 | 
				
			||||||
 | 
					                if (reason instanceof AbortException) {
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                if (self.options.ignoreErrors) {
 | 
				
			||||||
 | 
					                  // Error(s) in the XObject -- allow text-extraction to
 | 
				
			||||||
 | 
					                  // continue.
 | 
				
			||||||
 | 
					                  warn(`getTextContent - ignoring XObject: "${reason}".`);
 | 
				
			||||||
 | 
					                  return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                throw reason;
 | 
				
			||||||
              }));
 | 
					              }));
 | 
				
			||||||
              return;
 | 
					              return;
 | 
				
			||||||
            case OPS.setGState:
 | 
					            case OPS.setGState:
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							@ -57,6 +57,7 @@
 | 
				
			|||||||
!issue8480.pdf
 | 
					!issue8480.pdf
 | 
				
			||||||
!issue8570.pdf
 | 
					!issue8570.pdf
 | 
				
			||||||
!issue8697.pdf
 | 
					!issue8697.pdf
 | 
				
			||||||
 | 
					!issue8702.pdf
 | 
				
			||||||
!issue8707.pdf
 | 
					!issue8707.pdf
 | 
				
			||||||
!issue8798r.pdf
 | 
					!issue8798r.pdf
 | 
				
			||||||
!issue8823.pdf
 | 
					!issue8823.pdf
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										
											BIN
										
									
								
								test/pdfs/issue8702.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								test/pdfs/issue8702.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -1640,6 +1640,22 @@
 | 
				
			|||||||
       "lastPage": 1,
 | 
					       "lastPage": 1,
 | 
				
			||||||
       "type": "load"
 | 
					       "type": "load"
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
 | 
					    {  "id": "issue8702-eq",
 | 
				
			||||||
 | 
					       "file": "pdfs/issue8702.pdf",
 | 
				
			||||||
 | 
					       "md5": "59d501ed1518d78ef6ee442cf824b0f6",
 | 
				
			||||||
 | 
					       "rounds": 1,
 | 
				
			||||||
 | 
					       "link": false,
 | 
				
			||||||
 | 
					       "lastPage": 1,
 | 
				
			||||||
 | 
					       "type": "eq"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    {  "id": "issue8702-text",
 | 
				
			||||||
 | 
					       "file": "pdfs/issue8702.pdf",
 | 
				
			||||||
 | 
					       "md5": "59d501ed1518d78ef6ee442cf824b0f6",
 | 
				
			||||||
 | 
					       "rounds": 1,
 | 
				
			||||||
 | 
					       "link": false,
 | 
				
			||||||
 | 
					       "lastPage": 1,
 | 
				
			||||||
 | 
					       "type": "text"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {  "id": "pr4897",
 | 
					    {  "id": "pr4897",
 | 
				
			||||||
       "file": "pdfs/pr4897.pdf",
 | 
					       "file": "pdfs/pr4897.pdf",
 | 
				
			||||||
       "md5": "26897633eea5e6d10345a130b1c1777c",
 | 
					       "md5": "26897633eea5e6d10345a130b1c1777c",
 | 
				
			||||||
 | 
				
			|||||||
@ -14,9 +14,9 @@
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import { Dict, Name } from '../../src/core/primitives';
 | 
					import { Dict, Name } from '../../src/core/primitives';
 | 
				
			||||||
 | 
					import { FormatError, OPS } from '../../src/shared/util';
 | 
				
			||||||
import { OperatorList, PartialEvaluator } from '../../src/core/evaluator';
 | 
					import { OperatorList, PartialEvaluator } from '../../src/core/evaluator';
 | 
				
			||||||
import { Stream, StringStream } from '../../src/core/stream';
 | 
					import { Stream, StringStream } from '../../src/core/stream';
 | 
				
			||||||
import { OPS } from '../../src/shared/util';
 | 
					 | 
				
			||||||
import { WorkerTask } from '../../src/core/worker';
 | 
					import { WorkerTask } from '../../src/core/worker';
 | 
				
			||||||
import { XRefMock } from './test_utils';
 | 
					import { XRefMock } from './test_utils';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -48,6 +48,8 @@ describe('evaluator', function() {
 | 
				
			|||||||
      operatorList: result,
 | 
					      operatorList: result,
 | 
				
			||||||
    }).then(function() {
 | 
					    }).then(function() {
 | 
				
			||||||
      callback(result);
 | 
					      callback(result);
 | 
				
			||||||
 | 
					    }, function(reason) {
 | 
				
			||||||
 | 
					      callback(reason);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -229,9 +231,9 @@ describe('evaluator', function() {
 | 
				
			|||||||
    it('should skip paintXObject if name is missing', function(done) {
 | 
					    it('should skip paintXObject if name is missing', function(done) {
 | 
				
			||||||
      var stream = new StringStream('/ Do');
 | 
					      var stream = new StringStream('/ Do');
 | 
				
			||||||
      runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(),
 | 
					      runOperatorListCheck(partialEvaluator, stream, new ResourcesMock(),
 | 
				
			||||||
          function (result) {
 | 
					          function(result) {
 | 
				
			||||||
        expect(result.argsArray).toEqual([]);
 | 
					        expect(result instanceof FormatError).toEqual(true);
 | 
				
			||||||
        expect(result.fnArray).toEqual([]);
 | 
					        expect(result.message).toEqual('XObject must be referred to by name.');
 | 
				
			||||||
        done();
 | 
					        done();
 | 
				
			||||||
      });
 | 
					      });
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user