From 885e7a8aa476398ae0a6db5ce92a9001546c1e36 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Mon, 26 Jul 2021 15:34:14 +0200 Subject: [PATCH] Allow `StreamsSequenceStream.readBlock` to skip sub-streams with errors (issue 13794) This patch makes use of the existing `ignoreErrors` option, thus allowing a page to continue parsing/rendering even if (some of) its sub-streams are corrupt. Obviously this may cause *part* of a page to be broken/missing; however, it should be better than (potentially) rendering nothing. Also, to the best of my knowledge, this is the first bug of its kind that we've encountered. To avoid having to pass in a bunch of parameters that are mostly unrelated to a `BaseStream`-instance when initializing a `StreamsSequenceStream`-instance, I settled on utilizing a callback function instead to allow conditional Error-suppression. Note that the `StreamsSequenceStream`-class is a *special* stream-implementation that we only use when the `/Contents`-entry, in the `/Page`-dictionary, consists of an Array with streams. 
--- src/core/decode_stream.js | 14 ++++++++++++-- src/core/document.js | 28 ++++++++++++++++++++++++---- src/shared/util.js | 1 + test/pdfs/issue13794.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 5 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 test/pdfs/issue13794.pdf.link diff --git a/src/core/decode_stream.js b/src/core/decode_stream.js index 701d78dec..96157462a 100644 --- a/src/core/decode_stream.js +++ b/src/core/decode_stream.js @@ -127,7 +127,7 @@ class DecodeStream extends BaseStream { } class StreamsSequenceStream extends DecodeStream { - constructor(streams) { + constructor(streams, onError = null) { let maybeLength = 0; for (const stream of streams) { maybeLength += @@ -138,6 +138,7 @@ class StreamsSequenceStream extends DecodeStream { super(maybeLength); this.streams = streams; + this._onError = onError; } readBlock() { @@ -147,7 +148,16 @@ class StreamsSequenceStream extends DecodeStream { return; } const stream = streams.shift(); - const chunk = stream.getBytes(); + let chunk; + try { + chunk = stream.getBytes(); + } catch (reason) { + if (this._onError) { + this._onError(reason, stream.dict && stream.dict.objId); + return; + } + throw reason; + } const bufferLength = this.bufferLength; const newLength = bufferLength + chunk.length; const buffer = this.ensureBuffer(newLength); diff --git a/src/core/document.js b/src/core/document.js index c6a1c6e1e..c9380a9e4 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -30,6 +30,7 @@ import { stringToPDFString, stringToUTF8String, unreachable, + UNSUPPORTED_FEATURES, Util, warn, } from "../shared/util.js"; @@ -225,16 +226,35 @@ class Page { return shadow(this, "rotate", rotate); } + /** + * @private + */ + _onSubStreamError(handler, reason, objId) { + if (this.evaluatorOptions.ignoreErrors) { + // Error(s) when reading one of the /Contents sub-streams -- sending + // unsupported feature notification and allow parsing to continue. 
+ handler.send("UnsupportedFeature", { + featureId: UNSUPPORTED_FEATURES.errorContentSubStream, + }); + warn(`getContentStream - ignoring sub-stream (${objId}): "${reason}".`); + return; + } + throw reason; + } + /** * @returns {Promise} */ - getContentStream() { + getContentStream(handler) { return this.pdfManager.ensure(this, "content").then(content => { if (content instanceof BaseStream) { return content; } if (Array.isArray(content)) { - return new StreamsSequenceStream(content); + return new StreamsSequenceStream( + content, + this._onSubStreamError.bind(this, handler) + ); } // Replace non-existent page content with empty content. return new NullStream(); @@ -307,7 +327,7 @@ class Page { renderInteractiveForms, annotationStorage, }) { - const contentStreamPromise = this.getContentStream(); + const contentStreamPromise = this.getContentStream(handler); const resourcesPromise = this.loadResources([ "ColorSpace", "ExtGState", @@ -417,7 +437,7 @@ class Page { sink, combineTextItems, }) { - const contentStreamPromise = this.getContentStream(); + const contentStreamPromise = this.getContentStream(handler); const resourcesPromise = this.loadResources([ "ExtGState", "Font", diff --git a/src/shared/util.js b/src/shared/util.js index f1a373acb..49970f832 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -335,6 +335,7 @@ const UNSUPPORTED_FEATURES = { errorFontBuildPath: "errorFontBuildPath", errorFontGetPath: "errorFontGetPath", errorMarkedContent: "errorMarkedContent", + errorContentSubStream: "errorContentSubStream", }; const PasswordResponses = { diff --git a/test/pdfs/issue13794.pdf.link b/test/pdfs/issue13794.pdf.link new file mode 100644 index 000000000..5d25e88d4 --- /dev/null +++ b/test/pdfs/issue13794.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/6876708/Scan-to-Mail-PDF1_.1.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index e28e30361..e84f05ade 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json 
@@ -1391,6 +1391,14 @@ "lastPage": 7, "type": "eq" }, + { "id": "issue13794", + "file": "pdfs/issue13794.pdf", + "md5": "6b4c099e04c9df145198740f2bf75c48", + "link": true, + "rounds": 1, + "firstPage": 3, + "type": "eq" + }, { "id": "issue9262", "file": "pdfs/issue9262_reduced.pdf", "md5": "5347ce2d7b3866625c22e115fd90e0de",