Allow StreamsSequenceStream.readBlock to skip sub-streams with errors (issue 13794)

This patch makes use of the existing `ignoreErrors` option, thus allowing a page to continue parsing/rendering even if (some of) its sub-streams are corrupt. Obviously this may cause *part* of a page to be broken/missing; however, it should be better than (potentially) rendering nothing.
Also, to the best of my knowledge, this is the first bug of its kind that we've encountered.

To avoid having to pass in a bunch of parameters that are mostly unrelated to a `BaseStream`-instance when initializing a `StreamsSequenceStream`-instance, I settled on using a callback function instead, to allow conditional Error-suppression.
Note that the `StreamsSequenceStream`-class is a *special* stream-implementation that we only use when the `/Contents`-entry, in the `/Page`-dictionary, consists of an Array with streams.
This commit is contained in:
Jonas Jenwald 2021-07-26 15:34:14 +02:00
parent 777d890268
commit 885e7a8aa4
5 changed files with 46 additions and 6 deletions

View File

@ -127,7 +127,7 @@ class DecodeStream extends BaseStream {
}
class StreamsSequenceStream extends DecodeStream {
constructor(streams) {
constructor(streams, onError = null) {
let maybeLength = 0;
for (const stream of streams) {
maybeLength +=
@ -138,6 +138,7 @@ class StreamsSequenceStream extends DecodeStream {
super(maybeLength);
this.streams = streams;
this._onError = onError;
}
readBlock() {
@ -147,7 +148,16 @@ class StreamsSequenceStream extends DecodeStream {
return;
}
const stream = streams.shift();
const chunk = stream.getBytes();
let chunk;
try {
chunk = stream.getBytes();
} catch (reason) {
if (this._onError) {
this._onError(reason, stream.dict && stream.dict.objId);
return;
}
throw reason;
}
const bufferLength = this.bufferLength;
const newLength = bufferLength + chunk.length;
const buffer = this.ensureBuffer(newLength);

View File

@ -30,6 +30,7 @@ import {
stringToPDFString,
stringToUTF8String,
unreachable,
UNSUPPORTED_FEATURES,
Util,
warn,
} from "../shared/util.js";
@ -225,16 +226,35 @@ class Page {
return shadow(this, "rotate", rotate);
}
/**
* @private
*/
_onSubStreamError(handler, reason, objId) {
if (this.evaluatorOptions.ignoreErrors) {
// Error(s) when reading one of the /Contents sub-streams -- sending
// unsupported feature notification and allow parsing to continue.
handler.send("UnsupportedFeature", {
featureId: UNSUPPORTED_FEATURES.errorContentSubStream,
});
warn(`getContentStream - ignoring sub-stream (${objId}): "${reason}".`);
return;
}
throw reason;
}
/**
* @returns {Promise<BaseStream>}
*/
getContentStream() {
getContentStream(handler) {
return this.pdfManager.ensure(this, "content").then(content => {
if (content instanceof BaseStream) {
return content;
}
if (Array.isArray(content)) {
return new StreamsSequenceStream(content);
return new StreamsSequenceStream(
content,
this._onSubStreamError.bind(this, handler)
);
}
// Replace non-existent page content with empty content.
return new NullStream();
@ -307,7 +327,7 @@ class Page {
renderInteractiveForms,
annotationStorage,
}) {
const contentStreamPromise = this.getContentStream();
const contentStreamPromise = this.getContentStream(handler);
const resourcesPromise = this.loadResources([
"ColorSpace",
"ExtGState",
@ -417,7 +437,7 @@ class Page {
sink,
combineTextItems,
}) {
const contentStreamPromise = this.getContentStream();
const contentStreamPromise = this.getContentStream(handler);
const resourcesPromise = this.loadResources([
"ExtGState",
"Font",

View File

@ -335,6 +335,7 @@ const UNSUPPORTED_FEATURES = {
errorFontBuildPath: "errorFontBuildPath",
errorFontGetPath: "errorFontGetPath",
errorMarkedContent: "errorMarkedContent",
errorContentSubStream: "errorContentSubStream",
};
const PasswordResponses = {

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/6876708/Scan-to-Mail-PDF1_.1.pdf

View File

@ -1391,6 +1391,14 @@
"lastPage": 7,
"type": "eq"
},
{ "id": "issue13794",
"file": "pdfs/issue13794.pdf",
"md5": "6b4c099e04c9df145198740f2bf75c48",
"link": true,
"rounds": 1,
"firstPage": 3,
"type": "eq"
},
{ "id": "issue9262",
"file": "pdfs/issue9262_reduced.pdf",
"md5": "5347ce2d7b3866625c22e115fd90e0de",