diff --git a/src/core/parser.js b/src/core/parser.js index 088a26c6e..25a101f68 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -40,27 +40,23 @@ import { PredictorStream } from "./predictor_stream.js"; import { RunLengthStream } from "./run_length_stream.js"; const MAX_LENGTH_TO_CACHE = 1000; -const MAX_ADLER32_LENGTH = 5552; -function computeAdler32(bytes) { - const bytesLength = bytes.length; - if ( - typeof PDFJSDev === "undefined" || - PDFJSDev.test("!PRODUCTION || TESTING") - ) { - assert( - bytesLength < MAX_ADLER32_LENGTH, - 'computeAdler32: Unsupported "bytes" length.' - ); +function getInlineImageCacheKey(bytes) { + const strBuf = [], + ii = bytes.length; + let i = 0; + while (i < ii - 1) { + strBuf.push((bytes[i++] << 8) | bytes[i++]); } - let a = 1, - b = 0; - for (let i = 0; i < bytesLength; ++i) { - // No modulo required in the loop if `bytesLength < 5552`. - a += bytes[i] & 0xff; - b += a; + // Handle an odd number of elements. + if (i < ii) { + strBuf.push(bytes[i]); } - return (b % 65521 << 16) | a % 65521; + // We purposely include the "raw" length in the cacheKey, to prevent any + // possible issues with hash collisions in the inline image cache. + // Here we also assume that `strBuf` is never larger than 8192 elements, + // please refer to the `bytesToString` implementation. + return ii + "_" + String.fromCharCode.apply(null, strBuf); } class Parser { @@ -71,6 +67,7 @@ class Parser { this.recoveryMode = recoveryMode; this.imageCache = Object.create(null); + this._imageId = 0; this.refill(); } @@ -532,25 +529,19 @@ class Parser { default: length = this.findDefaultInlineStreamEnd(stream); } - let imageStream = stream.makeSubStream(startPos, length, dict); // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their - // adler32 checksum. + // stringified content, to prevent possible hash collisions. let cacheKey; - if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) { - const imageBytes = imageStream.getBytes(); - imageStream.reset(); - + if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) { const initialStreamPos = stream.pos; // Set the stream position to the beginning of the dictionary data... stream.pos = lexer.beginInlineImagePos; - // ... and fetch the bytes of the *entire* dictionary. - const dictBytes = stream.getBytes(dictLength); + // ... and fetch the bytes of the dictionary *and* the inline image. + cacheKey = getInlineImageCacheKey(stream.getBytes(dictLength + length)); // Finally, don't forget to reset the stream position. stream.pos = initialStreamPos; - cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes); - const cacheEntry = this.imageCache[cacheKey]; if (cacheEntry !== undefined) { this.buf2 = Cmd.get("EI"); @@ -561,6 +552,7 @@ class Parser { } } + let imageStream = stream.makeSubStream(startPos, length, dict); if (cipherTransform) { imageStream = cipherTransform.createStream(imageStream, length); } @@ -568,7 +560,7 @@ class Parser { imageStream = this.filter(imageStream, dict, length); imageStream.dict = dict; if (cacheKey !== undefined) { - imageStream.cacheKey = `inline_${length}_${cacheKey}`; + imageStream.cacheKey = `inline_img_${++this._imageId}`; this.imageCache[cacheKey] = imageStream; } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c0ef6527d..86fb8fc99 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -534,6 +534,7 @@ !issue14415.pdf !issue14307.pdf !issue14497.pdf +!bug1799927.pdf !issue14502.pdf !issue13211.pdf !issue14627.pdf diff --git a/test/pdfs/bug1799927.pdf b/test/pdfs/bug1799927.pdf new file mode 100644 index 000000000..2605ee54e Binary files /dev/null and b/test/pdfs/bug1799927.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 9a66ad1cf..73493c19f 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -5921,6 +5921,12 @@ "annotations": true, "type": "eq" }, + { "id": "bug1799927", + "file": "pdfs/bug1799927.pdf", + "md5": "e6ad013c24e58e5b40c3bae50f04c8e8", + "rounds": 1, + "type": "eq" + }, { "id": "annotation-line-without-appearance-empty-Rect", "file": "pdfs/annotation-line-without-appearance-empty-Rect.pdf", "md5": "65f2d3ef80acfea637718c3fc66043b7",