Merge pull request #15679 from Snuffleupagus/bug-1799927-2

Use the *full* inline image as the cacheKey in `Parser.makeInlineImage` (bug 1799927)
This commit is contained in:
Jonas Jenwald 2022-11-10 22:54:48 +01:00 committed by GitHub
commit 595711bd7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 41 additions and 36 deletions

View File

@@ -40,27 +40,23 @@ import { PredictorStream } from "./predictor_stream.js";
 import { RunLengthStream } from "./run_length_stream.js";
 const MAX_LENGTH_TO_CACHE = 1000;
-const MAX_ADLER32_LENGTH = 5552;
-function computeAdler32(bytes) {
-  const bytesLength = bytes.length;
-  if (
-    typeof PDFJSDev === "undefined" ||
-    PDFJSDev.test("!PRODUCTION || TESTING")
-  ) {
-    assert(
-      bytesLength < MAX_ADLER32_LENGTH,
-      'computeAdler32: Unsupported "bytes" length.'
-    );
+function getInlineImageCacheKey(bytes) {
+  const strBuf = [],
+    ii = bytes.length;
+  let i = 0;
+  while (i < ii - 1) {
+    strBuf.push((bytes[i++] << 8) | bytes[i++]);
   }
-  let a = 1,
-    b = 0;
-  for (let i = 0; i < bytesLength; ++i) {
-    // No modulo required in the loop if `bytesLength < 5552`.
-    a += bytes[i] & 0xff;
-    b += a;
+  // Handle an odd number of elements.
+  if (i < ii) {
+    strBuf.push(bytes[i]);
   }
-  return (b % 65521 << 16) | a % 65521;
+  // We purposely include the "raw" length in the cacheKey, to prevent any
+  // possible issues with hash collisions in the inline image cache.
+  // Here we also assume that `strBuf` is never larger than 8192 elements,
+  // please refer to the `bytesToString` implementation.
+  return ii + "_" + String.fromCharCode.apply(null, strBuf);
 }
 class Parser {
@@ -71,6 +67,7 @@ class Parser {
     this.recoveryMode = recoveryMode;
     this.imageCache = Object.create(null);
+    this._imageId = 0;
     this.refill();
   }
@@ -483,8 +480,9 @@ class Parser {
     const lexer = this.lexer;
     const stream = lexer.stream;
-    // Parse dictionary.
-    const dict = new Dict(this.xref);
+    // Parse dictionary, but initialize it lazily to improve performance with
+    // cached inline images (see issue 2618).
+    const dictMap = Object.create(null);
     let dictLength;
     while (!isCmd(this.buf1, "ID") && this.buf1 !== EOF) {
       if (!(this.buf1 instanceof Name)) {
@@ -495,14 +493,14 @@ class Parser {
       if (this.buf1 === EOF) {
         break;
       }
-      dict.set(key, this.getObj(cipherTransform));
+      dictMap[key] = this.getObj(cipherTransform);
     }
     if (lexer.beginInlineImagePos !== -1) {
       dictLength = stream.pos - lexer.beginInlineImagePos;
     }
     // Extract the name of the first (i.e. the current) image filter.
-    const filter = dict.get("F", "Filter");
+    const filter = this.xref.fetchIfRef(dictMap.F || dictMap.Filter);
     let filterName;
     if (filter instanceof Name) {
       filterName = filter.name;
@@ -532,25 +530,19 @@ class Parser {
       default:
         length = this.findDefaultInlineStreamEnd(stream);
     }
-    let imageStream = stream.makeSubStream(startPos, length, dict);
     // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
-    // adler32 checksum.
+    // stringified content, to prevent possible hash collisions.
     let cacheKey;
-    if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
-      const imageBytes = imageStream.getBytes();
-      imageStream.reset();
+    if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) {
       const initialStreamPos = stream.pos;
       // Set the stream position to the beginning of the dictionary data...
       stream.pos = lexer.beginInlineImagePos;
-      // ... and fetch the bytes of the *entire* dictionary.
-      const dictBytes = stream.getBytes(dictLength);
+      // ... and fetch the bytes of the dictionary *and* the inline image.
+      cacheKey = getInlineImageCacheKey(stream.getBytes(dictLength + length));
       // Finally, don't forget to reset the stream position.
       stream.pos = initialStreamPos;
-      cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
       const cacheEntry = this.imageCache[cacheKey];
       if (cacheEntry !== undefined) {
         this.buf2 = Cmd.get("EI");
@@ -561,6 +553,11 @@ class Parser {
       }
     }
+    const dict = new Dict(this.xref);
+    for (const key in dictMap) {
+      dict.set(key, dictMap[key]);
+    }
+    let imageStream = stream.makeSubStream(startPos, length, dict);
     if (cipherTransform) {
       imageStream = cipherTransform.createStream(imageStream, length);
     }
@@ -568,7 +565,7 @@ class Parser {
     imageStream = this.filter(imageStream, dict, length);
     imageStream.dict = dict;
     if (cacheKey !== undefined) {
-      imageStream.cacheKey = `inline_${length}_${cacheKey}`;
+      imageStream.cacheKey = `inline_img_${++this._imageId}`;
       this.imageCache[cacheKey] = imageStream;
     }

View File

@@ -130,9 +130,10 @@ class MurmurHash3_64 {
       (((((h2 << 16) | (h1 >>> 16)) * 0xb9fe1a85) & MASK_HIGH) >>> 16);
     h1 ^= h2 >>> 1;
-    const hex1 = (h1 >>> 0).toString(16),
-      hex2 = (h2 >>> 0).toString(16);
-    return hex1.padStart(8, "0") + hex2.padStart(8, "0");
+    return (
+      (h1 >>> 0).toString(16).padStart(8, "0") +
+      (h2 >>> 0).toString(16).padStart(8, "0")
+    );
   }
 }

View File

@@ -534,6 +534,7 @@
 !issue14415.pdf
 !issue14307.pdf
 !issue14497.pdf
+!bug1799927.pdf
 !issue14502.pdf
 !issue13211.pdf
 !issue14627.pdf

BIN
test/pdfs/bug1799927.pdf Normal file

Binary file not shown.

View File

@@ -5921,6 +5921,12 @@
   "annotations": true,
   "type": "eq"
 },
+{ "id": "bug1799927",
+  "file": "pdfs/bug1799927.pdf",
+  "md5": "e6ad013c24e58e5b40c3bae50f04c8e8",
+  "rounds": 1,
+  "type": "eq"
+},
 { "id": "annotation-line-without-appearance-empty-Rect",
   "file": "pdfs/annotation-line-without-appearance-empty-Rect.pdf",
   "md5": "65f2d3ef80acfea637718c3fc66043b7",