Initialize the dictionary *lazily* when parsing inline images

This helps improve performance for some PDF documents with a huge number of inline images, e.g. the PDF document from issue 2618.
Given that we no longer create `Stream`-instances unconditionally, we also don't need `Dict`-instances for cached inline images (since we only access the filter).
This commit is contained in:
Jonas Jenwald 2022-11-10 14:04:10 +01:00
parent b46e0d61cf
commit 7abb6429b0

View File

@@ -480,8 +480,9 @@ class Parser {
const lexer = this.lexer;
const stream = lexer.stream;
// Parse dictionary.
const dict = new Dict(this.xref);
// Parse dictionary, but initialize it lazily to improve performance with
// cached inline images (see issue 2618).
const dictMap = Object.create(null);
let dictLength;
while (!isCmd(this.buf1, "ID") && this.buf1 !== EOF) {
if (!(this.buf1 instanceof Name)) {
@@ -492,14 +493,14 @@ class Parser {
if (this.buf1 === EOF) {
break;
}
dict.set(key, this.getObj(cipherTransform));
dictMap[key] = this.getObj(cipherTransform);
}
if (lexer.beginInlineImagePos !== -1) {
dictLength = stream.pos - lexer.beginInlineImagePos;
}
// Extract the name of the first (i.e. the current) image filter.
const filter = dict.get("F", "Filter");
const filter = this.xref.fetchIfRef(dictMap.F || dictMap.Filter);
let filterName;
if (filter instanceof Name) {
filterName = filter.name;
@@ -552,6 +553,10 @@ class Parser {
}
}
const dict = new Dict(this.xref);
for (const key in dictMap) {
dict.set(key, dictMap[key]);
}
let imageStream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) {
imageStream = cipherTransform.createStream(imageStream, length);