Attempt to cache repeated images at the document, rather than the page, level (issue 11878)
Currently image resources, as opposed to e.g. font resources, are handled exclusively on a page-specific basis. Generally speaking this makes sense, since pages are separate from each other; however, there are PDF documents where many (or even all) pages actually reference exactly the same image resources (through the XRef table). Hence, in some cases, we're decoding the *same* images over and over for every page, which is obviously slow and wastes both CPU and memory resources better used elsewhere.[1] Obviously we cannot simply treat all image resources as if they're used throughout the entire PDF document, since that would end up increasing memory usage too much.[2] However, by introducing a `GlobalImageCache` in the worker we can track image resources that appear on more than one page. Hence we can switch image resources from being page-specific to being document-specific, once the image resource has been seen on more than a certain number of pages. In many cases, such as e.g. the referenced issue, this patch will thus lead to reduced memory usage for image resources. Scrolling through all pages of the document, there's now only a few main-thread copies of the same image data, as opposed to one for each rendered page (i.e. there could theoretically be *twenty* copies of the image data). While this obviously benefits both CPU and memory usage in this case, for *very* large image data this patch *may* possibly increase persistent main-thread memory usage a tiny bit. Thus to avoid negatively affecting memory usage too much in general, particularly on the main-thread, the `GlobalImageCache` will *only* cache a certain number of image resources at the document level and otherwise simply fall back to the default behaviour.
Unfortunately the asynchronous nature of the code, with ranged/streamed loading of data, actually makes all of this much more complicated than if all data could be assumed to be immediately available.[3] *Please note:* The patch will lead to *small* movement in some existing test-cases, since we're now using the built-in PDF.js JPEG decoder more. This was done in order to simplify the overall implementation, especially on the main-thread, by limiting it to only the `OPS.paintImageXObject` operator. --- [1] There are e.g. PDF documents that use the same image as background on all pages. [2] Given that data stored in the `commonObjs`, on the main-thread, is only cleared manually through `PDFDocumentProxy.cleanup`. This is as opposed to data stored in the `objs` of each page, which is automatically removed when the page is cleaned up, e.g. by being evicted from the cache in the default viewer. [3] If the latter case were true, we could simply check for repeat images *before* parsing started and thus avoid handling *any* duplicate image resources.
This commit is contained in:
parent
604a6f96aa
commit
dda6626f40
@ -74,6 +74,7 @@ class Page {
|
||||
ref,
|
||||
fontCache,
|
||||
builtInCMapCache,
|
||||
globalImageCache,
|
||||
pdfFunctionFactory,
|
||||
}) {
|
||||
this.pdfManager = pdfManager;
|
||||
@ -83,6 +84,7 @@ class Page {
|
||||
this.ref = ref;
|
||||
this.fontCache = fontCache;
|
||||
this.builtInCMapCache = builtInCMapCache;
|
||||
this.globalImageCache = globalImageCache;
|
||||
this.pdfFunctionFactory = pdfFunctionFactory;
|
||||
this.evaluatorOptions = pdfManager.evaluatorOptions;
|
||||
this.resourcesPromise = null;
|
||||
@ -261,6 +263,7 @@ class Page {
|
||||
idFactory: this.idFactory,
|
||||
fontCache: this.fontCache,
|
||||
builtInCMapCache: this.builtInCMapCache,
|
||||
globalImageCache: this.globalImageCache,
|
||||
options: this.evaluatorOptions,
|
||||
pdfFunctionFactory: this.pdfFunctionFactory,
|
||||
});
|
||||
@ -354,6 +357,7 @@ class Page {
|
||||
idFactory: this.idFactory,
|
||||
fontCache: this.fontCache,
|
||||
builtInCMapCache: this.builtInCMapCache,
|
||||
globalImageCache: this.globalImageCache,
|
||||
options: this.evaluatorOptions,
|
||||
pdfFunctionFactory: this.pdfFunctionFactory,
|
||||
});
|
||||
@ -816,6 +820,7 @@ class PDFDocument {
|
||||
ref,
|
||||
fontCache: catalog.fontCache,
|
||||
builtInCMapCache: catalog.builtInCMapCache,
|
||||
globalImageCache: catalog.globalImageCache,
|
||||
pdfFunctionFactory: this.pdfFunctionFactory,
|
||||
});
|
||||
}));
|
||||
|
@ -105,6 +105,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
idFactory,
|
||||
fontCache,
|
||||
builtInCMapCache,
|
||||
globalImageCache,
|
||||
options = null,
|
||||
pdfFunctionFactory,
|
||||
}) {
|
||||
@ -114,6 +115,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
this.idFactory = idFactory;
|
||||
this.fontCache = fontCache;
|
||||
this.builtInCMapCache = builtInCMapCache;
|
||||
this.globalImageCache = globalImageCache;
|
||||
this.options = options || DefaultPartialEvaluatorOptions;
|
||||
this.pdfFunctionFactory = pdfFunctionFactory;
|
||||
this.parsingType3Font = false;
|
||||
@ -451,6 +453,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
forceDisableNativeImageDecoder = false,
|
||||
}) {
|
||||
var dict = image.dict;
|
||||
const imageRef = dict.objId;
|
||||
var w = dict.get("Width", "W");
|
||||
var h = dict.get("Height", "H");
|
||||
|
||||
@ -528,12 +531,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const nativeImageDecoderSupport = forceDisableNativeImageDecoder
|
||||
let nativeImageDecoderSupport = forceDisableNativeImageDecoder
|
||||
? NativeImageDecoding.NONE
|
||||
: this.options.nativeImageDecoderSupport;
|
||||
// If there is no imageMask, create the PDFImage and a lot
|
||||
// of image processing can be done here.
|
||||
let objId = `img_${this.idFactory.createObjId()}`;
|
||||
let objId = `img_${this.idFactory.createObjId()}`,
|
||||
cacheGlobally = false;
|
||||
|
||||
if (this.parsingType3Font) {
|
||||
assert(
|
||||
@ -542,6 +546,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
);
|
||||
|
||||
objId = `${this.idFactory.getDocId()}_type3res_${objId}`;
|
||||
} else if (imageRef) {
|
||||
cacheGlobally = this.globalImageCache.shouldCache(
|
||||
imageRef,
|
||||
this.pageIndex
|
||||
);
|
||||
|
||||
if (cacheGlobally) {
|
||||
// Ensure that the image is *completely* decoded on the worker-thread,
|
||||
// in order to simplify the caching/rendering code on the main-thread.
|
||||
nativeImageDecoderSupport = NativeImageDecoding.NONE;
|
||||
|
||||
objId = `${this.idFactory.getDocId()}_${objId}`;
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
@ -566,7 +583,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
image.getIR(this.options.forceDataSchema),
|
||||
])
|
||||
.then(
|
||||
function () {
|
||||
() => {
|
||||
// Only add the dependency once we know that the native JPEG
|
||||
// decoding succeeded, to ensure that rendering will always
|
||||
// complete.
|
||||
@ -579,6 +596,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
fn: OPS.paintJpegXObject,
|
||||
args,
|
||||
};
|
||||
|
||||
if (imageRef) {
|
||||
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
|
||||
}
|
||||
}
|
||||
},
|
||||
reason => {
|
||||
@ -639,6 +660,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
[objId, "FontType3Res", imgData],
|
||||
[imgData.data.buffer]
|
||||
);
|
||||
} else if (cacheGlobally) {
|
||||
this.handler.send(
|
||||
"commonobj",
|
||||
[objId, "Image", imgData],
|
||||
[imgData.data.buffer]
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
this.handler.send(
|
||||
"obj",
|
||||
@ -656,6 +684,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
"FontType3Res",
|
||||
null,
|
||||
]);
|
||||
} else if (cacheGlobally) {
|
||||
this.handler.send("commonobj", [objId, "Image", null]);
|
||||
return undefined;
|
||||
}
|
||||
this.handler.send("obj", [objId, this.pageIndex, "Image", null]);
|
||||
return undefined;
|
||||
@ -674,6 +705,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
fn: OPS.paintImageXObject,
|
||||
args,
|
||||
};
|
||||
|
||||
if (imageRef) {
|
||||
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
|
||||
|
||||
if (cacheGlobally) {
|
||||
this.globalImageCache.setData(imageRef, {
|
||||
objId,
|
||||
fn: OPS.paintImageXObject,
|
||||
args,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
},
|
||||
@ -1322,7 +1365,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
);
|
||||
}
|
||||
|
||||
const xobj = xobjs.get(name);
|
||||
let xobj = xobjs.getRaw(name);
|
||||
if (xobj instanceof Ref) {
|
||||
const globalImage = self.globalImageCache.getData(
|
||||
xobj,
|
||||
self.pageIndex
|
||||
);
|
||||
|
||||
if (globalImage) {
|
||||
operatorList.addDependency(globalImage.objId);
|
||||
operatorList.addOp(globalImage.fn, globalImage.args);
|
||||
|
||||
resolveXObject();
|
||||
return;
|
||||
}
|
||||
xobj = xref.fetch(xobj);
|
||||
}
|
||||
|
||||
if (!xobj) {
|
||||
operatorList.addOp(fn, args);
|
||||
resolveXObject();
|
||||
|
@ -14,8 +14,10 @@
|
||||
*/
|
||||
/* eslint no-var: error */
|
||||
|
||||
import { assert, info, shadow } from "../shared/util.js";
|
||||
import { ColorSpace } from "./colorspace.js";
|
||||
import { JpegStream } from "./jpeg_stream.js";
|
||||
import { RefSetCache } from "./primitives.js";
|
||||
import { Stream } from "./stream.js";
|
||||
|
||||
class NativeImageDecoder {
|
||||
@ -111,4 +113,96 @@ class NativeImageDecoder {
|
||||
}
|
||||
}
|
||||
|
||||
export { NativeImageDecoder };
|
||||
/**
 * Keeps track of the pages on which each image reference has been seen and,
 * for references seen on enough distinct pages, stores one document-level
 * entry of image data that can be looked up again by reference.
 */
class GlobalImageCache {
  /**
   * Number of distinct pages an image reference must be associated with
   * before `shouldCache`/`getData` treat it as document-level.
   * @type {number}
   */
  static get NUM_PAGES_THRESHOLD() {
    return shadow(this, "NUM_PAGES_THRESHOLD", 2);
  }

  /**
   * Upper bound on the number of entries kept in the image-data cache.
   * @type {number}
   */
  static get MAX_IMAGES_TO_CACHE() {
    return shadow(this, "MAX_IMAGES_TO_CACHE", 10);
  }

  constructor() {
    // The sanity check only runs in non-production/testing builds (or when
    // the `PDFJSDev` global is not defined at all).
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(
        GlobalImageCache.NUM_PAGES_THRESHOLD > 1,
        "GlobalImageCache - invalid NUM_PAGES_THRESHOLD constant."
      );
    }
    // Maps an image reference to the `Set` of page indices it was seen on.
    this._refCache = new RefSetCache();
    // Maps an image reference to its document-level data.
    this._imageCache = new RefSetCache();
  }

  /**
   * Decide whether the image behind `ref` should be cached at the document
   * level when encountered on `pageIndex`. Does not modify the cache.
   *
   * @param {Object} ref - The image reference (used as `RefSetCache` key).
   * @param {number} pageIndex - Index of the page currently being parsed.
   * @returns {boolean} `true` if the reference has reached the page threshold
   *   (counting `pageIndex` itself) and there is room in the image cache, or
   *   the reference already has an entry in it.
   */
  shouldCache(ref, pageIndex) {
    const pageIndexSet = this._refCache.get(ref);
    // Count the current page as well, unless it was already recorded.
    const numPages = pageIndexSet
      ? pageIndexSet.size + (pageIndexSet.has(pageIndex) ? 0 : 1)
      : 1;

    if (numPages < GlobalImageCache.NUM_PAGES_THRESHOLD) {
      return false;
    }
    if (
      !this._imageCache.has(ref) &&
      this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE
    ) {
      return false;
    }
    return true;
  }

  /**
   * Record that the image behind `ref` occurs on `pageIndex`.
   *
   * @param {Object} ref - The image reference.
   * @param {number} pageIndex - Index of the page containing the image.
   */
  addPageIndex(ref, pageIndex) {
    let pageIndexSet = this._refCache.get(ref);
    if (!pageIndexSet) {
      pageIndexSet = new Set();
      this._refCache.put(ref, pageIndexSet);
    }
    pageIndexSet.add(pageIndex);
  }

  /**
   * Look up the document-level data stored for `ref`.
   *
   * @param {Object} ref - The image reference.
   * @param {number} pageIndex - Index of the page requesting the data; it is
   *   recorded for `ref` only when data is actually returned.
   * @returns {Object|null} The value previously passed to `setData`, or
   *   `null` when the reference is unknown, has been recorded on fewer than
   *   `NUM_PAGES_THRESHOLD` pages, or has no stored data.
   */
  getData(ref, pageIndex) {
    if (!this._refCache.has(ref)) {
      return null;
    }
    const pageIndexSet = this._refCache.get(ref);

    if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
      return null;
    }
    if (!this._imageCache.has(ref)) {
      return null;
    }
    // Ensure that we keep track of all pages containing the image reference.
    pageIndexSet.add(pageIndex);

    return this._imageCache.get(ref);
  }

  /**
   * Store document-level data for `ref`. An existing entry is never
   * overwritten, and new entries are ignored once the cache holds
   * `MAX_IMAGES_TO_CACHE` entries.
   *
   * @param {Object} ref - The image reference.
   * @param {Object} data - The data to associate with the reference.
   * @throws {Error} If `addPageIndex` was never called for `ref`.
   */
  setData(ref, data) {
    if (!this._refCache.has(ref)) {
      throw new Error(
        'GlobalImageCache.setData - expected "addPageIndex" to have been called.'
      );
    }
    if (this._imageCache.has(ref)) {
      return;
    }
    if (this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE) {
      info(
        "GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE."
      );
      return;
    }
    this._imageCache.put(ref, data);
  }

  /**
   * Drop all recorded page indices and all stored image data.
   */
  clear() {
    this._refCache.clear();
    this._imageCache.clear();
  }
}
|
||||
|
||||
export { NativeImageDecoder, GlobalImageCache };
|
||||
|
@ -54,6 +54,7 @@ import {
|
||||
} from "./core_utils.js";
|
||||
import { CipherTransformFactory } from "./crypto.js";
|
||||
import { ColorSpace } from "./colorspace.js";
|
||||
import { GlobalImageCache } from "./image_utils.js";
|
||||
|
||||
function fetchDestination(dest) {
|
||||
return isDict(dest) ? dest.get("D") : dest;
|
||||
@ -71,6 +72,7 @@ class Catalog {
|
||||
|
||||
this.fontCache = new RefSetCache();
|
||||
this.builtInCMapCache = new Map();
|
||||
this.globalImageCache = new GlobalImageCache();
|
||||
this.pageKidsCountCache = new RefSetCache();
|
||||
}
|
||||
|
||||
@ -716,6 +718,7 @@ class Catalog {
|
||||
|
||||
cleanup() {
|
||||
clearPrimitiveCaches();
|
||||
this.globalImageCache.clear();
|
||||
this.pageKidsCountCache.clear();
|
||||
|
||||
const promises = [];
|
||||
|
@ -251,6 +251,10 @@ var RefSetCache = (function RefSetCacheClosure() {
|
||||
}
|
||||
|
||||
RefSetCache.prototype = {
|
||||
get size() {
|
||||
return Object.keys(this.dict).length;
|
||||
},
|
||||
|
||||
get: function RefSetCache_get(ref) {
|
||||
return this.dict[ref.toString()];
|
||||
},
|
||||
|
@ -2288,6 +2288,7 @@ class WorkerTransport {
|
||||
break;
|
||||
case "FontPath":
|
||||
case "FontType3Res":
|
||||
case "Image":
|
||||
this.commonObjs.resolve(id, exportedData);
|
||||
break;
|
||||
default:
|
||||
|
@ -2114,7 +2114,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
|
||||
},
|
||||
|
||||
paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) {
|
||||
const domImage = this.processingType3
|
||||
const domImage = objId.startsWith("g_")
|
||||
? this.commonObjs.get(objId)
|
||||
: this.objs.get(objId);
|
||||
if (!domImage) {
|
||||
@ -2277,7 +2277,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
|
||||
},
|
||||
|
||||
paintImageXObject: function CanvasGraphics_paintImageXObject(objId) {
|
||||
const imgData = this.processingType3
|
||||
const imgData = objId.startsWith("g_")
|
||||
? this.commonObjs.get(objId)
|
||||
: this.objs.get(objId);
|
||||
if (!imgData) {
|
||||
@ -2294,7 +2294,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
|
||||
scaleY,
|
||||
positions
|
||||
) {
|
||||
const imgData = this.processingType3
|
||||
const imgData = objId.startsWith("g_")
|
||||
? this.commonObjs.get(objId)
|
||||
: this.objs.get(objId);
|
||||
if (!imgData) {
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -90,6 +90,7 @@
|
||||
!issue11362.pdf
|
||||
!issue11578_reduced.pdf
|
||||
!issue11651.pdf
|
||||
!issue11878.pdf
|
||||
!bad-PageLabels.pdf
|
||||
!decodeACSuccessive.pdf
|
||||
!filled-background.pdf
|
||||
|
BIN
test/pdfs/issue11878.pdf
Normal file
BIN
test/pdfs/issue11878.pdf
Normal file
Binary file not shown.
@ -23,6 +23,7 @@ import {
|
||||
import {
|
||||
createPromiseCapability,
|
||||
FontType,
|
||||
ImageKind,
|
||||
InvalidPDFException,
|
||||
MissingPDFException,
|
||||
OPS,
|
||||
@ -44,6 +45,7 @@ import {
|
||||
PDFWorker,
|
||||
} from "../../src/display/api.js";
|
||||
import { AutoPrintRegExp } from "../../web/ui_utils.js";
|
||||
import { GlobalImageCache } from "../../src/core/image_utils.js";
|
||||
import { GlobalWorkerOptions } from "../../src/display/worker_options.js";
|
||||
import { isNodeJS } from "../../src/shared/is_node.js";
|
||||
import { Metadata } from "../../src/display/metadata.js";
|
||||
@ -1928,6 +1930,80 @@ describe("api", function () {
|
||||
})
|
||||
.catch(done.fail);
|
||||
});
|
||||
|
||||
it("caches image resources at the document/page level as expected (issue 11878)", async function () {
  // The spec is a plain `async` function (no `done` callback), so that a
  // rejected promise fails the spec directly; mixing both styles is not
  // supported by Jasmine.
  //
  // Pages before `NUM_PAGES_THRESHOLD` are expected to keep the image data in
  // the page-level `objs`, while later pages are expected to share a single
  // document-level entry in `commonObjs`.
  const { NUM_PAGES_THRESHOLD } = GlobalImageCache;
  const EXPECTED_WIDTH = 2550;
  const EXPECTED_HEIGHT = 3300;

  const loadingTask = getDocument(buildGetDocumentParams("issue11878.pdf"));
  let firstImgData = null;

  try {
    const pdfDoc = await loadingTask.promise;

    for (let i = 1; i <= pdfDoc.numPages; i++) {
      const pdfPage = await pdfDoc.getPage(i);
      const opList = await pdfPage.getOperatorList();

      const { commonObjs, objs } = pdfPage;
      const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
      const [objId, width, height] = opList.argsArray[imgIndex];

      if (i < NUM_PAGES_THRESHOLD) {
        expect(objId).toEqual(`img_p${i - 1}_1`);

        expect(objs.has(objId)).toEqual(true);
        expect(commonObjs.has(objId)).toEqual(false);
      } else {
        // All pages from the threshold onwards reference the id created on
        // the page where the threshold was reached.
        expect(objId).toEqual(
          `g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
        );

        expect(objs.has(objId)).toEqual(false);
        expect(commonObjs.has(objId)).toEqual(true);
      }
      expect(width).toEqual(EXPECTED_WIDTH);
      expect(height).toEqual(EXPECTED_HEIGHT);

      // Ensure that the actual image data is identical for all pages.
      if (i === 1) {
        firstImgData = objs.get(objId);

        expect(firstImgData.width).toEqual(EXPECTED_WIDTH);
        expect(firstImgData.height).toEqual(EXPECTED_HEIGHT);

        expect(firstImgData.kind).toEqual(ImageKind.RGB_24BPP);
        expect(firstImgData.data instanceof Uint8ClampedArray).toEqual(
          true
        );
        expect(firstImgData.data.length).toEqual(25245000);
      } else {
        const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
        const currentImgData = objsPool.get(objId);

        expect(currentImgData.width).toEqual(firstImgData.width);
        expect(currentImgData.height).toEqual(firstImgData.height);

        expect(currentImgData.kind).toEqual(firstImgData.kind);
        expect(currentImgData.data instanceof Uint8ClampedArray).toEqual(
          true
        );
        expect(
          currentImgData.data.every((value, index) => {
            return value === firstImgData.data[index];
          })
        ).toEqual(true);
      }
    }
  } finally {
    // Always release the document (and the reference to the large image
    // buffer), even when one of the awaited calls above rejected.
    firstImgData = null;
    await loadingTask.destroy();
  }
});
|
||||
});
|
||||
describe("Multiple `getDocument` instances", function () {
|
||||
// Regression test for https://github.com/mozilla/pdf.js/issues/6205
|
||||
|
Loading…
Reference in New Issue
Block a user