Merge pull request #11912 from Snuffleupagus/GlobalImageCache

Attempt to cache repeated images at the document, rather than the page, level (issue 11878)
This commit is contained in:
Tim van der Meij 2020-05-21 23:54:28 +02:00 committed by GitHub
commit 4a3a24b002
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 251 additions and 8 deletions

View File

@ -74,6 +74,7 @@ class Page {
ref, ref,
fontCache, fontCache,
builtInCMapCache, builtInCMapCache,
globalImageCache,
pdfFunctionFactory, pdfFunctionFactory,
}) { }) {
this.pdfManager = pdfManager; this.pdfManager = pdfManager;
@ -83,6 +84,7 @@ class Page {
this.ref = ref; this.ref = ref;
this.fontCache = fontCache; this.fontCache = fontCache;
this.builtInCMapCache = builtInCMapCache; this.builtInCMapCache = builtInCMapCache;
this.globalImageCache = globalImageCache;
this.pdfFunctionFactory = pdfFunctionFactory; this.pdfFunctionFactory = pdfFunctionFactory;
this.evaluatorOptions = pdfManager.evaluatorOptions; this.evaluatorOptions = pdfManager.evaluatorOptions;
this.resourcesPromise = null; this.resourcesPromise = null;
@ -261,6 +263,7 @@ class Page {
idFactory: this.idFactory, idFactory: this.idFactory,
fontCache: this.fontCache, fontCache: this.fontCache,
builtInCMapCache: this.builtInCMapCache, builtInCMapCache: this.builtInCMapCache,
globalImageCache: this.globalImageCache,
options: this.evaluatorOptions, options: this.evaluatorOptions,
pdfFunctionFactory: this.pdfFunctionFactory, pdfFunctionFactory: this.pdfFunctionFactory,
}); });
@ -354,6 +357,7 @@ class Page {
idFactory: this.idFactory, idFactory: this.idFactory,
fontCache: this.fontCache, fontCache: this.fontCache,
builtInCMapCache: this.builtInCMapCache, builtInCMapCache: this.builtInCMapCache,
globalImageCache: this.globalImageCache,
options: this.evaluatorOptions, options: this.evaluatorOptions,
pdfFunctionFactory: this.pdfFunctionFactory, pdfFunctionFactory: this.pdfFunctionFactory,
}); });
@ -816,6 +820,7 @@ class PDFDocument {
ref, ref,
fontCache: catalog.fontCache, fontCache: catalog.fontCache,
builtInCMapCache: catalog.builtInCMapCache, builtInCMapCache: catalog.builtInCMapCache,
globalImageCache: catalog.globalImageCache,
pdfFunctionFactory: this.pdfFunctionFactory, pdfFunctionFactory: this.pdfFunctionFactory,
}); });
})); }));

View File

@ -105,6 +105,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
idFactory, idFactory,
fontCache, fontCache,
builtInCMapCache, builtInCMapCache,
globalImageCache,
options = null, options = null,
pdfFunctionFactory, pdfFunctionFactory,
}) { }) {
@ -114,6 +115,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
this.idFactory = idFactory; this.idFactory = idFactory;
this.fontCache = fontCache; this.fontCache = fontCache;
this.builtInCMapCache = builtInCMapCache; this.builtInCMapCache = builtInCMapCache;
this.globalImageCache = globalImageCache;
this.options = options || DefaultPartialEvaluatorOptions; this.options = options || DefaultPartialEvaluatorOptions;
this.pdfFunctionFactory = pdfFunctionFactory; this.pdfFunctionFactory = pdfFunctionFactory;
this.parsingType3Font = false; this.parsingType3Font = false;
@ -451,6 +453,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
forceDisableNativeImageDecoder = false, forceDisableNativeImageDecoder = false,
}) { }) {
var dict = image.dict; var dict = image.dict;
const imageRef = dict.objId;
var w = dict.get("Width", "W"); var w = dict.get("Width", "W");
var h = dict.get("Height", "H"); var h = dict.get("Height", "H");
@ -528,12 +531,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return undefined; return undefined;
} }
const nativeImageDecoderSupport = forceDisableNativeImageDecoder let nativeImageDecoderSupport = forceDisableNativeImageDecoder
? NativeImageDecoding.NONE ? NativeImageDecoding.NONE
: this.options.nativeImageDecoderSupport; : this.options.nativeImageDecoderSupport;
// If there is no imageMask, create the PDFImage and a lot // If there is no imageMask, create the PDFImage and a lot
// of image processing can be done here. // of image processing can be done here.
let objId = `img_${this.idFactory.createObjId()}`; let objId = `img_${this.idFactory.createObjId()}`,
cacheGlobally = false;
if (this.parsingType3Font) { if (this.parsingType3Font) {
assert( assert(
@ -542,6 +546,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
); );
objId = `${this.idFactory.getDocId()}_type3res_${objId}`; objId = `${this.idFactory.getDocId()}_type3res_${objId}`;
} else if (imageRef) {
cacheGlobally = this.globalImageCache.shouldCache(
imageRef,
this.pageIndex
);
if (cacheGlobally) {
// Ensure that the image is *completely* decoded on the worker-thread,
// in order to simplify the caching/rendering code on the main-thread.
nativeImageDecoderSupport = NativeImageDecoding.NONE;
objId = `${this.idFactory.getDocId()}_${objId}`;
}
} }
if ( if (
@ -566,7 +583,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
image.getIR(this.options.forceDataSchema), image.getIR(this.options.forceDataSchema),
]) ])
.then( .then(
function () { () => {
// Only add the dependency once we know that the native JPEG // Only add the dependency once we know that the native JPEG
// decoding succeeded, to ensure that rendering will always // decoding succeeded, to ensure that rendering will always
// complete. // complete.
@ -579,6 +596,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
fn: OPS.paintJpegXObject, fn: OPS.paintJpegXObject,
args, args,
}; };
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
}
} }
}, },
reason => { reason => {
@ -639,6 +660,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
[objId, "FontType3Res", imgData], [objId, "FontType3Res", imgData],
[imgData.data.buffer] [imgData.data.buffer]
); );
} else if (cacheGlobally) {
this.handler.send(
"commonobj",
[objId, "Image", imgData],
[imgData.data.buffer]
);
return undefined;
} }
this.handler.send( this.handler.send(
"obj", "obj",
@ -656,6 +684,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
"FontType3Res", "FontType3Res",
null, null,
]); ]);
} else if (cacheGlobally) {
this.handler.send("commonobj", [objId, "Image", null]);
return undefined;
} }
this.handler.send("obj", [objId, this.pageIndex, "Image", null]); this.handler.send("obj", [objId, this.pageIndex, "Image", null]);
return undefined; return undefined;
@ -674,6 +705,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
fn: OPS.paintImageXObject, fn: OPS.paintImageXObject,
args, args,
}; };
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
if (cacheGlobally) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
});
}
}
} }
return undefined; return undefined;
}, },
@ -1322,7 +1365,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
); );
} }
const xobj = xobjs.get(name); let xobj = xobjs.getRaw(name);
if (xobj instanceof Ref) {
const globalImage = self.globalImageCache.getData(
xobj,
self.pageIndex
);
if (globalImage) {
operatorList.addDependency(globalImage.objId);
operatorList.addOp(globalImage.fn, globalImage.args);
resolveXObject();
return;
}
xobj = xref.fetch(xobj);
}
if (!xobj) { if (!xobj) {
operatorList.addOp(fn, args); operatorList.addOp(fn, args);
resolveXObject(); resolveXObject();

View File

@ -14,8 +14,10 @@
*/ */
/* eslint no-var: error */ /* eslint no-var: error */
import { assert, info, shadow } from "../shared/util.js";
import { ColorSpace } from "./colorspace.js"; import { ColorSpace } from "./colorspace.js";
import { JpegStream } from "./jpeg_stream.js"; import { JpegStream } from "./jpeg_stream.js";
import { RefSetCache } from "./primitives.js";
import { Stream } from "./stream.js"; import { Stream } from "./stream.js";
class NativeImageDecoder { class NativeImageDecoder {
@ -111,4 +113,96 @@ class NativeImageDecoder {
} }
} }
export { NativeImageDecoder }; class GlobalImageCache {
static get NUM_PAGES_THRESHOLD() {
return shadow(this, "NUM_PAGES_THRESHOLD", 2);
}
static get MAX_IMAGES_TO_CACHE() {
return shadow(this, "MAX_IMAGES_TO_CACHE", 10);
}
/**
 * Creates an empty cache consisting of two lookup tables keyed by image
 * reference: one recording the pages a reference was seen on, and one
 * holding the data registered through `setData`.
 */
constructor() {
// NOTE(review): the PDFJSDev check looks like a build-flag guard so that the
// sanity assertion only runs in non-production/testing builds -- confirm
// against the build tooling before restructuring this condition.
if (
typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING")
) {
// With a threshold of 1 (or less), `shouldCache` would accept an image on
// the very first page it is seen on.
assert(
GlobalImageCache.NUM_PAGES_THRESHOLD > 1,
"GlobalImageCache - invalid NUM_PAGES_THRESHOLD constant."
);
}
// Image reference -> Set of page indices (filled by `addPageIndex` and
// `getData`).
this._refCache = new RefSetCache();
// Image reference -> data object stored by `setData`.
this._imageCache = new RefSetCache();
}
shouldCache(ref, pageIndex) {
const pageIndexSet = this._refCache.get(ref);
const numPages = pageIndexSet
? pageIndexSet.size + (pageIndexSet.has(pageIndex) ? 0 : 1)
: 1;
if (numPages < GlobalImageCache.NUM_PAGES_THRESHOLD) {
return false;
}
if (
!this._imageCache.has(ref) &&
this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE
) {
return false;
}
return true;
}
addPageIndex(ref, pageIndex) {
let pageIndexSet = this._refCache.get(ref);
if (!pageIndexSet) {
pageIndexSet = new Set();
this._refCache.put(ref, pageIndexSet);
}
pageIndexSet.add(pageIndex);
}
getData(ref, pageIndex) {
if (!this._refCache.has(ref)) {
return null;
}
const pageIndexSet = this._refCache.get(ref);
if (pageIndexSet.size < GlobalImageCache.NUM_PAGES_THRESHOLD) {
return null;
}
if (!this._imageCache.has(ref)) {
return null;
}
// Ensure that we keep track of all pages containing the image reference.
pageIndexSet.add(pageIndex);
return this._imageCache.get(ref);
}
setData(ref, data) {
if (!this._refCache.has(ref)) {
throw new Error(
'GlobalImageCache.setData - expected "addPageIndex" to have been called.'
);
}
if (this._imageCache.has(ref)) {
return;
}
if (this._imageCache.size >= GlobalImageCache.MAX_IMAGES_TO_CACHE) {
info(
"GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE."
);
return;
}
this._imageCache.put(ref, data);
}
clear() {
this._refCache.clear();
this._imageCache.clear();
}
}
export { NativeImageDecoder, GlobalImageCache };

View File

@ -54,6 +54,7 @@ import {
} from "./core_utils.js"; } from "./core_utils.js";
import { CipherTransformFactory } from "./crypto.js"; import { CipherTransformFactory } from "./crypto.js";
import { ColorSpace } from "./colorspace.js"; import { ColorSpace } from "./colorspace.js";
import { GlobalImageCache } from "./image_utils.js";
function fetchDestination(dest) { function fetchDestination(dest) {
return isDict(dest) ? dest.get("D") : dest; return isDict(dest) ? dest.get("D") : dest;
@ -71,6 +72,7 @@ class Catalog {
this.fontCache = new RefSetCache(); this.fontCache = new RefSetCache();
this.builtInCMapCache = new Map(); this.builtInCMapCache = new Map();
this.globalImageCache = new GlobalImageCache();
this.pageKidsCountCache = new RefSetCache(); this.pageKidsCountCache = new RefSetCache();
} }
@ -716,6 +718,7 @@ class Catalog {
cleanup() { cleanup() {
clearPrimitiveCaches(); clearPrimitiveCaches();
this.globalImageCache.clear();
this.pageKidsCountCache.clear(); this.pageKidsCountCache.clear();
const promises = []; const promises = [];

View File

@ -251,6 +251,10 @@ var RefSetCache = (function RefSetCacheClosure() {
} }
RefSetCache.prototype = { RefSetCache.prototype = {
get size() {
return Object.keys(this.dict).length;
},
get: function RefSetCache_get(ref) { get: function RefSetCache_get(ref) {
return this.dict[ref.toString()]; return this.dict[ref.toString()];
}, },

View File

@ -2288,6 +2288,7 @@ class WorkerTransport {
break; break;
case "FontPath": case "FontPath":
case "FontType3Res": case "FontType3Res":
case "Image":
this.commonObjs.resolve(id, exportedData); this.commonObjs.resolve(id, exportedData);
break; break;
default: default:

View File

@ -2114,7 +2114,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
}, },
paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) { paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) {
const domImage = this.processingType3 const domImage = objId.startsWith("g_")
? this.commonObjs.get(objId) ? this.commonObjs.get(objId)
: this.objs.get(objId); : this.objs.get(objId);
if (!domImage) { if (!domImage) {
@ -2277,7 +2277,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
}, },
paintImageXObject: function CanvasGraphics_paintImageXObject(objId) { paintImageXObject: function CanvasGraphics_paintImageXObject(objId) {
const imgData = this.processingType3 const imgData = objId.startsWith("g_")
? this.commonObjs.get(objId) ? this.commonObjs.get(objId)
: this.objs.get(objId); : this.objs.get(objId);
if (!imgData) { if (!imgData) {
@ -2294,7 +2294,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
scaleY, scaleY,
positions positions
) { ) {
const imgData = this.processingType3 const imgData = objId.startsWith("g_")
? this.commonObjs.get(objId) ? this.commonObjs.get(objId)
: this.objs.get(objId); : this.objs.get(objId);
if (!imgData) { if (!imgData) {

View File

@ -90,6 +90,7 @@
!issue11362.pdf !issue11362.pdf
!issue11578_reduced.pdf !issue11578_reduced.pdf
!issue11651.pdf !issue11651.pdf
!issue11878.pdf
!bad-PageLabels.pdf !bad-PageLabels.pdf
!decodeACSuccessive.pdf !decodeACSuccessive.pdf
!filled-background.pdf !filled-background.pdf

BIN
test/pdfs/issue11878.pdf Normal file

Binary file not shown.

View File

@ -23,6 +23,7 @@ import {
import { import {
createPromiseCapability, createPromiseCapability,
FontType, FontType,
ImageKind,
InvalidPDFException, InvalidPDFException,
MissingPDFException, MissingPDFException,
OPS, OPS,
@ -44,6 +45,7 @@ import {
PDFWorker, PDFWorker,
} from "../../src/display/api.js"; } from "../../src/display/api.js";
import { AutoPrintRegExp } from "../../web/ui_utils.js"; import { AutoPrintRegExp } from "../../web/ui_utils.js";
import { GlobalImageCache } from "../../src/core/image_utils.js";
import { GlobalWorkerOptions } from "../../src/display/worker_options.js"; import { GlobalWorkerOptions } from "../../src/display/worker_options.js";
import { isNodeJS } from "../../src/shared/is_node.js"; import { isNodeJS } from "../../src/shared/is_node.js";
import { Metadata } from "../../src/display/metadata.js"; import { Metadata } from "../../src/display/metadata.js";
@ -1928,6 +1930,80 @@ describe("api", function () {
}) })
.catch(done.fail); .catch(done.fail);
}); });
// Regression test for issue 11878: every page of the test document paints
// the same image. Pages below `NUM_PAGES_THRESHOLD` are expected to keep the
// image in the page-level `objs`, later pages in the document-level
// `commonObjs`, and the decoded data must be identical in both cases.
//
// The spec is a plain `async` function (no `done` callback), so a rejected
// promise fails the spec directly, and the `finally` block guarantees that
// the loading task is destroyed even when a step fails part-way through.
it("caches image resources at the document/page level as expected (issue 11878)", async function () {
  const { NUM_PAGES_THRESHOLD } = GlobalImageCache;
  const EXPECTED_WIDTH = 2550;
  const EXPECTED_HEIGHT = 3300;

  const loadingTask = getDocument(buildGetDocumentParams("issue11878.pdf"));
  let firstImgData = null;

  try {
    const pdfDoc = await loadingTask.promise;

    for (let i = 1; i <= pdfDoc.numPages; i++) {
      const pdfPage = await pdfDoc.getPage(i);
      const opList = await pdfPage.getOperatorList();

      const { commonObjs, objs } = pdfPage;
      const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
      if (imgIndex === -1) {
        // Fail with a readable message instead of an opaque destructuring
        // TypeError on `argsArray[-1]`.
        throw new Error(`Page ${i}: no paintImageXObject operator found.`);
      }
      const [objId, width, height] = opList.argsArray[imgIndex];

      if (i < NUM_PAGES_THRESHOLD) {
        expect(objId).toEqual(`img_p${i - 1}_1`);

        expect(objs.has(objId)).toEqual(true);
        expect(commonObjs.has(objId)).toEqual(false);
      } else {
        expect(objId).toEqual(
          `g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
        );

        expect(objs.has(objId)).toEqual(false);
        expect(commonObjs.has(objId)).toEqual(true);
      }
      expect(width).toEqual(EXPECTED_WIDTH);
      expect(height).toEqual(EXPECTED_HEIGHT);

      // Ensure that the actual image data is identical for all pages.
      if (i === 1) {
        firstImgData = objs.get(objId);

        expect(firstImgData.width).toEqual(EXPECTED_WIDTH);
        expect(firstImgData.height).toEqual(EXPECTED_HEIGHT);

        expect(firstImgData.kind).toEqual(ImageKind.RGB_24BPP);
        expect(firstImgData.data instanceof Uint8ClampedArray).toEqual(true);
        expect(firstImgData.data.length).toEqual(25245000);
      } else {
        const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
        const currentImgData = objsPool.get(objId);

        expect(currentImgData.width).toEqual(firstImgData.width);
        expect(currentImgData.height).toEqual(firstImgData.height);

        expect(currentImgData.kind).toEqual(firstImgData.kind);
        expect(currentImgData.data instanceof Uint8ClampedArray).toEqual(
          true
        );
        expect(
          currentImgData.data.every((value, index) => {
            return value === firstImgData.data[index];
          })
        ).toEqual(true);
      }
    }
  } finally {
    // Drop the reference to the (large) decoded image and always release
    // the loading task, on success and on failure alike.
    firstImgData = null;
    await loadingTask.destroy();
  }
});
}); });
describe("Multiple `getDocument` instances", function () { describe("Multiple `getDocument` instances", function () {
// Regression test for https://github.com/mozilla/pdf.js/issues/6205 // Regression test for https://github.com/mozilla/pdf.js/issues/6205