Merge pull request #11912 from Snuffleupagus/GlobalImageCache

Attempt to cache repeated images at the document, rather than the page, level (issue 11878)
This commit is contained in:
Tim van der Meij 2020-05-21 23:54:28 +02:00 committed by GitHub
commit 4a3a24b002
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 251 additions and 8 deletions

View File

@ -74,6 +74,7 @@ class Page {
ref,
fontCache,
builtInCMapCache,
globalImageCache,
pdfFunctionFactory,
}) {
this.pdfManager = pdfManager;
@ -83,6 +84,7 @@ class Page {
this.ref = ref;
this.fontCache = fontCache;
this.builtInCMapCache = builtInCMapCache;
this.globalImageCache = globalImageCache;
this.pdfFunctionFactory = pdfFunctionFactory;
this.evaluatorOptions = pdfManager.evaluatorOptions;
this.resourcesPromise = null;
@ -261,6 +263,7 @@ class Page {
idFactory: this.idFactory,
fontCache: this.fontCache,
builtInCMapCache: this.builtInCMapCache,
globalImageCache: this.globalImageCache,
options: this.evaluatorOptions,
pdfFunctionFactory: this.pdfFunctionFactory,
});
@ -354,6 +357,7 @@ class Page {
idFactory: this.idFactory,
fontCache: this.fontCache,
builtInCMapCache: this.builtInCMapCache,
globalImageCache: this.globalImageCache,
options: this.evaluatorOptions,
pdfFunctionFactory: this.pdfFunctionFactory,
});
@ -816,6 +820,7 @@ class PDFDocument {
ref,
fontCache: catalog.fontCache,
builtInCMapCache: catalog.builtInCMapCache,
globalImageCache: catalog.globalImageCache,
pdfFunctionFactory: this.pdfFunctionFactory,
});
}));

View File

@ -105,6 +105,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
idFactory,
fontCache,
builtInCMapCache,
globalImageCache,
options = null,
pdfFunctionFactory,
}) {
@ -114,6 +115,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
this.idFactory = idFactory;
this.fontCache = fontCache;
this.builtInCMapCache = builtInCMapCache;
this.globalImageCache = globalImageCache;
this.options = options || DefaultPartialEvaluatorOptions;
this.pdfFunctionFactory = pdfFunctionFactory;
this.parsingType3Font = false;
@ -451,6 +453,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
forceDisableNativeImageDecoder = false,
}) {
var dict = image.dict;
const imageRef = dict.objId;
var w = dict.get("Width", "W");
var h = dict.get("Height", "H");
@ -528,12 +531,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return undefined;
}
const nativeImageDecoderSupport = forceDisableNativeImageDecoder
let nativeImageDecoderSupport = forceDisableNativeImageDecoder
? NativeImageDecoding.NONE
: this.options.nativeImageDecoderSupport;
// If there is no imageMask, create the PDFImage and a lot
// of image processing can be done here.
let objId = `img_${this.idFactory.createObjId()}`;
let objId = `img_${this.idFactory.createObjId()}`,
cacheGlobally = false;
if (this.parsingType3Font) {
assert(
@ -542,6 +546,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
);
objId = `${this.idFactory.getDocId()}_type3res_${objId}`;
} else if (imageRef) {
cacheGlobally = this.globalImageCache.shouldCache(
imageRef,
this.pageIndex
);
if (cacheGlobally) {
// Ensure that the image is *completely* decoded on the worker-thread,
// in order to simplify the caching/rendering code on the main-thread.
nativeImageDecoderSupport = NativeImageDecoding.NONE;
objId = `${this.idFactory.getDocId()}_${objId}`;
}
}
if (
@ -566,7 +583,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
image.getIR(this.options.forceDataSchema),
])
.then(
function () {
() => {
// Only add the dependency once we know that the native JPEG
// decoding succeeded, to ensure that rendering will always
// complete.
@ -579,6 +596,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
fn: OPS.paintJpegXObject,
args,
};
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
}
}
},
reason => {
@ -639,6 +660,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
[objId, "FontType3Res", imgData],
[imgData.data.buffer]
);
} else if (cacheGlobally) {
this.handler.send(
"commonobj",
[objId, "Image", imgData],
[imgData.data.buffer]
);
return undefined;
}
this.handler.send(
"obj",
@ -656,6 +684,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
"FontType3Res",
null,
]);
} else if (cacheGlobally) {
this.handler.send("commonobj", [objId, "Image", null]);
return undefined;
}
this.handler.send("obj", [objId, this.pageIndex, "Image", null]);
return undefined;
@ -674,6 +705,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
fn: OPS.paintImageXObject,
args,
};
if (imageRef) {
this.globalImageCache.addPageIndex(imageRef, this.pageIndex);
if (cacheGlobally) {
this.globalImageCache.setData(imageRef, {
objId,
fn: OPS.paintImageXObject,
args,
});
}
}
}
return undefined;
},
@ -1322,7 +1365,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
);
}
const xobj = xobjs.get(name);
let xobj = xobjs.getRaw(name);
if (xobj instanceof Ref) {
const globalImage = self.globalImageCache.getData(
xobj,
self.pageIndex
);
if (globalImage) {
operatorList.addDependency(globalImage.objId);
operatorList.addOp(globalImage.fn, globalImage.args);
resolveXObject();
return;
}
xobj = xref.fetch(xobj);
}
if (!xobj) {
operatorList.addOp(fn, args);
resolveXObject();

View File

@ -14,8 +14,10 @@
*/
/* eslint no-var: error */
import { assert, info, shadow } from "../shared/util.js";
import { ColorSpace } from "./colorspace.js";
import { JpegStream } from "./jpeg_stream.js";
import { RefSetCache } from "./primitives.js";
import { Stream } from "./stream.js";
class NativeImageDecoder {
@ -111,4 +113,96 @@ class NativeImageDecoder {
}
}
export { NativeImageDecoder };
/**
 * Keeps track of which pages each image reference has been seen on, and
 * stores the data of images that occur on at least `NUM_PAGES_THRESHOLD`
 * pages, so that the data can be looked up by reference on later pages.
 *
 * At most `MAX_IMAGES_TO_CACHE` images have their data stored at any time.
 */
class GlobalImageCache {
  /**
   * Minimum number of distinct pages an image must occur on before its data
   * is eligible for storage.
   * @type {number}
   */
  static get NUM_PAGES_THRESHOLD() {
    return shadow(this, "NUM_PAGES_THRESHOLD", 2);
  }

  /**
   * Maximum number of images whose data is stored.
   * @type {number}
   */
  static get MAX_IMAGES_TO_CACHE() {
    return shadow(this, "MAX_IMAGES_TO_CACHE", 10);
  }

  constructor() {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      // A threshold of one page would make every image a "repeated" one.
      assert(
        GlobalImageCache.NUM_PAGES_THRESHOLD > 1,
        "GlobalImageCache - invalid NUM_PAGES_THRESHOLD constant."
      );
    }
    // Image reference -> Set of page indexes the image was seen on.
    this._refCache = new RefSetCache();
    // Image reference -> stored image data.
    this._imageCache = new RefSetCache();
  }

  /**
   * Decides whether the image with the given reference should be cached,
   * counting `pageIndex` as one of the pages it occurs on (without actually
   * recording it).
   * @param {Object} ref - The image reference.
   * @param {number} pageIndex - Index of the page currently being parsed.
   * @returns {boolean} `true` when the image occurs on enough pages and is
   *   either stored already or there is still room to store it.
   */
  shouldCache(ref, pageIndex) {
    const seenOnPages = this._refCache.get(ref);
    let pageCount = 1;
    if (seenOnPages) {
      pageCount = seenOnPages.has(pageIndex)
        ? seenOnPages.size
        : seenOnPages.size + 1;
    }
    if (pageCount < GlobalImageCache.NUM_PAGES_THRESHOLD) {
      return false;
    }
    // Images that are already stored never count against the size limit.
    if (this._imageCache.has(ref)) {
      return true;
    }
    return this._imageCache.size < GlobalImageCache.MAX_IMAGES_TO_CACHE;
  }

  /**
   * Records that the image with the given reference occurs on `pageIndex`.
   * @param {Object} ref - The image reference.
   * @param {number} pageIndex - Index of the page containing the image.
   */
  addPageIndex(ref, pageIndex) {
    const seenOnPages = this._refCache.get(ref);
    if (seenOnPages) {
      seenOnPages.add(pageIndex);
      return;
    }
    this._refCache.put(ref, new Set([pageIndex]));
  }

  /**
   * Looks up the stored data for an image reference. On a successful lookup
   * `pageIndex` is also recorded as a page containing the image.
   * @param {Object} ref - The image reference.
   * @param {number} pageIndex - Index of the page requesting the image.
   * @returns {Object|null} The stored data, or `null` when the reference is
   *   unknown, has been seen on too few pages, or has no stored data.
   */
  getData(ref, pageIndex) {
    const { _refCache: pagesByRef, _imageCache: dataByRef } = this;

    const seenOnPages = pagesByRef.has(ref) ? pagesByRef.get(ref) : null;
    const isAvailable =
      seenOnPages !== null &&
      seenOnPages.size >= GlobalImageCache.NUM_PAGES_THRESHOLD &&
      dataByRef.has(ref);
    if (!isAvailable) {
      return null;
    }
    // Keep the page bookkeeping complete, even for cache hits.
    seenOnPages.add(pageIndex);
    return dataByRef.get(ref);
  }

  /**
   * Stores the data for an image reference. Data that is already stored is
   * never overwritten, and new data is ignored once `MAX_IMAGES_TO_CACHE`
   * images are stored.
   * @param {Object} ref - The image reference.
   * @param {Object} data - The data to store for the image.
   * @throws {Error} When `addPageIndex` was never called for `ref`.
   */
  setData(ref, data) {
    const { _refCache: pagesByRef, _imageCache: dataByRef } = this;

    if (!pagesByRef.has(ref)) {
      throw new Error(
        'GlobalImageCache.setData - expected "addPageIndex" to have been called.'
      );
    }
    if (dataByRef.has(ref)) {
      return;
    }
    if (dataByRef.size < GlobalImageCache.MAX_IMAGES_TO_CACHE) {
      dataByRef.put(ref, data);
      return;
    }
    info(
      "GlobalImageCache.setData - ignoring image above MAX_IMAGES_TO_CACHE."
    );
  }

  /**
   * Drops all page bookkeeping and all stored image data.
   */
  clear() {
    this._refCache.clear();
    this._imageCache.clear();
  }
}
export { NativeImageDecoder, GlobalImageCache };

View File

@ -54,6 +54,7 @@ import {
} from "./core_utils.js";
import { CipherTransformFactory } from "./crypto.js";
import { ColorSpace } from "./colorspace.js";
import { GlobalImageCache } from "./image_utils.js";
function fetchDestination(dest) {
return isDict(dest) ? dest.get("D") : dest;
@ -71,6 +72,7 @@ class Catalog {
this.fontCache = new RefSetCache();
this.builtInCMapCache = new Map();
this.globalImageCache = new GlobalImageCache();
this.pageKidsCountCache = new RefSetCache();
}
@ -716,6 +718,7 @@ class Catalog {
cleanup() {
clearPrimitiveCaches();
this.globalImageCache.clear();
this.pageKidsCountCache.clear();
const promises = [];

View File

@ -251,6 +251,10 @@ var RefSetCache = (function RefSetCacheClosure() {
}
RefSetCache.prototype = {
// Number of entries currently held in `this.dict`. The count is recomputed
// by enumerating the keys of `this.dict` on every access.
get size() {
return Object.keys(this.dict).length;
},
get: function RefSetCache_get(ref) {
return this.dict[ref.toString()];
},

View File

@ -2288,6 +2288,7 @@ class WorkerTransport {
break;
case "FontPath":
case "FontType3Res":
case "Image":
this.commonObjs.resolve(id, exportedData);
break;
default:

View File

@ -2114,7 +2114,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
},
paintJpegXObject: function CanvasGraphics_paintJpegXObject(objId, w, h) {
const domImage = this.processingType3
const domImage = objId.startsWith("g_")
? this.commonObjs.get(objId)
: this.objs.get(objId);
if (!domImage) {
@ -2277,7 +2277,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
},
paintImageXObject: function CanvasGraphics_paintImageXObject(objId) {
const imgData = this.processingType3
const imgData = objId.startsWith("g_")
? this.commonObjs.get(objId)
: this.objs.get(objId);
if (!imgData) {
@ -2294,7 +2294,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
scaleY,
positions
) {
const imgData = this.processingType3
const imgData = objId.startsWith("g_")
? this.commonObjs.get(objId)
: this.objs.get(objId);
if (!imgData) {

View File

@ -90,6 +90,7 @@
!issue11362.pdf
!issue11578_reduced.pdf
!issue11651.pdf
!issue11878.pdf
!bad-PageLabels.pdf
!decodeACSuccessive.pdf
!filled-background.pdf

BIN
test/pdfs/issue11878.pdf Normal file

Binary file not shown.

View File

@ -23,6 +23,7 @@ import {
import {
createPromiseCapability,
FontType,
ImageKind,
InvalidPDFException,
MissingPDFException,
OPS,
@ -44,6 +45,7 @@ import {
PDFWorker,
} from "../../src/display/api.js";
import { AutoPrintRegExp } from "../../web/ui_utils.js";
import { GlobalImageCache } from "../../src/core/image_utils.js";
import { GlobalWorkerOptions } from "../../src/display/worker_options.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { Metadata } from "../../src/display/metadata.js";
@ -1928,6 +1930,80 @@ describe("api", function () {
})
.catch(done.fail);
});
// Regression test for issue 11878: loads "issue11878.pdf" and, for every
// page, inspects the first `paintImageXObject` operation to check where the
// image data is stored and that the data is the same as on the first page.
it("caches image resources at the document/page level as expected (issue 11878)", async function (done) {
const { NUM_PAGES_THRESHOLD } = GlobalImageCache,
EXPECTED_WIDTH = 2550,
EXPECTED_HEIGHT = 3300;
const loadingTask = getDocument(buildGetDocumentParams("issue11878.pdf"));
// Image data from page 1, used as the reference for all later pages.
let firstImgData = null;
try {
const pdfDoc = await loadingTask.promise;
// Pages are processed sequentially, in order (page numbers are 1-based here).
for (let i = 1; i <= pdfDoc.numPages; i++) {
const pdfPage = await pdfDoc.getPage(i);
const opList = await pdfPage.getOperatorList();
const { commonObjs, objs } = pdfPage;
// Arguments of the first image painting operation on the page.
const imgIndex = opList.fnArray.indexOf(OPS.paintImageXObject);
const [objId, width, height] = opList.argsArray[imgIndex];
// Below the threshold the image is expected under a page-specific id in
// `objs`; from the threshold onwards every page is expected to reference
// the same document-level id (prefixed with "g_" and the docId), found
// in `commonObjs` instead.
if (i < NUM_PAGES_THRESHOLD) {
expect(objId).toEqual(`img_p${i - 1}_1`);
expect(objs.has(objId)).toEqual(true);
expect(commonObjs.has(objId)).toEqual(false);
} else {
expect(objId).toEqual(
`g_${loadingTask.docId}_img_p${NUM_PAGES_THRESHOLD - 1}_1`
);
expect(objs.has(objId)).toEqual(false);
expect(commonObjs.has(objId)).toEqual(true);
}
expect(width).toEqual(EXPECTED_WIDTH);
expect(height).toEqual(EXPECTED_HEIGHT);
// Ensure that the actual image data is identical for all pages.
if (i === 1) {
firstImgData = objs.get(objId);
expect(firstImgData.width).toEqual(EXPECTED_WIDTH);
expect(firstImgData.height).toEqual(EXPECTED_HEIGHT);
expect(firstImgData.kind).toEqual(ImageKind.RGB_24BPP);
expect(firstImgData.data instanceof Uint8ClampedArray).toEqual(
true
);
// EXPECTED_WIDTH * EXPECTED_HEIGHT * 3 (i.e. 2550 * 3300 * 3).
expect(firstImgData.data.length).toEqual(25245000);
} else {
// Read the data from whichever pool the checks above expect it in.
const objsPool = i >= NUM_PAGES_THRESHOLD ? commonObjs : objs;
const currentImgData = objsPool.get(objId);
expect(currentImgData.width).toEqual(firstImgData.width);
expect(currentImgData.height).toEqual(firstImgData.height);
expect(currentImgData.kind).toEqual(firstImgData.kind);
expect(currentImgData.data instanceof Uint8ClampedArray).toEqual(
true
);
// Element-wise comparison against the data from page 1.
expect(
currentImgData.data.every((value, index) => {
return value === firstImgData.data[index];
})
).toEqual(true);
}
}
await loadingTask.destroy();
// Drop the reference to the page 1 image data once the checks are done.
firstImgData = null;
done();
} catch (ex) {
// Any rejection or thrown error above is reported as a test failure.
done.fail(ex);
}
});
});
describe("Multiple `getDocument` instances", function () {
// Regression test for https://github.com/mozilla/pdf.js/issues/6205