pdf.js/test/unit/test_utils.js
Jonas Jenwald 6da0944fc7 [api-minor] Replace PDFDocumentProxy.getStats with a synchronous PDFDocumentProxy.stats getter
*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.

The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.

Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer document the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.

Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data actually have changed rather than once per rendered page (again beneficial in longer documents).

---
[1] Furthermore, the maximium number of `StreamType`/`FontType` are `10` respectively `12`, which means that regardless of the complexity and page count in a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)

[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only result in a total of seven "DocStats" messages being sent from the worker-thread.

[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
2021-11-20 12:20:55 +01:00

156 lines
3.8 KiB
JavaScript

/* Copyright 2017 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { isRef, Ref } from "../../src/core/primitives.js";
import { Page, PDFDocument } from "../../src/core/document.js";
import { assert } from "../../src/shared/util.js";
import { DocStats } from "../../src/core/core_utils.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { StringStream } from "../../src/core/stream.js";
const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
const CMAP_PARAMS = {
cMapUrl: isNodeJS ? "./external/bcmaps/" : "../../external/bcmaps/",
cMapPacked: true,
};
const STANDARD_FONT_DATA_URL = isNodeJS
? "./external/standard_fonts/"
: "../../external/standard_fonts/";
class DOMFileReaderFactory {
static async fetch(params) {
const response = await fetch(params.path);
if (!response.ok) {
throw new Error(response.statusText);
}
return new Uint8Array(await response.arrayBuffer());
}
}
class NodeFileReaderFactory {
static async fetch(params) {
const fs = require("fs");
return new Promise((resolve, reject) => {
fs.readFile(params.path, (error, data) => {
if (error || !data) {
reject(error || new Error(`Empty file for: ${params.path}`));
return;
}
resolve(new Uint8Array(data));
});
});
}
}
const DefaultFileReaderFactory = isNodeJS
? NodeFileReaderFactory
: DOMFileReaderFactory;
function buildGetDocumentParams(filename, options) {
const params = Object.create(null);
params.url = isNodeJS
? TEST_PDFS_PATH + filename
: new URL(TEST_PDFS_PATH + filename, window.location).href;
params.standardFontDataUrl = STANDARD_FONT_DATA_URL;
for (const option in options) {
params[option] = options[option];
}
return params;
}
class XRefMock {
constructor(array) {
this._map = Object.create(null);
this.stats = new DocStats({ send: () => {} });
this._newRefNum = null;
for (const key in array) {
const obj = array[key];
this._map[obj.ref.toString()] = obj.data;
}
}
getNewRef() {
if (this._newRefNum === null) {
this._newRefNum = Object.keys(this._map).length;
}
return Ref.get(this._newRefNum++, 0);
}
resetNewRef() {
this.newRef = null;
}
fetch(ref) {
return this._map[ref.toString()];
}
async fetchAsync(ref) {
return this.fetch(ref);
}
fetchIfRef(obj) {
if (!isRef(obj)) {
return obj;
}
return this.fetch(obj);
}
async fetchIfRefAsync(obj) {
return this.fetchIfRef(obj);
}
}
function createIdFactory(pageIndex) {
const pdfManager = {
get docId() {
return "d0";
},
};
const stream = new StringStream("Dummy_PDF_data");
const pdfDocument = new PDFDocument(pdfManager, stream);
const page = new Page({
pdfManager: pdfDocument.pdfManager,
xref: pdfDocument.xref,
pageIndex,
globalIdFactory: pdfDocument._globalIdFactory,
});
return page._localIdFactory;
}
function isEmptyObj(obj) {
assert(
typeof obj === "object" && obj !== null,
"isEmptyObj - invalid argument."
);
return Object.keys(obj).length === 0;
}
export {
buildGetDocumentParams,
CMAP_PARAMS,
createIdFactory,
DefaultFileReaderFactory,
isEmptyObj,
STANDARD_FONT_DATA_URL,
TEST_PDFS_PATH,
XRefMock,
};