6da0944fc7
*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.
The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.
Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer documents the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.
Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data actually have changed rather than once per rendered page (again beneficial in longer documents).
---
[1] Furthermore, the maximum number of `StreamType`/`FontType` values is `10` and `12` respectively, which means that regardless of the complexity and page count in a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)
[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only results in a total of seven "DocStats" messages being sent from the worker-thread.
[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
156 lines
3.8 KiB
JavaScript
156 lines
3.8 KiB
JavaScript
/* Copyright 2017 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
import { isRef, Ref } from "../../src/core/primitives.js";
|
|
import { Page, PDFDocument } from "../../src/core/document.js";
|
|
import { assert } from "../../src/shared/util.js";
|
|
import { DocStats } from "../../src/core/core_utils.js";
|
|
import { isNodeJS } from "../../src/shared/is_node.js";
|
|
import { StringStream } from "../../src/core/stream.js";
|
|
|
|
// Relative location of the `pdfs/` test folder; the prefix differs between
// Node.js runs and browser runs.
const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
|
|
|
|
// CMap-related parameters shared by the tests: the `bcmaps/` folder (whose
// relative prefix depends on the environment) and the "packed" flag.
const CMAP_PARAMS = {
  cMapUrl: isNodeJS ? "./external/bcmaps/" : "../../external/bcmaps/",
  cMapPacked: true,
};
|
|
|
|
// Relative location of the `standard_fonts/` folder; the prefix differs
// between Node.js runs and browser runs.
const STANDARD_FONT_DATA_URL = isNodeJS
  ? "./external/standard_fonts/"
  : "../../external/standard_fonts/";
|
|
|
|
/**
 * File reader backed by the global `fetch` function.
 */
class DOMFileReaderFactory {
  /**
   * Loads the resource found at `params.path`.
   *
   * @param {Object} params
   * @param {string} params.path - Location handed to the global `fetch`.
   * @returns {Promise<Uint8Array>} The bytes of the response body.
   * @throws {Error} Carrying the response's `statusText` when the response
   *   is not `ok`.
   */
  static async fetch(params) {
    // NOTE: this is the global `fetch`, not this static method.
    const res = await fetch(params.path);
    if (res.ok) {
      const buffer = await res.arrayBuffer();
      return new Uint8Array(buffer);
    }
    throw new Error(res.statusText);
  }
}
|
|
|
|
class NodeFileReaderFactory {
|
|
static async fetch(params) {
|
|
const fs = require("fs");
|
|
|
|
return new Promise((resolve, reject) => {
|
|
fs.readFile(params.path, (error, data) => {
|
|
if (error || !data) {
|
|
reject(error || new Error(`Empty file for: ${params.path}`));
|
|
return;
|
|
}
|
|
resolve(new Uint8Array(data));
|
|
});
|
|
});
|
|
}
|
|
}
|
|
|
|
// The file reader matching the current environment: the `fs`-based one when
// `isNodeJS` is set, otherwise the `fetch`-based one.
const DefaultFileReaderFactory = isNodeJS
  ? NodeFileReaderFactory
  : DOMFileReaderFactory;
|
|
|
|
/**
 * Builds a prototype-less parameter object for the given test PDF file.
 *
 * @param {string} filename - File name, appended to `TEST_PDFS_PATH`.
 * @param {Object} [options] - Extra parameters; every enumerable property is
 *   copied onto the result last, so it may override `url` and
 *   `standardFontDataUrl`.
 * @returns {Object} The parameters, created with a `null` prototype.
 */
function buildGetDocumentParams(filename, options) {
  const relativePath = TEST_PDFS_PATH + filename;
  const params = Object.create(null);

  if (isNodeJS) {
    params.url = relativePath;
  } else {
    // In browsers, resolve the path against the current page location.
    params.url = new URL(relativePath, window.location).href;
  }
  params.standardFontDataUrl = STANDARD_FONT_DATA_URL;

  // `for...in` does nothing when `options` is omitted.
  for (const name in options) {
    params[name] = options[name];
  }
  return params;
}
|
|
|
|
/**
 * Minimal in-memory stand-in for an XRef table: objects are looked up in a
 * map keyed by the string form of their reference.
 */
class XRefMock {
  /**
   * @param {Array<{ref: Object, data: *}>} [array] - Entries to register;
   *   the `data` of each entry is stored under the key `ref.toString()`.
   */
  constructor(array) {
    this._map = Object.create(null);
    // The handler given to `DocStats` has a no-op `send`.
    this.stats = new DocStats({ send: () => {} });
    // Lazily initialized by `getNewRef`, cleared again by `resetNewRef`.
    this._newRefNum = null;

    // `for...in` is kept on purpose: it does nothing when `array` is omitted.
    for (const key in array) {
      const obj = array[key];
      this._map[obj.ref.toString()] = obj.data;
    }
  }

  /**
   * Returns a new reference with generation 0. Numbering starts at the
   * number of registered entries and increases by one per call.
   */
  getNewRef() {
    if (this._newRefNum === null) {
      this._newRefNum = Object.keys(this._map).length;
    }
    return Ref.get(this._newRefNum++, 0);
  }

  /**
   * Clears the counter used by `getNewRef`, so that numbering restarts on
   * the next call.
   */
  resetNewRef() {
    // This must clear the same field that `getNewRef` reads; assigning to
    // any other property leaves the counter untouched.
    this._newRefNum = null;
  }

  /**
   * Looks up the data registered for `ref`.
   * @returns {*} The stored data, or `undefined` for an unknown reference.
   */
  fetch(ref) {
    return this._map[ref.toString()];
  }

  /** Promise-returning variant of `fetch`. */
  async fetchAsync(ref) {
    return this.fetch(ref);
  }

  /**
   * Resolves `obj` when it is a reference, otherwise returns it unchanged.
   */
  fetchIfRef(obj) {
    if (!isRef(obj)) {
      return obj;
    }
    return this.fetch(obj);
  }

  /** Promise-returning variant of `fetchIfRef`. */
  async fetchIfRefAsync(obj) {
    return this.fetchIfRef(obj);
  }
}
|
|
|
|
/**
 * Creates a `Page`, backed by a `PDFDocument` built from dummy data, and
 * returns that page's `_localIdFactory`.
 *
 * @param {number} pageIndex - Forwarded to the `Page` constructor.
 * @returns {*} The `_localIdFactory` of the created page.
 */
function createIdFactory(pageIndex) {
  const pdfDocument = new PDFDocument(
    // Stub manager, exposing only a fixed document id.
    {
      get docId() {
        return "d0";
      },
    },
    new StringStream("Dummy_PDF_data")
  );

  const { pdfManager, xref, _globalIdFactory: globalIdFactory } = pdfDocument;
  const { _localIdFactory } = new Page({
    pdfManager,
    xref,
    pageIndex,
    globalIdFactory,
  });
  return _localIdFactory;
}
|
|
|
|
/**
 * Checks whether an object has no own enumerable string-keyed properties.
 *
 * @param {Object} obj - Must be a non-null object, otherwise the assertion
 *   fails.
 * @returns {boolean} `true` when `Object.keys(obj)` is empty.
 */
function isEmptyObj(obj) {
  const isValidArg = typeof obj === "object" && obj !== null;
  assert(isValidArg, "isEmptyObj - invalid argument.");

  const { length } = Object.keys(obj);
  return length === 0;
}
|
|
|
|
export {
|
|
buildGetDocumentParams,
|
|
CMAP_PARAMS,
|
|
createIdFactory,
|
|
DefaultFileReaderFactory,
|
|
isEmptyObj,
|
|
STANDARD_FONT_DATA_URL,
|
|
TEST_PDFS_PATH,
|
|
XRefMock,
|
|
};
|