/**
 * Collects the stream types and font types encountered in a document and
 * forwards the accumulated statistics through a message handler.
 *
 * A "DocStats" message, carrying a full snapshot of everything recorded so
 * far, is sent each time a type that has not been seen before is added.
 * Adding an already-recorded type sends nothing.
 */
class DocStats {
  /**
   * @param {Object} handler - Object exposing a `send(name, data)` method,
   *   which is invoked with the name "DocStats" whenever the statistics
   *   change.
   */
  constructor(handler) {
    this._handler = handler;

    this._streamTypes = new Set();
    this._fontTypes = new Set();
  }

  /**
   * Sends a snapshot of all types recorded so far to the handler, as
   * `{ streamTypes, fontTypes }` where each member is a prototype-less
   * dictionary that maps every recorded type to `true`.
   */
  _send() {
    // Sets are converted to plain dictionaries; `Object.create(null)` keeps
    // the result free of inherited keys.
    const toDict = types => {
      const dict = Object.create(null);
      for (const type of types) {
        dict[type] = true;
      }
      return dict;
    };

    const streamTypes = toDict(this._streamTypes);
    const fontTypes = toDict(this._fontTypes);
    this._handler.send("DocStats", { streamTypes, fontTypes });
  }

  /**
   * Adds `type` to `types` and sends an updated snapshot, unless the type
   * has been recorded before (in which case nothing is sent).
   *
   * @param {Set} types - Either `this._streamTypes` or `this._fontTypes`.
   * @param {string} type - The type to record.
   */
  _record(types, type) {
    if (types.has(type)) {
      return; // Already reported, don't send a redundant message.
    }
    types.add(type);
    this._send();
  }

  /**
   * Records that a stream of the given type was used in the document.
   *
   * @param {string} type - A `StreamType` value.
   * @throws If `type` is not a `StreamType` value; this check only runs when
   *   `PDFJSDev` is undefined or `PDFJSDev.test("!PRODUCTION || TESTING")`
   *   holds.
   */
  addStreamType(type) {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(StreamType[type] === type, 'addStreamType: Invalid "type" value.');
    }
    this._record(this._streamTypes, type);
  }

  /**
   * Records that a font of the given type was used in the document.
   *
   * @param {string} type - A `FontType` value.
   * @throws If `type` is not a `FontType` value; this check only runs when
   *   `PDFJSDev` is undefined or `PDFJSDev.test("!PRODUCTION || TESTING")`
   *   holds.
   */
  addFontType(type) {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(FontType[type] === type, 'addFontType: Invalid "type" value.');
    }
    this._record(this._fontTypes, type);
  }
}
* @@ -481,6 +532,7 @@ function recoverJsURL(str) { export { collectActions, + DocStats, encodeToXmlString, escapePDFName, getArrayLookupTableFactory, diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 21ddefb58..4382b2215 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1248,8 +1248,7 @@ class PartialEvaluator { this.translateFont(preEvaluatedFont) .then(translatedFont => { if (translatedFont.fontType !== undefined) { - const xrefFontStats = xref.stats.fontTypes; - xrefFontStats[translatedFont.fontType] = true; + xref.stats.addFontType(translatedFont.fontType); } fontCapability.resolve( @@ -1277,8 +1276,9 @@ class PartialEvaluator { preEvaluatedFont.type, subtype && subtype.name ); - const xrefFontStats = xref.stats.fontTypes; - xrefFontStats[fontType] = true; + if (fontType !== undefined) { + xref.stats.addFontType(fontType); + } } catch (ex) {} fontCapability.resolve( diff --git a/src/core/parser.js b/src/core/parser.js index 12eb59eb6..4d9a4267c 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -741,13 +741,13 @@ class Parser { warn(`Empty "${name}" stream.`); return new NullStream(); } + const xrefStats = this.xref.stats; try { - const xrefStreamStats = this.xref.stats.streamTypes; switch (name) { case "Fl": case "FlateDecode": - xrefStreamStats[StreamType.FLATE] = true; + xrefStats.addStreamType(StreamType.FLATE); if (params) { return new PredictorStream( new FlateStream(stream, maybeLength), @@ -758,7 +758,7 @@ class Parser { return new FlateStream(stream, maybeLength); case "LZW": case "LZWDecode": - xrefStreamStats[StreamType.LZW] = true; + xrefStats.addStreamType(StreamType.LZW); let earlyChange = 1; if (params) { if (params.has("EarlyChange")) { @@ -773,30 +773,30 @@ class Parser { return new LZWStream(stream, maybeLength, earlyChange); case "DCT": case "DCTDecode": - xrefStreamStats[StreamType.DCT] = true; + xrefStats.addStreamType(StreamType.DCT); return new JpegStream(stream, maybeLength, params); case "JPX": 
case "JPXDecode": - xrefStreamStats[StreamType.JPX] = true; + xrefStats.addStreamType(StreamType.JPX); return new JpxStream(stream, maybeLength, params); case "A85": case "ASCII85Decode": - xrefStreamStats[StreamType.A85] = true; + xrefStats.addStreamType(StreamType.A85); return new Ascii85Stream(stream, maybeLength); case "AHx": case "ASCIIHexDecode": - xrefStreamStats[StreamType.AHX] = true; + xrefStats.addStreamType(StreamType.AHX); return new AsciiHexStream(stream, maybeLength); case "CCF": case "CCITTFaxDecode": - xrefStreamStats[StreamType.CCF] = true; + xrefStats.addStreamType(StreamType.CCF); return new CCITTFaxStream(stream, maybeLength, params); case "RL": case "RunLengthDecode": - xrefStreamStats[StreamType.RLX] = true; + xrefStats.addStreamType(StreamType.RLX); return new RunLengthStream(stream, maybeLength); case "JBIG2Decode": - xrefStreamStats[StreamType.JBIG] = true; + xrefStats.addStreamType(StreamType.JBIG); return new Jbig2Stream(stream, maybeLength, params); } warn(`Filter "${name}" is not supported.`); diff --git a/src/core/pdf_manager.js b/src/core/pdf_manager.js index 4b5df1a8f..d9e444992 100644 --- a/src/core/pdf_manager.js +++ b/src/core/pdf_manager.js @@ -115,12 +115,21 @@ class BasePdfManager { } class LocalPdfManager extends BasePdfManager { - constructor(docId, data, password, evaluatorOptions, enableXfa, docBaseUrl) { + constructor( + docId, + data, + password, + msgHandler, + evaluatorOptions, + enableXfa, + docBaseUrl + ) { super(); this._docId = docId; this._password = password; this._docBaseUrl = parseDocBaseUrl(docBaseUrl); + this.msgHandler = msgHandler; this.evaluatorOptions = evaluatorOptions; this.enableXfa = enableXfa; diff --git a/src/core/worker.js b/src/core/worker.js index c981bd5f1..f6b41df99 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -215,6 +215,7 @@ class WorkerMessageHandler { docId, source.data, source.password, + handler, evaluatorOptions, enableXfa, docBaseUrl @@ -287,6 +288,7 @@ class 
WorkerMessageHandler { docId, pdfFile, source.password, + handler, evaluatorOptions, enableXfa, docBaseUrl @@ -532,10 +534,6 @@ class WorkerMessageHandler { }); }); - handler.on("GetStats", function wphSetupGetStats(data) { - return pdfManager.ensureXRef("stats"); - }); - handler.on("GetAnnotations", function ({ pageIndex, intent }) { return pdfManager.getPage(pageIndex).then(function (page) { return page.getAnnotationsData(intent); diff --git a/src/core/xref.js b/src/core/xref.js index 35ea6e7d9..37256085e 100644 --- a/src/core/xref.js +++ b/src/core/xref.js @@ -30,13 +30,14 @@ import { isStream, Ref, } from "./primitives.js"; -import { Lexer, Parser } from "./parser.js"; import { + DocStats, MissingDataException, ParserEOFException, XRefEntryException, XRefParseException, } from "./core_utils.js"; +import { Lexer, Parser } from "./parser.js"; import { CipherTransformFactory } from "./crypto.js"; class XRef { @@ -46,10 +47,7 @@ class XRef { this.entries = []; this.xrefstms = Object.create(null); this._cacheMap = new Map(); // Prepare the XRef cache. - this.stats = { - streamTypes: Object.create(null), - fontTypes: Object.create(null), - }; + this.stats = new DocStats(pdfManager.msgHandler); this._newRefNum = null; } diff --git a/src/display/api.js b/src/display/api.js index 92c80dfe0..616d5562c 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -701,6 +701,16 @@ class PDFDocumentProxy { return this.fingerprints[0]; }, }); + + Object.defineProperty(this, "getStats", { + value: async () => { + deprecated( + "`PDFDocumentProxy.getStats`, " + + "please use the `PDFDocumentProxy.stats`-getter instead." + ); + return this.stats || { streamTypes: {}, fontTypes: {} }; + }, + }); } } @@ -728,6 +738,24 @@ class PDFDocumentProxy { return this._pdfInfo.fingerprints; } + /** + * @typedef {Object} PDFDocumentStats + * @property {Object} streamTypes - Used stream types in the + * document (an item is set to true if specific stream ID was used in the + * document). 
+ * @property {Object} fontTypes - Used font types in the + * document (an item is set to true if specific font ID was used in the + * document). + */ + + /** + * @type {PDFDocumentStats | null} The current statistics about document + * structures, or `null` when no statistics exists. + */ + get stats() { + return this._transport.stats; + } + /** * @type {boolean} True if only XFA form. */ @@ -940,25 +968,6 @@ class PDFDocumentProxy { return this._transport.downloadInfoCapability.promise; } - /** - * @typedef {Object} PDFDocumentStats - * @property {Object} streamTypes - Used stream types in the - * document (an item is set to true if specific stream ID was used in the - * document). - * @property {Object} fontTypes - Used font types in the - * document (an item is set to true if specific font ID was used in the - * document). - */ - - /** - * @returns {Promise} A promise this is resolved with - * current statistics about document structures (see - * {@link PDFDocumentStats}). - */ - getStats() { - return this._transport.getStats(); - } - /** * Cleans up resources allocated by the document on both the main and worker * threads. @@ -2392,6 +2401,8 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC")) { * @ignore */ class WorkerTransport { + #docStats = null; + constructor(messageHandler, loadingTask, networkStream, params) { this.messageHandler = messageHandler; this.loadingTask = loadingTask; @@ -2433,6 +2444,10 @@ class WorkerTransport { return shadow(this, "annotationStorage", new AnnotationStorage()); } + get stats() { + return this.#docStats; + } + getRenderingIntent( intent, annotationMode = AnnotationMode.ENABLE, @@ -2843,6 +2858,18 @@ class WorkerTransport { }); }); + messageHandler.on("DocStats", data => { + if (this.destroyed) { + return; // Ignore any pending requests if the worker was terminated. + } + // Ensure that a `PDFDocumentProxy.stats` call-site cannot accidentally + // modify this internal data. 
+ this.#docStats = Object.freeze({ + streamTypes: Object.freeze(data.streamTypes), + fontTypes: Object.freeze(data.fontTypes), + }); + }); + messageHandler.on( "UnsupportedFeature", this._onUnsupportedFeature.bind(this) @@ -3055,10 +3082,6 @@ class WorkerTransport { return this.messageHandler.sendWithPromise("GetMarkInfo", null); } - getStats() { - return this.messageHandler.sendWithPromise("GetStats", null); - } - async startCleanup(keepLoadedFonts = false) { await this.messageHandler.sendWithPromise("Cleanup", null); diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 228c5e038..a2442e296 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1275,8 +1275,8 @@ describe("api", function () { }); it("gets document stats", async function () { - const stats = await pdfDocument.getStats(); - expect(stats).toEqual({ streamTypes: {}, fontTypes: {} }); + const stats = pdfDocument.stats; + expect(stats).toEqual(null); }); it("cleans up document resources", async function () { @@ -2021,15 +2021,16 @@ sozialökonomische Gerechtigkeit.`) }); it("gets document stats after parsing page", async function () { - const stats = await page.getOperatorList().then(function () { - return pdfDocument.getStats(); - }); + await page.getOperatorList(); + const stats = pdfDocument.stats; - const expectedStreamTypes = {}; - expectedStreamTypes[StreamType.FLATE] = true; - const expectedFontTypes = {}; - expectedFontTypes[FontType.TYPE1STANDARD] = true; - expectedFontTypes[FontType.CIDFONTTYPE2] = true; + const expectedStreamTypes = { + [StreamType.FLATE]: true, + }; + const expectedFontTypes = { + [FontType.TYPE1STANDARD]: true, + [FontType.CIDFONTTYPE2]: true, + }; expect(stats).toEqual({ streamTypes: expectedStreamTypes, diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index 85e4c0d28..0d75f33cc 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -16,6 +16,7 @@ import { isRef, Ref } from "../../src/core/primitives.js"; import { Page, 
PDFDocument } from "../../src/core/document.js"; import { assert } from "../../src/shared/util.js"; +import { DocStats } from "../../src/core/core_utils.js"; import { isNodeJS } from "../../src/shared/is_node.js"; import { StringStream } from "../../src/core/stream.js"; @@ -76,10 +77,7 @@ function buildGetDocumentParams(filename, options) { class XRefMock { constructor(array) { this._map = Object.create(null); - this.stats = { - streamTypes: Object.create(null), - fontTypes: Object.create(null), - }; + this.stats = new DocStats({ send: () => {} }); this._newRefNum = null; for (const key in array) { diff --git a/web/app.js b/web/app.js index 2715fd188..1356ea627 100644 --- a/web/app.js +++ b/web/app.js @@ -257,6 +257,7 @@ const PDFViewerApplication = { _contentDispositionFilename: null, _contentLength: null, _saveInProgress: false, + _docStats: null, _wheelUnusedTicks: 0, _idleCallbacks: new Set(), @@ -854,6 +855,7 @@ const PDFViewerApplication = { this._contentDispositionFilename = null; this._contentLength = null; this._saveInProgress = false; + this._docStats = null; this._cancelIdleCallbacks(); promises.push(this.pdfScriptingManager.destroyPromise); @@ -2108,6 +2110,21 @@ const PDFViewerApplication = { this._unblockDocumentLoadEvent = () => {}; }, + /** + * @ignore + */ + _reportDocumentStatsTelemetry() { + const { stats } = this.pdfDocument; + if (stats !== this._docStats) { + this._docStats = stats; + + this.externalServices.reportTelemetry({ + type: "documentStats", + stats, + }); + } + }, + /** * Used together with the integration-tests, to enable awaiting full * initialization of the scripting/sandbox. @@ -2347,12 +2364,7 @@ function webViewerPageRendered({ pageNumber, error }) { } // It is a good time to report stream and font types. 
- PDFViewerApplication.pdfDocument.getStats().then(function (stats) { - PDFViewerApplication.externalServices.reportTelemetry({ - type: "documentStats", - stats, - }); - }); + PDFViewerApplication._reportDocumentStatsTelemetry(); } function webViewerPageMode({ mode }) {