From 661c60ecc9e65af6711dc0ae25b0574f421b0d41 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 2 Jul 2021 16:36:27 +0200 Subject: [PATCH] [api-minor] Support accessing both the original and modified PDF fingerprint The PDF.js API has only ever supported accessing the original file ID; however, the second one that (should) exist in *modified* documents has thus far been completely inaccessible through the API. That seems like a simple oversight, caused e.g. by the viewer not needing it, since it really shouldn't hurt to provide API-users with the ability to check if a PDF document has been modified since its creation.[1] Please refer to https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#G13.2261661 for additional information. For an example of how to update existing code to use the new API, please see the changes in the `web/app.js` file included in this patch. *Please note:* While I'm not sure if we'll ever be able to remove the old `PDFDocumentProxy.fingerprint` getter, given that it's existed since "forever", that probably isn't a big deal given that it's now limited to only `GENERIC`-builds. --- [1] Although this obviously depends on the PDF software following the specification, by updating the second file ID as intended. 
--- src/core/document.js | 46 ++++++++++++++++++++++++++++--------------- src/core/worker.js | 6 +++--- src/display/api.js | 21 +++++++++++++++++--- test/unit/api_spec.js | 31 +++++++++++++++++++++-------- web/app.js | 6 +++--- 5 files changed, 77 insertions(+), 33 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index b66bb7e14..93358aad9 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -1153,30 +1153,44 @@ class PDFDocument { return shadow(this, "documentInfo", docInfo); } - get fingerprint() { - let hash; + get fingerprints() { + function validate(data) { + return ( + typeof data === "string" && + data.length > 0 && + data !== EMPTY_FINGERPRINT + ); + } + + function hexString(hash) { + const buf = []; + for (let i = 0, ii = hash.length; i < ii; i++) { + const hex = hash[i].toString(16); + buf.push(hex.padStart(2, "0")); + } + return buf.join(""); + } + const idArray = this.xref.trailer.get("ID"); - if ( - Array.isArray(idArray) && - idArray[0] && - isString(idArray[0]) && - idArray[0] !== EMPTY_FINGERPRINT - ) { - hash = stringToBytes(idArray[0]); + let hashOriginal, hashModified; + if (Array.isArray(idArray) && validate(idArray[0])) { + hashOriginal = stringToBytes(idArray[0]); + + if (idArray[1] !== idArray[0] && validate(idArray[1])) { + hashModified = stringToBytes(idArray[1]); + } } else { - hash = calculateMD5( + hashOriginal = calculateMD5( this.stream.getByteRange(0, FINGERPRINT_FIRST_BYTES), 0, FINGERPRINT_FIRST_BYTES ); } - const fingerprintBuf = []; - for (let i = 0, ii = hash.length; i < ii; i++) { - const hex = hash[i].toString(16); - fingerprintBuf.push(hex.padStart(2, "0")); - } - return shadow(this, "fingerprint", fingerprintBuf.join("")); + return shadow(this, "fingerprints", [ + hexString(hashOriginal), + hashModified ? 
hexString(hashModified) : null, + ]); } _getLinearizationPage(pageIndex) { diff --git a/src/core/worker.js b/src/core/worker.js index dc491e7e8..fd5740317 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -199,9 +199,9 @@ class WorkerMessageHandler { .then(() => finishWorkerTask(task)); } - const [numPages, fingerprint] = await Promise.all([ + const [numPages, fingerprints] = await Promise.all([ pdfManager.ensureDoc("numPages"), - pdfManager.ensureDoc("fingerprint"), + pdfManager.ensureDoc("fingerprints"), ]); // Get htmlForXfa after numPages to avoid to create HTML twice. @@ -209,7 +209,7 @@ class WorkerMessageHandler { ? await pdfManager.ensureDoc("htmlForXfa") : null; - return { numPages, fingerprint, htmlForXfa }; + return { numPages, fingerprints, htmlForXfa }; } function getPdfManager(data, evaluatorOptions, enableXfa) { diff --git a/src/display/api.js b/src/display/api.js index e4b4c7c10..d17e96368 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -722,6 +722,18 @@ class PDFDocumentProxy { constructor(pdfInfo, transport) { this._pdfInfo = pdfInfo; this._transport = transport; + + if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) { + Object.defineProperty(this, "fingerprint", { + get() { + deprecated( + "`PDFDocumentProxy.fingerprint`, " + + "please use `PDFDocumentProxy.fingerprints` instead." + ); + return this.fingerprints[0]; + }, + }); + } } /** @@ -739,10 +751,13 @@ class PDFDocumentProxy { } /** - * @type {string} A (not guaranteed to be) unique ID to identify a PDF. + * @type {Array} A (not guaranteed to be) unique ID to + * identify the PDF document. + * NOTE: The first element will always be defined for all PDF documents, + * whereas the second element is only defined for *modified* PDF documents. 
*/ - get fingerprint() { - return this._pdfInfo.fingerprint; + get fingerprints() { + return this._pdfInfo.fingerprints; } /** diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 4c7b09a16..cd03229b1 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -503,10 +503,25 @@ describe("api", function () { expect(pdfDocument.numPages).toEqual(3); }); - it("gets fingerprint", function () { - expect(pdfDocument.fingerprint).toEqual( - "ea8b35919d6279a369e835bde778611b" + it("gets fingerprints", function () { + expect(pdfDocument.fingerprints).toEqual([ + "ea8b35919d6279a369e835bde778611b", + null, + ]); + }); + + it("gets fingerprints, from modified document", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("annotation-tx.pdf") ); + const pdfDoc = await loadingTask.promise; + + expect(pdfDoc.fingerprints).toEqual([ + "3ebd77c320274649a68f10dbf3b9f882", + "e7087346aa4b4ae0911c1f1643b57345", + ]); + + await loadingTask.destroy(); }); it("gets page", async function () { @@ -1203,13 +1218,13 @@ describe("api", function () { loadingTask1.promise, loadingTask2.promise, ]); - const fingerprint1 = data[0].fingerprint; - const fingerprint2 = data[1].fingerprint; + const fingerprints1 = data[0].fingerprints; + const fingerprints2 = data[1].fingerprints; - expect(fingerprint1).not.toEqual(fingerprint2); + expect(fingerprints1).not.toEqual(fingerprints2); - expect(fingerprint1).toEqual("2f695a83d6e7553c24fc08b7ac69712d"); - expect(fingerprint2).toEqual("04c7126b34a46b6d4d6e7a1eff7edcb6"); + expect(fingerprints1).toEqual(["2f695a83d6e7553c24fc08b7ac69712d", null]); + expect(fingerprints2).toEqual(["04c7126b34a46b6d4d6e7a1eff7edcb6", null]); await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); }); diff --git a/web/app.js b/web/app.js index 7a08ee5ab..8abeb927f 100644 --- a/web/app.js +++ b/web/app.js @@ -1220,7 +1220,7 @@ const PDFViewerApplication = { pdfThumbnailViewer.setDocument(pdfDocument); const 
storedPromise = (this.store = new ViewHistory( - pdfDocument.fingerprint + pdfDocument.fingerprints[0] )) .getMultiple({ page: null, @@ -1252,7 +1252,7 @@ const PDFViewerApplication = { const viewOnLoad = AppOptions.get("viewOnLoad"); this._initializePdfHistory({ - fingerprint: pdfDocument.fingerprint, + fingerprint: pdfDocument.fingerprints[0], viewOnLoad, initialDest: openAction?.dest, }); @@ -1511,7 +1511,7 @@ const PDFViewerApplication = { // Provides some basic debug information console.log( - `PDF ${pdfDocument.fingerprint} [${info.PDFFormatVersion} ` + + `PDF ${pdfDocument.fingerprints[0]} [${info.PDFFormatVersion} ` + `${(info.Producer || "-").trim()} / ${(info.Creator || "-").trim()}] ` + `(PDF.js: ${version || "-"})` );