From 37e98e39f60bd15c8954a88572ccb0a2842e9dd0 Mon Sep 17 00:00:00 2001
From: Jonas Jenwald
Date: Mon, 12 Feb 2024 15:31:08 +0100
Subject: [PATCH] Skip any whitespace after the first object in linearized PDFs (issue 17665)

This way the code is now consistent with the non-linearized branch in the `PDFDocument.startXRef` getter.
---
 src/core/document.js  |  9 ++++++++-
 src/core/worker.js    |  3 +++
 src/display/api.js    |  7 +++++++
 test/unit/api_spec.js | 12 ++++++++++++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/core/document.js b/src/core/document.js
index 37d81d9bc..f783ef31d 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -930,7 +930,14 @@ class PDFDocument {
       // Find the end of the first object.
       stream.reset();
       if (find(stream, ENDOBJ_SIGNATURE)) {
-        startXRef = stream.pos + 6 - stream.start;
+        stream.skip(6);
+
+        let ch = stream.peekByte();
+        while (isWhiteSpace(ch)) {
+          stream.pos++;
+          ch = stream.peekByte();
+        }
+        startXRef = stream.pos - stream.start;
       }
     } else {
       // Find `startxref` by checking backwards from the end of the file.
diff --git a/src/core/worker.js b/src/core/worker.js
index fe699b978..70fa1c249 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -880,6 +880,9 @@ class WorkerMessageHandler {
           .ensureXRef("trailer")
           .then(trailer => trailer.get("Prev"));
       });
+      handler.on("GetStartXRefPos", function (data) {
+        return pdfManager.ensureDoc("startXRef");
+      });
       handler.on("GetAnnotArray", function (data) {
         return pdfManager.getPage(data.pageIndex).then(function (page) {
           return page.annotations.map(a => a.toString());
diff --git a/src/display/api.js b/src/display/api.js
index faebddbea..2ae61ea29 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -768,6 +768,9 @@ class PDFDocumentProxy {
       Object.defineProperty(this, "getXRefPrevValue", {
         value: () => this._transport.getXRefPrevValue(),
       });
+      Object.defineProperty(this, "getStartXRefPos", {
+        value: () => this._transport.getStartXRefPos(),
+      });
       Object.defineProperty(this, "getAnnotArray", {
         value: pageIndex => this._transport.getAnnotArray(pageIndex),
       });
@@ -2349,6 +2352,10 @@ class WorkerTransport {
         value: () =>
           this.messageHandler.sendWithPromise("GetXRefPrevValue", null),
       });
+      Object.defineProperty(this, "getStartXRefPos", {
+        value: () =>
+          this.messageHandler.sendWithPromise("GetStartXRefPos", null),
+      });
       Object.defineProperty(this, "getAnnotArray", {
         value: pageIndex =>
           this.messageHandler.sendWithPromise("GetAnnotArray", { pageIndex }),
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index 7662c405e..0035c0c65 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -511,6 +511,18 @@ describe("api", function () {
       await loadingTask.destroy();
     });
 
+    it("checks the `startxref` position of a linearized pdf doc (issue 17665)", async function () {
+      const loadingTask = getDocument(buildGetDocumentParams("empty.pdf"));
+      expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
+
+      const pdfDocument = await loadingTask.promise;
+
+      const startXRefPos = await pdfDocument.getStartXRefPos();
+      expect(startXRefPos).toEqual(116);
+
+      await loadingTask.destroy();
+    });
+
     it("checks that `docId`s are unique and increasing", async function () {
       const loadingTask1 = getDocument(basicApiGetDocumentParams);
       expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);