Skip any whitespace after the first object in linearized PDFs (issue 17665)

This way the code is now consistent with the non-linearized branch in the `PDFDocument.startXRef` getter.
This commit is contained in:
Jonas Jenwald 2024-02-12 15:31:08 +01:00
parent eb5e6e68d6
commit 37e98e39f6
4 changed files with 30 additions and 1 deletion

View File

@@ -930,7 +930,14 @@ class PDFDocument {
// Find the end of the first object. // Find the end of the first object.
stream.reset(); stream.reset();
if (find(stream, ENDOBJ_SIGNATURE)) { if (find(stream, ENDOBJ_SIGNATURE)) {
startXRef = stream.pos + 6 - stream.start; stream.skip(6);
let ch = stream.peekByte();
while (isWhiteSpace(ch)) {
stream.pos++;
ch = stream.peekByte();
}
startXRef = stream.pos - stream.start;
} }
} else { } else {
// Find `startxref` by checking backwards from the end of the file. // Find `startxref` by checking backwards from the end of the file.

View File

@@ -880,6 +880,9 @@ class WorkerMessageHandler {
.ensureXRef("trailer") .ensureXRef("trailer")
.then(trailer => trailer.get("Prev")); .then(trailer => trailer.get("Prev"));
}); });
handler.on("GetStartXRefPos", function (data) {
return pdfManager.ensureDoc("startXRef");
});
handler.on("GetAnnotArray", function (data) { handler.on("GetAnnotArray", function (data) {
return pdfManager.getPage(data.pageIndex).then(function (page) { return pdfManager.getPage(data.pageIndex).then(function (page) {
return page.annotations.map(a => a.toString()); return page.annotations.map(a => a.toString());

View File

@@ -768,6 +768,9 @@ class PDFDocumentProxy {
Object.defineProperty(this, "getXRefPrevValue", { Object.defineProperty(this, "getXRefPrevValue", {
value: () => this._transport.getXRefPrevValue(), value: () => this._transport.getXRefPrevValue(),
}); });
Object.defineProperty(this, "getStartXRefPos", {
value: () => this._transport.getStartXRefPos(),
});
Object.defineProperty(this, "getAnnotArray", { Object.defineProperty(this, "getAnnotArray", {
value: pageIndex => this._transport.getAnnotArray(pageIndex), value: pageIndex => this._transport.getAnnotArray(pageIndex),
}); });
@@ -2349,6 +2352,10 @@ class WorkerTransport {
value: () => value: () =>
this.messageHandler.sendWithPromise("GetXRefPrevValue", null), this.messageHandler.sendWithPromise("GetXRefPrevValue", null),
}); });
Object.defineProperty(this, "getStartXRefPos", {
value: () =>
this.messageHandler.sendWithPromise("GetStartXRefPos", null),
});
Object.defineProperty(this, "getAnnotArray", { Object.defineProperty(this, "getAnnotArray", {
value: pageIndex => value: pageIndex =>
this.messageHandler.sendWithPromise("GetAnnotArray", { pageIndex }), this.messageHandler.sendWithPromise("GetAnnotArray", { pageIndex }),

View File

@@ -511,6 +511,18 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
it("checks the `startxref` position of a linearized pdf doc (issue 17665)", async function () {
const loadingTask = getDocument(buildGetDocumentParams("empty.pdf"));
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise;
const startXRefPos = await pdfDocument.getStartXRefPos();
expect(startXRefPos).toEqual(116);
await loadingTask.destroy();
});
it("checks that `docId`s are unique and increasing", async function () { it("checks that `docId`s are unique and increasing", async function () {
const loadingTask1 = getDocument(basicApiGetDocumentParams); const loadingTask1 = getDocument(basicApiGetDocumentParams);
expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask1 instanceof PDFDocumentLoadingTask).toEqual(true);