Merge pull request #17636 from calixteman/bug1878916

When updating, write the xref table in the same format as the previous one (bug 1878916)

Commit 14874e50b7
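The gist of the change, as a hedged sketch (the names come from the hunks below; this is an illustration, not a drop-in snippet): `incrementalUpdate` now takes a `useXrefStream` flag and the worker derives it from the kind of cross-reference section the document already ends with, so an incremental save keeps the existing format instead of always appending an xref stream.

// Sketch only: mirrors the worker change below. Only a cross-reference
// stream has a top-level dictionary with /Type /XRef; a classic table
// ends with a plain trailer dictionary instead.
const useXrefStream = isDict(xref.topDict, "XRef");
await incrementalUpdate({ originalData, xrefInfo, newRefs, useXrefStream });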
@@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
   }
 }
 
+/**
+ * Get the number of bytes to use to represent the given positive integer.
+ * If n is zero, the function returns 0 which means that we don't need to waste
+ * a byte to represent it.
+ * @param {number} x - a positive integer.
+ * @returns {number}
+ */
+function getSizeInBytes(x) {
+  // n bits are required for numbers up to 2^n - 1.
+  // So for a number x, we need ceil(log2(1 + x)) bits.
+  return Math.ceil(Math.ceil(Math.log2(1 + x)) / 8);
+}
+
 export {
   arrayBuffersToBytes,
   codePointIter,
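A few sample values of the helper added above, matching the unit test added near the end of this diff: n bits cover the integers up to 2^n - 1, so the byte count is ceil(ceil(log2(1 + x)) / 8).

getSizeInBytes(0);       // 0 — zero needs no bytes at all
getSizeInBytes(0xff);    // 1
getSizeInBytes(0x100);   // 2
getSizeInBytes(0xffff);  // 2
getSizeInBytes(0x10000); // 3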
@@ -622,6 +635,7 @@ export {
   getLookupTableFactory,
   getNewAnnotationsMap,
   getRotationMatrix,
+  getSizeInBytes,
   isAscii,
   isWhiteSpace,
   log2,
@@ -35,7 +35,7 @@ import {
   getNewAnnotationsMap,
   XRefParseException,
 } from "./core_utils.js";
-import { Dict, Ref } from "./primitives.js";
+import { Dict, isDict, Ref } from "./primitives.js";
 import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
 import { AnnotationFactory } from "./annotation.js";
 import { clearGlobalCaches } from "./cleanup_helper.js";
@@ -726,6 +726,8 @@ class WorkerMessageHandler {
           acroFormRef,
           acroForm,
           xfaData,
+          // Use the same kind of XRef as the previous one.
+          useXrefStream: isDict(xref.topDict, "XRef"),
         }).finally(() => {
           xref.resetNewTemporaryRef();
         });
@@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
 import {
   escapePDFName,
   escapeString,
+  getSizeInBytes,
   numberToString,
   parseXFAPath,
 } from "./core_utils.js";
 import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
 import { BaseStream } from "./base_stream.js";
 import { calculateMD5 } from "./crypto.js";
+import { Stream } from "./stream.js";
 
 async function writeObject(ref, obj, buffer, { encrypt = null }) {
   const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
@@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
   newRefs.push({ ref: xfaDatasetsRef, data });
 }
 
+async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
+  buffer.push("xref\n");
+  const indexes = getIndexes(newRefs);
+  let indexesPosition = 0;
+  for (const { ref, data } of newRefs) {
+    if (ref.num === indexes[indexesPosition]) {
+      buffer.push(
+        `${indexes[indexesPosition]} ${indexes[indexesPosition + 1]}\n`
+      );
+      indexesPosition += 2;
+    }
+    // The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
+    // (see 7.5.4 - Cross-Reference Table).
+    buffer.push(
+      `${baseOffset.toString().padStart(10, "0")} ${Math.min(ref.gen, 0xffff).toString().padStart(5, "0")} n\r\n`
+    );
+    baseOffset += data.length;
+  }
+  computeIDs(baseOffset, xrefInfo, newXref);
+  buffer.push("trailer\n");
+  await writeDict(newXref, buffer);
+  buffer.push("\nstartxref\n", baseOffset.toString(), "\n%%EOF\n");
+}
+
+function getIndexes(newRefs) {
+  const indexes = [];
+  for (const { ref } of newRefs) {
+    if (ref.num === indexes.at(-2) + indexes.at(-1)) {
+      indexes[indexes.length - 1] += 1;
+    } else {
+      indexes.push(ref.num, 1);
+    }
+  }
+  return indexes;
+}
+
+async function getXRefStreamTable(
+  xrefInfo,
+  baseOffset,
+  newRefs,
+  newXref,
+  buffer
+) {
+  const xrefTableData = [];
+  let maxOffset = 0;
+  let maxGen = 0;
+  for (const { ref, data } of newRefs) {
+    maxOffset = Math.max(maxOffset, baseOffset);
+    const gen = Math.min(ref.gen, 0xffff);
+    maxGen = Math.max(maxGen, gen);
+    xrefTableData.push([1, baseOffset, gen]);
+    baseOffset += data.length;
+  }
+  newXref.set("Index", getIndexes(newRefs));
+  const offsetSize = getSizeInBytes(maxOffset);
+  const maxGenSize = getSizeInBytes(maxGen);
+  const sizes = [1, offsetSize, maxGenSize];
+  newXref.set("W", sizes);
+  computeIDs(baseOffset, xrefInfo, newXref);
+
+  const structSize = sizes.reduce((a, x) => a + x, 0);
+  const data = new Uint8Array(structSize * xrefTableData.length);
+  const stream = new Stream(data);
+  stream.dict = newXref;
+
+  let offset = 0;
+  for (const [type, objOffset, gen] of xrefTableData) {
+    offset = writeInt(type, sizes[0], offset, data);
+    offset = writeInt(objOffset, sizes[1], offset, data);
+    offset = writeInt(gen, sizes[2], offset, data);
+  }
+
+  await writeObject(xrefInfo.newRef, stream, buffer, {});
+  buffer.push("startxref\n", baseOffset.toString(), "\n%%EOF\n");
+}
+
+function computeIDs(baseOffset, xrefInfo, newXref) {
+  if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
+    const md5 = computeMD5(baseOffset, xrefInfo);
+    newXref.set("ID", [xrefInfo.fileIds[0], md5]);
+  }
+}
+
+function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
+  const newXref = new Dict(null);
+  newXref.set("Prev", xrefInfo.startXRef);
+  const refForXrefTable = xrefInfo.newRef;
+  if (useXrefStream) {
+    newRefs.push({ ref: refForXrefTable, data: "" });
+    newXref.set("Size", refForXrefTable.num + 1);
+    newXref.set("Type", Name.get("XRef"));
+  } else {
+    newXref.set("Size", refForXrefTable.num);
+  }
+  if (xrefInfo.rootRef !== null) {
+    newXref.set("Root", xrefInfo.rootRef);
+  }
+  if (xrefInfo.infoRef !== null) {
+    newXref.set("Info", xrefInfo.infoRef);
+  }
+  if (xrefInfo.encryptRef !== null) {
+    newXref.set("Encrypt", xrefInfo.encryptRef);
+  }
+  return newXref;
+}
+
 async function incrementalUpdate({
   originalData,
   xrefInfo,
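The /Index entry of the new xref lists (first object number, count) pairs, and getIndexes above merges runs of consecutive object numbers into a single pair. Two hedged examples of its output (refs shown in shorthand; ref.num is the only field the function reads):

getIndexes([{ ref: { num: 123 } }, { ref: { num: 456 } }, { ref: { num: 789 } }]);
// -> [123, 1, 456, 1, 789, 1]  (non-consecutive numbers: one pair each)
getIndexes([{ ref: { num: 5 } }, { ref: { num: 6 } }, { ref: { num: 7 } }]);
// -> [5, 3]                    (consecutive numbers collapse into one pair)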
@@ -293,6 +401,7 @@ async function incrementalUpdate({
   acroFormRef = null,
   acroForm = null,
   xfaData = null,
+  useXrefStream = false,
 }) {
   await updateAcroform({
     xref,
@@ -314,9 +423,6 @@ async function incrementalUpdate({
     });
   }
 
-  const newXref = new Dict(null);
-  const refForXrefTable = xrefInfo.newRef;
-
   let buffer, baseOffset;
   const lastByte = originalData.at(-1);
   if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
@@ -328,60 +434,23 @@ async function incrementalUpdate({
     baseOffset = originalData.length + 1;
   }
 
-  newXref.set("Size", refForXrefTable.num + 1);
-  newXref.set("Prev", xrefInfo.startXRef);
-  newXref.set("Type", Name.get("XRef"));
-
-  if (xrefInfo.rootRef !== null) {
-    newXref.set("Root", xrefInfo.rootRef);
-  }
-  if (xrefInfo.infoRef !== null) {
-    newXref.set("Info", xrefInfo.infoRef);
-  }
-  if (xrefInfo.encryptRef !== null) {
-    newXref.set("Encrypt", xrefInfo.encryptRef);
-  }
-
-  // Add a ref for the new xref and sort them
-  newRefs.push({ ref: refForXrefTable, data: "" });
+  const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
   newRefs = newRefs.sort(
     (a, b) => /* compare the refs */ a.ref.num - b.ref.num
   );
-
-  const xrefTableData = [[0, 1, 0xffff]];
-  const indexes = [0, 1];
-  let maxOffset = 0;
-
-  for (const { ref, data } of newRefs) {
-    maxOffset = Math.max(maxOffset, baseOffset);
-    xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
-    baseOffset += data.length;
-    indexes.push(ref.num, 1);
+  for (const { data } of newRefs) {
     buffer.push(data);
   }
 
-  newXref.set("Index", indexes);
-
-  if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
-    const md5 = computeMD5(baseOffset, xrefInfo);
-    newXref.set("ID", [xrefInfo.fileIds[0], md5]);
-  }
-
-  const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
-  const sizes = [1, offsetSize, 2];
-  const structSize = sizes[0] + sizes[1] + sizes[2];
-  const tableLength = structSize * xrefTableData.length;
-  newXref.set("W", sizes);
-  newXref.set("Length", tableLength);
-
-  buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
-  await writeDict(newXref, buffer, null);
-  buffer.push(" stream\n");
-
-  const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
-  const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
-  const array = new Uint8Array(
-    originalData.length + bufferLen + tableLength + footer.length
+  await (useXrefStream
+    ? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
+    : getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));
+
+  const totalLength = buffer.reduce(
+    (a, str) => a + str.length,
+    originalData.length
   );
+  const array = new Uint8Array(totalLength);
 
   // Original data
   array.set(originalData);
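A hedged usage sketch of the rewritten entry point (setup as in the updated writer_spec further down, which also shows the exact bytes produced): the only caller-visible difference is the new flag, which decides what gets appended to the file.

// Sketch, assuming originalData, xrefInfo and newRefs are prepared as in writer_spec.
let data = await incrementalUpdate({
  originalData,
  xrefInfo,
  newRefs,
  // true: append "<n> <g> obj << /Type /XRef ... >> stream ... endstream endobj"
  useXrefStream: true,
});
data = await incrementalUpdate({
  originalData,
  xrefInfo,
  newRefs,
  // false: append a classic "xref ... trailer << ... >>" section instead
  useXrefStream: false,
});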
@@ -393,16 +462,6 @@ async function incrementalUpdate({
     offset += str.length;
   }
 
-  // New xref table
-  for (const [type, objOffset, gen] of xrefTableData) {
-    offset = writeInt(type, sizes[0], offset, array);
-    offset = writeInt(objOffset, sizes[1], offset, array);
-    offset = writeInt(gen, sizes[2], offset, array);
-  }
-
-  // Add the footer
-  writeString(footer, offset, array);
-
   return array;
 }
 
@@ -19,6 +19,7 @@ import {
   escapePDFName,
   escapeString,
   getInheritableProperty,
+  getSizeInBytes,
   isAscii,
   isWhiteSpace,
   log2,
@@ -468,4 +469,21 @@ describe("core_utils", function () {
       );
     });
   });
+
+  describe("getSizeInBytes", function () {
+    it("should get the size in bytes to use to represent a positive integer", function () {
+      expect(getSizeInBytes(0)).toEqual(0);
+      for (let i = 1; i <= 0xff; i++) {
+        expect(getSizeInBytes(i)).toEqual(1);
+      }
+
+      for (let i = 0x100; i <= 0xffff; i += 0x100) {
+        expect(getSizeInBytes(i)).toEqual(2);
+      }
+
+      for (let i = 0x10000; i <= 0xffffff; i += 0x10000) {
+        expect(getSizeInBytes(i)).toEqual(3);
+      }
+    });
+  });
 });
@@ -37,26 +37,55 @@ describe("Writer", function () {
       info: {},
     };
 
-    let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
+    let data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: true,
+    });
     data = bytesToString(data);
 
-    const expected =
+    let expected =
       "\nabc\n" +
       "defg\n" +
       "789 0 obj\n" +
-      "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 456 1 789 1] " +
-      "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
-      "/W [1 1 2] /Length 16>> stream\n" +
-      "\x00\x01\xff\xff" +
-      "\x01\x01\x00\x2d" +
-      "\x01\x05\x00\x4e" +
-      "\x01\x0a\x00\x00\n" +
+      "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 456 1 789 1] " +
+      "/W [1 1 1] /ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
+      "/Length 9>> stream\n" +
+      "\x01\x01\x2d" +
+      "\x01\x05\x4e" +
+      "\x01\x0a\x00\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +
       "10\n" +
       "%%EOF\n";
+    expect(data).toEqual(expected);
+
+    data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: false,
+    });
+    data = bytesToString(data);
+
+    expected =
+      "\nabc\n" +
+      "defg\n" +
+      "xref\n" +
+      "123 1\n" +
+      "0000000001 00045 n\r\n" +
+      "456 1\n" +
+      "0000000005 00078 n\r\n" +
+      "789 1\n" +
+      "0000000010 00000 n\r\n" +
+      "trailer\n" +
+      "<< /Prev 314 /Size 789 " +
+      "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)]>>\n" +
+      "startxref\n" +
+      "10\n" +
+      "%%EOF\n";
     expect(data).toEqual(expected);
   });
 
@@ -74,17 +103,21 @@ describe("Writer", function () {
       info: {},
     };
 
-    let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
+    let data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: true,
+    });
     data = bytesToString(data);
 
     const expected =
       "\nabc\n" +
       "789 0 obj\n" +
-      "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 789 1] " +
-      "/W [1 1 2] /Length 12>> stream\n" +
-      "\x00\x01\xff\xff" +
-      "\x01\x01\x00\x2d" +
-      "\x01\x05\x00\x00\n" +
+      "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 789 1] " +
+      "/W [1 1 1] /Length 6>> stream\n" +
+      "\x01\x01\x2d" +
+      "\x01\x05\x00\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +
@@ -187,6 +220,7 @@ describe("Writer", function () {
       acroForm,
       xfaData,
       xref: {},
+      useXrefStream: true,
     });
     data = bytesToString(data);
 
@@ -202,8 +236,8 @@ describe("Writer", function () {
       "endstream\n" +
       "endobj\n" +
       "131415 0 obj\n" +
-      "<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" +
-      "\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001[\u0000\u0000\u0001¹\u0000\u0000\n" +
+      "<< /Prev 314 /Size 131416 /Type /XRef /Index [789 1 101112 1 131415 1] /W [1 1 0] /Length 6>> stream\n" +
+      "\x01\x01\x01[\x01¹\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +