Merge pull request #17636 from calixteman/bug1878916

When updating, write the xref table in the same format as the previous one (bug 1878916)
This commit is contained in:
calixteman 2024-02-13 14:45:14 +01:00 committed by GitHub
commit 14874e50b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 204 additions and 77 deletions

View File

@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
}
}
/**
 * Compute how many bytes are needed to store a non-negative integer.
 * When x is zero the result is 0: no byte has to be spent to represent it.
 * @param {number} x - a non-negative integer.
 * @returns {number} the number of bytes required to represent `x`.
 */
function getSizeInBytes(x) {
  // Every value up to 2^n - 1 fits in n bits, hence x needs
  // ceil(log2(x + 1)) bits.
  const bitCount = Math.ceil(Math.log2(x + 1));
  // Round the bit count up to a whole number of bytes.
  return Math.ceil(bitCount / 8);
}
export {
arrayBuffersToBytes,
codePointIter,
@ -622,6 +635,7 @@ export {
getLookupTableFactory,
getNewAnnotationsMap,
getRotationMatrix,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,

View File

@ -35,7 +35,7 @@ import {
getNewAnnotationsMap,
XRefParseException,
} from "./core_utils.js";
import { Dict, Ref } from "./primitives.js";
import { Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { AnnotationFactory } from "./annotation.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
@ -726,6 +726,8 @@ class WorkerMessageHandler {
acroFormRef,
acroForm,
xfaData,
// Use the same kind of XRef as the previous one.
useXrefStream: isDict(xref.topDict, "XRef"),
}).finally(() => {
xref.resetNewTemporaryRef();
});

View File

@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
import {
escapePDFName,
escapeString,
getSizeInBytes,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Stream } from "./stream.js";
async function writeObject(ref, obj, buffer, { encrypt = null }) {
const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
newRefs.push({ ref: xfaDatasetsRef, data });
}
/**
 * Append a classic (non-stream) cross-reference section to `buffer`: the
 * `xref` keyword, the subsection headers and their entries, the `trailer`
 * dictionary and finally the `startxref`/`%%EOF` footer.
 *
 * @param {Object} xrefInfo - forwarded to `computeIDs`.
 * @param {number} baseOffset - file offset of the data of the first entry of
 *   `newRefs`; each following entry starts `data.length` further.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries. Expected to be
 *   ordered by `ref.num`, since a subsection header is only emitted when an
 *   entry's number matches the start of the next subsection.
 * @param {Dict} newXref - the trailer dictionary to serialize.
 * @param {Array<string>} buffer - output chunks; appended to in place.
 */
async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
  buffer.push("xref\n");
  const subsections = getIndexes(newRefs);
  let cursor = 0;
  let currentOffset = baseOffset;

  for (const entry of newRefs) {
    const { num, gen } = entry.ref;
    if (num === subsections[cursor]) {
      // Start of a new subsection: "<first object number> <entry count>".
      const firstNum = subsections[cursor];
      const count = subsections[cursor + 1];
      buffer.push(`${firstNum} ${count}\n`);
      cursor += 2;
    }
    // The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
    // (see 7.5.4 - Cross-Reference Table).
    const paddedOffset = currentOffset.toString().padStart(10, "0");
    const paddedGen = Math.min(gen, 0xffff).toString().padStart(5, "0");
    buffer.push(`${paddedOffset} ${paddedGen} n\r\n`);
    currentOffset += entry.data.length;
  }

  // At this point `currentOffset` is the position right after the last
  // object's data; it is the value written after `startxref`.
  computeIDs(currentOffset, xrefInfo, newXref);
  buffer.push("trailer\n");
  await writeDict(newXref, buffer);
  buffer.push("\nstartxref\n", currentOffset.toString(), "\n%%EOF\n");
}
/**
 * Build the flat list of `[firstObjectNumber, count, ...]` pairs describing
 * the runs of consecutive object numbers found in `newRefs`.
 *
 * @param {Array<Object>} newRefs - entries having a `ref.num` property.
 * @returns {Array<number>} the flattened (start, count) pairs, in the order
 *   in which the runs are encountered.
 */
function getIndexes(newRefs) {
  const ranges = [];
  for (const {
    ref: { num },
  } of newRefs) {
    const last = ranges.length - 1;
    // The current run ends right before `start + count`.
    const extendsCurrentRun =
      last > 0 && num === ranges[last - 1] + ranges[last];
    if (extendsCurrentRun) {
      ranges[last] += 1;
    } else {
      ranges.push(num, 1);
    }
  }
  return ranges;
}
/**
 * Append a cross-reference stream object, followed by the
 * `startxref`/`%%EOF` footer, to `buffer`.
 *
 * Every entry of `newRefs` is recorded with type 1, its file offset and its
 * generation number (capped to 0xffff). The field widths written in `W` are
 * the smallest byte counts able to hold the largest offset and generation.
 *
 * @param {Object} xrefInfo - forwarded to `computeIDs`; its `newRef` is the
 *   reference under which the stream object is written.
 * @param {number} baseOffset - file offset of the data of the first entry of
 *   `newRefs`; each following entry starts `data.length` further.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries.
 * @param {Dict} newXref - dictionary of the stream; `Index`, `W` (and `ID`
 *   via `computeIDs`) are set on it.
 * @param {Array<string>} buffer - output chunks; appended to in place.
 */
async function getXRefStreamTable(
  xrefInfo,
  baseOffset,
  newRefs,
  newXref,
  buffer
) {
  const entries = [];
  let largestOffset = 0;
  let largestGen = 0;
  let position = baseOffset;
  for (const { ref, data } of newRefs) {
    const generation = Math.min(ref.gen, 0xffff);
    largestOffset = Math.max(largestOffset, position);
    largestGen = Math.max(largestGen, generation);
    entries.push([1, position, generation]);
    position += data.length;
  }

  // The insertion order into the dictionary is kept as Index, W, then ID.
  newXref.set("Index", getIndexes(newRefs));
  const widths = [
    1,
    getSizeInBytes(largestOffset),
    getSizeInBytes(largestGen),
  ];
  newXref.set("W", widths);
  computeIDs(position, xrefInfo, newXref);

  const [typeWidth, offsetWidth, genWidth] = widths;
  const entrySize = typeWidth + offsetWidth + genWidth;
  const bytes = new Uint8Array(entrySize * entries.length);
  const stream = new Stream(bytes);
  stream.dict = newXref;

  let cursor = 0;
  for (const [type, objOffset, gen] of entries) {
    cursor = writeInt(type, typeWidth, cursor, bytes);
    cursor = writeInt(objOffset, offsetWidth, cursor, bytes);
    cursor = writeInt(gen, genWidth, cursor, bytes);
  }

  await writeObject(xrefInfo.newRef, stream, buffer, {});
  buffer.push("startxref\n", position.toString(), "\n%%EOF\n");
}
/**
 * Set the `ID` entry of `newXref` when the original file has file ids: the
 * first original id is kept and the second one is replaced by the value
 * returned by `computeMD5`.
 * Nothing is done when `xrefInfo.fileIds` is not a non-empty array.
 *
 * @param {number} baseOffset - forwarded to `computeMD5`.
 * @param {Object} xrefInfo - provides `fileIds` and is forwarded to
 *   `computeMD5`.
 * @param {Dict} newXref - dictionary receiving the `ID` entry.
 */
function computeIDs(baseOffset, xrefInfo, newXref) {
  const hasFileIds =
    Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0;
  if (!hasFileIds) {
    return;
  }
  const updatedId = computeMD5(baseOffset, xrefInfo);
  newXref.set("ID", [xrefInfo.fileIds[0], updatedId]);
}
/**
 * Create the dictionary used either as the trailer of a cross-reference
 * table or as the dictionary of a cross-reference stream.
 *
 * When `useXrefStream` is true, an entry for `xrefInfo.newRef` (with empty
 * data) is appended to `newRefs`, i.e. the caller's array is modified.
 *
 * @param {Object} xrefInfo - provides `startXRef`, `newRef`, `rootRef`,
 *   `infoRef` and `encryptRef`.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries.
 * @param {boolean} useXrefStream - true to prepare a cross-reference stream
 *   dictionary, false for a plain trailer.
 * @returns {Dict} the new dictionary.
 */
function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
  const { newRef, startXRef, rootRef, infoRef, encryptRef } = xrefInfo;
  const trailer = new Dict(null);
  trailer.set("Prev", startXRef);

  if (!useXrefStream) {
    trailer.set("Size", newRef.num);
  } else {
    // The xref stream is itself an object, so it gets its own entry and is
    // counted in Size.
    newRefs.push({ ref: newRef, data: "" });
    trailer.set("Size", newRef.num + 1);
    trailer.set("Type", Name.get("XRef"));
  }

  // Entries which are only written when the corresponding ref isn't null.
  const optionalEntries = [
    ["Root", rootRef],
    ["Info", infoRef],
    ["Encrypt", encryptRef],
  ];
  for (const [key, value] of optionalEntries) {
    if (value !== null) {
      trailer.set(key, value);
    }
  }
  return trailer;
}
async function incrementalUpdate({
originalData,
xrefInfo,
@ -293,6 +401,7 @@ async function incrementalUpdate({
acroFormRef = null,
acroForm = null,
xfaData = null,
useXrefStream = false,
}) {
await updateAcroform({
xref,
@ -314,9 +423,6 @@ async function incrementalUpdate({
});
}
const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;
let buffer, baseOffset;
const lastByte = originalData.at(-1);
if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
@ -328,60 +434,23 @@ async function incrementalUpdate({
baseOffset = originalData.length + 1;
}
newXref.set("Size", refForXrefTable.num + 1);
newXref.set("Prev", xrefInfo.startXRef);
newXref.set("Type", Name.get("XRef"));
if (xrefInfo.rootRef !== null) {
newXref.set("Root", xrefInfo.rootRef);
}
if (xrefInfo.infoRef !== null) {
newXref.set("Info", xrefInfo.infoRef);
}
if (xrefInfo.encryptRef !== null) {
newXref.set("Encrypt", xrefInfo.encryptRef);
}
// Add a ref for the new xref and sort them
newRefs.push({ ref: refForXrefTable, data: "" });
const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
newRefs = newRefs.sort(
(a, b) => /* compare the refs */ a.ref.num - b.ref.num
);
const xrefTableData = [[0, 1, 0xffff]];
const indexes = [0, 1];
let maxOffset = 0;
for (const { ref, data } of newRefs) {
maxOffset = Math.max(maxOffset, baseOffset);
xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
baseOffset += data.length;
indexes.push(ref.num, 1);
for (const { data } of newRefs) {
buffer.push(data);
}
newXref.set("Index", indexes);
await (useXrefStream
? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
: getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));
if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
const md5 = computeMD5(baseOffset, xrefInfo);
newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}
const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
const sizes = [1, offsetSize, 2];
const structSize = sizes[0] + sizes[1] + sizes[2];
const tableLength = structSize * xrefTableData.length;
newXref.set("W", sizes);
newXref.set("Length", tableLength);
buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
await writeDict(newXref, buffer, null);
buffer.push(" stream\n");
const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
const array = new Uint8Array(
originalData.length + bufferLen + tableLength + footer.length
const totalLength = buffer.reduce(
(a, str) => a + str.length,
originalData.length
);
const array = new Uint8Array(totalLength);
// Original data
array.set(originalData);
@ -393,16 +462,6 @@ async function incrementalUpdate({
offset += str.length;
}
// New xref table
for (const [type, objOffset, gen] of xrefTableData) {
offset = writeInt(type, sizes[0], offset, array);
offset = writeInt(objOffset, sizes[1], offset, array);
offset = writeInt(gen, sizes[2], offset, array);
}
// Add the footer
writeString(footer, offset, array);
return array;
}

View File

@ -19,6 +19,7 @@ import {
escapePDFName,
escapeString,
getInheritableProperty,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
@ -468,4 +469,21 @@ describe("core_utils", function () {
);
});
});
describe("getSizeInBytes", function () {
  it("should get the size in bytes to use to represent a positive integer", function () {
    // Zero doesn't need any byte.
    expect(getSizeInBytes(0)).toEqual(0);

    // Each row is: [first value, last value, step, expected size in bytes].
    const ranges = [
      [1, 0xff, 1, 1],
      [0x100, 0xffff, 0x100, 2],
      [0x10000, 0xffffff, 0x10000, 3],
    ];
    for (const [first, last, step, expectedSize] of ranges) {
      for (let value = first; value <= last; value += step) {
        expect(getSizeInBytes(value)).toEqual(expectedSize);
      }
    }
  });
});
});

View File

@ -37,26 +37,55 @@ describe("Writer", function () {
info: {},
};
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
let data = await incrementalUpdate({
originalData,
xrefInfo,
newRefs,
useXrefStream: true,
});
data = bytesToString(data);
const expected =
let expected =
"\nabc\n" +
"defg\n" +
"789 0 obj\n" +
"<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 456 1 789 1] " +
"/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
"/W [1 1 2] /Length 16>> stream\n" +
"\x00\x01\xff\xff" +
"\x01\x01\x00\x2d" +
"\x01\x05\x00\x4e" +
"\x01\x0a\x00\x00\n" +
"<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 456 1 789 1] " +
"/W [1 1 1] /ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
"/Length 9>> stream\n" +
"\x01\x01\x2d" +
"\x01\x05\x4e" +
"\x01\x0a\x00\n" +
"endstream\n" +
"endobj\n" +
"startxref\n" +
"10\n" +
"%%EOF\n";
expect(data).toEqual(expected);
data = await incrementalUpdate({
originalData,
xrefInfo,
newRefs,
useXrefStream: false,
});
data = bytesToString(data);
expected =
"\nabc\n" +
"defg\n" +
"xref\n" +
"123 1\n" +
"0000000001 00045 n\r\n" +
"456 1\n" +
"0000000005 00078 n\r\n" +
"789 1\n" +
"0000000010 00000 n\r\n" +
"trailer\n" +
"<< /Prev 314 /Size 789 " +
"/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)]>>\n" +
"startxref\n" +
"10\n" +
"%%EOF\n";
expect(data).toEqual(expected);
});
@ -74,17 +103,21 @@ describe("Writer", function () {
info: {},
};
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
let data = await incrementalUpdate({
originalData,
xrefInfo,
newRefs,
useXrefStream: true,
});
data = bytesToString(data);
const expected =
"\nabc\n" +
"789 0 obj\n" +
"<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 789 1] " +
"/W [1 1 2] /Length 12>> stream\n" +
"\x00\x01\xff\xff" +
"\x01\x01\x00\x2d" +
"\x01\x05\x00\x00\n" +
"<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 789 1] " +
"/W [1 1 1] /Length 6>> stream\n" +
"\x01\x01\x2d" +
"\x01\x05\x00\n" +
"endstream\n" +
"endobj\n" +
"startxref\n" +
@ -187,6 +220,7 @@ describe("Writer", function () {
acroForm,
xfaData,
xref: {},
useXrefStream: true,
});
data = bytesToString(data);
@ -202,8 +236,8 @@ describe("Writer", function () {
"endstream\n" +
"endobj\n" +
"131415 0 obj\n" +
"<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" +
"\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001[\u0000\u0000\u0001¹\u0000\u0000\n" +
"<< /Prev 314 /Size 131416 /Type /XRef /Index [789 1 101112 1 131415 1] /W [1 1 0] /Length 6>> stream\n" +
"\x01\x01\x01[\x01¹\n" +
"endstream\n" +
"endobj\n" +
"startxref\n" +