Merge pull request #17636 from calixteman/bug1878916

When updating, write the xref table in the same format as the previous one (bug 1878916)

Commit 14874e50b7
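The gist of the change, as a hedged sketch (the names come from the hunks below; this is an illustration, not a drop-in snippet): `incrementalUpdate` now takes a `useXrefStream` flag and the worker derives it from the kind of cross-reference section the document already ends with, so an incremental save keeps the existing format instead of always appending an xref stream.

// Sketch only: mirrors the worker change below. Only a cross-reference
// stream has a top-level dictionary with /Type /XRef; a classic table
// ends with a plain trailer dictionary instead.
const useXrefStream = isDict(xref.topDict, "XRef");
await incrementalUpdate({ originalData, xrefInfo, newRefs, useXrefStream });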
@@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
   }
 }
 
+/**
+ * Get the number of bytes to use to represent the given positive integer.
+ * If n is zero, the function returns 0 which means that we don't need to waste
+ * a byte to represent it.
+ * @param {number} x - a positive integer.
+ * @returns {number}
+ */
+function getSizeInBytes(x) {
+  // n bits are required for numbers up to 2^n - 1.
+  // So for a number x, we need ceil(log2(1 + x)) bits.
+  return Math.ceil(Math.ceil(Math.log2(1 + x)) / 8);
+}
+
 export {
   arrayBuffersToBytes,
   codePointIter,
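A few sample values of the helper added above, matching the unit test added near the end of this diff: n bits cover the integers up to 2^n - 1, so the byte count is ceil(ceil(log2(1 + x)) / 8).

getSizeInBytes(0);       // 0 — zero needs no bytes at all
getSizeInBytes(0xff);    // 1
getSizeInBytes(0x100);   // 2
getSizeInBytes(0xffff);  // 2
getSizeInBytes(0x10000); // 3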
@@ -622,6 +635,7 @@ export {
   getLookupTableFactory,
   getNewAnnotationsMap,
   getRotationMatrix,
+  getSizeInBytes,
   isAscii,
   isWhiteSpace,
   log2,
@@ -35,7 +35,7 @@ import {
   getNewAnnotationsMap,
   XRefParseException,
 } from "./core_utils.js";
-import { Dict, Ref } from "./primitives.js";
+import { Dict, isDict, Ref } from "./primitives.js";
 import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
 import { AnnotationFactory } from "./annotation.js";
 import { clearGlobalCaches } from "./cleanup_helper.js";
@@ -726,6 +726,8 @@ class WorkerMessageHandler {
           acroFormRef,
           acroForm,
           xfaData,
+          // Use the same kind of XRef as the previous one.
+          useXrefStream: isDict(xref.topDict, "XRef"),
         }).finally(() => {
           xref.resetNewTemporaryRef();
         });
@@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
 import {
   escapePDFName,
   escapeString,
+  getSizeInBytes,
   numberToString,
   parseXFAPath,
 } from "./core_utils.js";
 import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
 import { BaseStream } from "./base_stream.js";
 import { calculateMD5 } from "./crypto.js";
+import { Stream } from "./stream.js";
 
 async function writeObject(ref, obj, buffer, { encrypt = null }) {
   const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
@@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
   newRefs.push({ ref: xfaDatasetsRef, data });
 }
 
+async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
+  buffer.push("xref\n");
+  const indexes = getIndexes(newRefs);
+  let indexesPosition = 0;
+  for (const { ref, data } of newRefs) {
+    if (ref.num === indexes[indexesPosition]) {
+      buffer.push(
+        `${indexes[indexesPosition]} ${indexes[indexesPosition + 1]}\n`
+      );
+      indexesPosition += 2;
+    }
+    // The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
+    // (see 7.5.4 - Cross-Reference Table).
+    buffer.push(
+      `${baseOffset.toString().padStart(10, "0")} ${Math.min(ref.gen, 0xffff).toString().padStart(5, "0")} n\r\n`
+    );
+    baseOffset += data.length;
+  }
+  computeIDs(baseOffset, xrefInfo, newXref);
+  buffer.push("trailer\n");
+  await writeDict(newXref, buffer);
+  buffer.push("\nstartxref\n", baseOffset.toString(), "\n%%EOF\n");
+}
+
+function getIndexes(newRefs) {
+  const indexes = [];
+  for (const { ref } of newRefs) {
+    if (ref.num === indexes.at(-2) + indexes.at(-1)) {
+      indexes[indexes.length - 1] += 1;
+    } else {
+      indexes.push(ref.num, 1);
+    }
+  }
+  return indexes;
+}
+
+async function getXRefStreamTable(
+  xrefInfo,
+  baseOffset,
+  newRefs,
+  newXref,
+  buffer
+) {
+  const xrefTableData = [];
+  let maxOffset = 0;
+  let maxGen = 0;
+  for (const { ref, data } of newRefs) {
+    maxOffset = Math.max(maxOffset, baseOffset);
+    const gen = Math.min(ref.gen, 0xffff);
+    maxGen = Math.max(maxGen, gen);
+    xrefTableData.push([1, baseOffset, gen]);
+    baseOffset += data.length;
+  }
+  newXref.set("Index", getIndexes(newRefs));
+  const offsetSize = getSizeInBytes(maxOffset);
+  const maxGenSize = getSizeInBytes(maxGen);
+  const sizes = [1, offsetSize, maxGenSize];
+  newXref.set("W", sizes);
+  computeIDs(baseOffset, xrefInfo, newXref);
+
+  const structSize = sizes.reduce((a, x) => a + x, 0);
+  const data = new Uint8Array(structSize * xrefTableData.length);
+  const stream = new Stream(data);
+  stream.dict = newXref;
+
+  let offset = 0;
+  for (const [type, objOffset, gen] of xrefTableData) {
+    offset = writeInt(type, sizes[0], offset, data);
+    offset = writeInt(objOffset, sizes[1], offset, data);
+    offset = writeInt(gen, sizes[2], offset, data);
+  }
+
+  await writeObject(xrefInfo.newRef, stream, buffer, {});
+  buffer.push("startxref\n", baseOffset.toString(), "\n%%EOF\n");
+}
+
+function computeIDs(baseOffset, xrefInfo, newXref) {
+  if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
+    const md5 = computeMD5(baseOffset, xrefInfo);
+    newXref.set("ID", [xrefInfo.fileIds[0], md5]);
+  }
+}
+
+function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
+  const newXref = new Dict(null);
+  newXref.set("Prev", xrefInfo.startXRef);
+  const refForXrefTable = xrefInfo.newRef;
+  if (useXrefStream) {
+    newRefs.push({ ref: refForXrefTable, data: "" });
+    newXref.set("Size", refForXrefTable.num + 1);
+    newXref.set("Type", Name.get("XRef"));
+  } else {
+    newXref.set("Size", refForXrefTable.num);
+  }
+  if (xrefInfo.rootRef !== null) {
+    newXref.set("Root", xrefInfo.rootRef);
+  }
+  if (xrefInfo.infoRef !== null) {
+    newXref.set("Info", xrefInfo.infoRef);
+  }
+  if (xrefInfo.encryptRef !== null) {
+    newXref.set("Encrypt", xrefInfo.encryptRef);
+  }
+  return newXref;
+}
+
 async function incrementalUpdate({
   originalData,
   xrefInfo,
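The /Index entry of the new xref lists (first object number, count) pairs, and getIndexes above merges runs of consecutive object numbers into a single pair. Two hedged examples of its output (refs shown in shorthand; ref.num is the only field the function reads):

getIndexes([{ ref: { num: 123 } }, { ref: { num: 456 } }, { ref: { num: 789 } }]);
// -> [123, 1, 456, 1, 789, 1]  (non-consecutive numbers: one pair each)
getIndexes([{ ref: { num: 5 } }, { ref: { num: 6 } }, { ref: { num: 7 } }]);
// -> [5, 3]                    (consecutive numbers collapse into one pair)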
@@ -293,6 +401,7 @@ async function incrementalUpdate({
   acroFormRef = null,
   acroForm = null,
   xfaData = null,
+  useXrefStream = false,
 }) {
   await updateAcroform({
     xref,
@@ -314,9 +423,6 @@ async function incrementalUpdate({
     });
   }
 
-  const newXref = new Dict(null);
-  const refForXrefTable = xrefInfo.newRef;
-
   let buffer, baseOffset;
   const lastByte = originalData.at(-1);
   if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
@@ -328,60 +434,23 @@ async function incrementalUpdate({
     baseOffset = originalData.length + 1;
   }
 
-  newXref.set("Size", refForXrefTable.num + 1);
-  newXref.set("Prev", xrefInfo.startXRef);
-  newXref.set("Type", Name.get("XRef"));
-
-  if (xrefInfo.rootRef !== null) {
-    newXref.set("Root", xrefInfo.rootRef);
-  }
-  if (xrefInfo.infoRef !== null) {
-    newXref.set("Info", xrefInfo.infoRef);
-  }
-  if (xrefInfo.encryptRef !== null) {
-    newXref.set("Encrypt", xrefInfo.encryptRef);
-  }
-
-  // Add a ref for the new xref and sort them
-  newRefs.push({ ref: refForXrefTable, data: "" });
+  const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
   newRefs = newRefs.sort(
     (a, b) => /* compare the refs */ a.ref.num - b.ref.num
   );
-
-  const xrefTableData = [[0, 1, 0xffff]];
-  const indexes = [0, 1];
-  let maxOffset = 0;
-
-  for (const { ref, data } of newRefs) {
-    maxOffset = Math.max(maxOffset, baseOffset);
-    xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
-    baseOffset += data.length;
-    indexes.push(ref.num, 1);
+  for (const { data } of newRefs) {
     buffer.push(data);
   }
 
-  newXref.set("Index", indexes);
-
-  if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
-    const md5 = computeMD5(baseOffset, xrefInfo);
-    newXref.set("ID", [xrefInfo.fileIds[0], md5]);
-  }
-
-  const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
-  const sizes = [1, offsetSize, 2];
-  const structSize = sizes[0] + sizes[1] + sizes[2];
-  const tableLength = structSize * xrefTableData.length;
-  newXref.set("W", sizes);
-  newXref.set("Length", tableLength);
-
-  buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
-  await writeDict(newXref, buffer, null);
-  buffer.push(" stream\n");
-
-  const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
-  const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
-  const array = new Uint8Array(
-    originalData.length + bufferLen + tableLength + footer.length
+  await (useXrefStream
+    ? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
+    : getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));
+
+  const totalLength = buffer.reduce(
+    (a, str) => a + str.length,
+    originalData.length
   );
+  const array = new Uint8Array(totalLength);
 
   // Original data
   array.set(originalData);
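A hedged usage sketch of the rewritten entry point (setup as in the updated writer_spec further down, which also shows the exact bytes produced): the only caller-visible difference is the new flag, which decides what gets appended to the file.

// Sketch, assuming originalData, xrefInfo and newRefs are prepared as in writer_spec.
let data = await incrementalUpdate({
  originalData,
  xrefInfo,
  newRefs,
  // true: append "<n> <g> obj << /Type /XRef ... >> stream ... endstream endobj"
  useXrefStream: true,
});
data = await incrementalUpdate({
  originalData,
  xrefInfo,
  newRefs,
  // false: append a classic "xref ... trailer << ... >>" section instead
  useXrefStream: false,
});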
@@ -393,16 +462,6 @@ async function incrementalUpdate({
     offset += str.length;
   }
 
-  // New xref table
-  for (const [type, objOffset, gen] of xrefTableData) {
-    offset = writeInt(type, sizes[0], offset, array);
-    offset = writeInt(objOffset, sizes[1], offset, array);
-    offset = writeInt(gen, sizes[2], offset, array);
-  }
-
-  // Add the footer
-  writeString(footer, offset, array);
-
   return array;
 }
 
@@ -19,6 +19,7 @@ import {
   escapePDFName,
   escapeString,
   getInheritableProperty,
+  getSizeInBytes,
   isAscii,
   isWhiteSpace,
   log2,
@@ -468,4 +469,21 @@ describe("core_utils", function () {
       );
     });
   });
+
+  describe("getSizeInBytes", function () {
+    it("should get the size in bytes to use to represent a positive integer", function () {
+      expect(getSizeInBytes(0)).toEqual(0);
+      for (let i = 1; i <= 0xff; i++) {
+        expect(getSizeInBytes(i)).toEqual(1);
+      }
+
+      for (let i = 0x100; i <= 0xffff; i += 0x100) {
+        expect(getSizeInBytes(i)).toEqual(2);
+      }
+
+      for (let i = 0x10000; i <= 0xffffff; i += 0x10000) {
+        expect(getSizeInBytes(i)).toEqual(3);
+      }
+    });
+  });
 });
@@ -37,26 +37,55 @@ describe("Writer", function () {
       info: {},
     };
 
-    let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
+    let data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: true,
+    });
     data = bytesToString(data);
 
-    const expected =
+    let expected =
       "\nabc\n" +
       "defg\n" +
       "789 0 obj\n" +
-      "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 456 1 789 1] " +
-      "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
-      "/W [1 1 2] /Length 16>> stream\n" +
-      "\x00\x01\xff\xff" +
-      "\x01\x01\x00\x2d" +
-      "\x01\x05\x00\x4e" +
-      "\x01\x0a\x00\x00\n" +
+      "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 456 1 789 1] " +
+      "/W [1 1 1] /ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " +
+      "/Length 9>> stream\n" +
+      "\x01\x01\x2d" +
+      "\x01\x05\x4e" +
+      "\x01\x0a\x00\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +
       "10\n" +
       "%%EOF\n";
+    expect(data).toEqual(expected);
+
+    data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: false,
+    });
+    data = bytesToString(data);
+
+    expected =
+      "\nabc\n" +
+      "defg\n" +
+      "xref\n" +
+      "123 1\n" +
+      "0000000001 00045 n\r\n" +
+      "456 1\n" +
+      "0000000005 00078 n\r\n" +
+      "789 1\n" +
+      "0000000010 00000 n\r\n" +
+      "trailer\n" +
+      "<< /Prev 314 /Size 789 " +
+      "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)]>>\n" +
+      "startxref\n" +
+      "10\n" +
+      "%%EOF\n";
     expect(data).toEqual(expected);
   });
 
@@ -74,17 +103,21 @@ describe("Writer", function () {
       info: {},
     };
 
-    let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
+    let data = await incrementalUpdate({
+      originalData,
+      xrefInfo,
+      newRefs,
+      useXrefStream: true,
+    });
     data = bytesToString(data);
 
     const expected =
       "\nabc\n" +
       "789 0 obj\n" +
-      "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 789 1] " +
-      "/W [1 1 2] /Length 12>> stream\n" +
-      "\x00\x01\xff\xff" +
-      "\x01\x01\x00\x2d" +
-      "\x01\x05\x00\x00\n" +
+      "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 789 1] " +
+      "/W [1 1 1] /Length 6>> stream\n" +
+      "\x01\x01\x2d" +
+      "\x01\x05\x00\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +
@@ -187,6 +220,7 @@ describe("Writer", function () {
       acroForm,
       xfaData,
       xref: {},
+      useXrefStream: true,
     });
     data = bytesToString(data);
 
@@ -202,8 +236,8 @@ describe("Writer", function () {
       "endstream\n" +
       "endobj\n" +
       "131415 0 obj\n" +
-      "<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" +
-      "\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001[\u0000\u0000\u0001¹\u0000\u0000\n" +
+      "<< /Prev 314 /Size 131416 /Type /XRef /Index [789 1 101112 1 131415 1] /W [1 1 0] /Length 6>> stream\n" +
+      "\x01\x01\x01[\x01¹\n" +
       "endstream\n" +
       "endobj\n" +
       "startxref\n" +