Compress the data when saving annotations

The CompressionStream API was added in Firefox 113
(see https://bugzilla.mozilla.org/show_bug.cgi?id=1823619),
so we can use it to compress the streams of added or modified
annotations.
This commit is contained in:
Calixte Denizet 2023-04-27 21:50:27 +02:00
parent 8f2d8f62f3
commit 2486536843
8 changed files with 156 additions and 38 deletions

View File

@ -273,7 +273,7 @@ class AnnotationFactory {
baseFont.set("Encoding", Name.get("WinAnsiEncoding"));
const buffer = [];
baseFontRef = xref.getNewTemporaryRef();
writeObject(baseFontRef, baseFont, buffer, null);
await writeObject(baseFontRef, baseFont, buffer, null);
dependencies.push({ ref: baseFontRef, data: buffer.join("") });
}
promises.push(
@ -1479,7 +1479,7 @@ class MarkupAnnotation extends Annotation {
const transform = xref.encrypt
? xref.encrypt.createCipherTransform(apRef.num, apRef.gen)
: null;
writeObject(apRef, ap, buffer, transform);
await writeObject(apRef, ap, buffer, transform);
dependencies.push({ ref: apRef, data: buffer.join("") });
} else {
annotationDict = this.createNewDict(annotation, xref, {});
@ -1489,7 +1489,7 @@ class MarkupAnnotation extends Annotation {
const transform = xref.encrypt
? xref.encrypt.createCipherTransform(annotationRef.num, annotationRef.gen)
: null;
writeObject(annotationRef, annotationDict, buffer, transform);
await writeObject(annotationRef, annotationDict, buffer, transform);
return { ref: annotationRef, data: buffer.join("") };
}
@ -1922,7 +1922,7 @@ class WidgetAnnotation extends Annotation {
appearanceDict.set("Matrix", rotationMatrix);
}
writeObject(newRef, appearanceStream, buffer, newTransform);
await writeObject(newRef, appearanceStream, buffer, newTransform);
changes.push(
// data for the new AP
@ -1937,7 +1937,7 @@ class WidgetAnnotation extends Annotation {
}
dict.set("M", `D:${getModificationDate()}`);
writeObject(this.ref, dict, buffer, originalTransform);
await writeObject(this.ref, dict, buffer, originalTransform);
changes[0].data = buffer.join("");
@ -2814,7 +2814,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`];
writeDict(dict, buffer, originalTransform);
await writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n");
return [{ ref: this.ref, data: buffer.join(""), xfa }];
@ -2873,7 +2873,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
parent.set("V", name);
parentBuffer = [`${this.parent.num} ${this.parent.gen} obj\n`];
writeDict(parent, parentBuffer, parentTransform);
await writeDict(parent, parentBuffer, parentTransform);
parentBuffer.push("\nendobj\n");
} else if (this.parent instanceof Dict) {
this.parent.set("V", name);
@ -2897,7 +2897,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
}
const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`];
writeDict(dict, buffer, originalTransform);
await writeDict(dict, buffer, originalTransform);
buffer.push("\nendobj\n");
const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }];

View File

@ -297,7 +297,7 @@ class Page {
);
}
writeObject(this.ref, pageDict, buffer, transform);
await writeObject(this.ref, pageDict, buffer, transform);
if (savedDict) {
pageDict.set("Annots", savedDict);
}

View File

@ -13,7 +13,7 @@
* limitations under the License.
*/
import { bytesToString, warn } from "../shared/util.js";
import { bytesToString, info, stringToBytes, warn } from "../shared/util.js";
import { Dict, Name, Ref } from "./primitives.js";
import {
escapePDFName,
@ -25,36 +25,87 @@ import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
/**
 * Serialize one indirect object into `buffer` as
 * `<num> <gen> obj\n ... \nendobj\n`.
 *
 * NOTE(review): this listing is a rendered diff; where a statement appears
 * twice, the first copy looks like the pre-change (synchronous) form and the
 * second the post-change (`async`/`await`) form.
 *
 * @param {Ref} ref - Reference whose `num` and `gen` form the object header.
 * @param {Dict|BaseStream} obj - Object to write; any other type produces an
 *   empty body between the header and `endobj`.
 * @param {Array<string>} buffer - Output chunks, appended to in order.
 * @param {Object|null} transform - Forwarded unchanged to `writeDict` /
 *   `writeStream`.
 */
function writeObject(ref, obj, buffer, transform) {
async function writeObject(ref, obj, buffer, transform) {
buffer.push(`${ref.num} ${ref.gen} obj\n`);
if (obj instanceof Dict) {
writeDict(obj, buffer, transform);
// Awaited so that the dictionary is fully pushed before "endobj" below.
await writeDict(obj, buffer, transform);
} else if (obj instanceof BaseStream) {
writeStream(obj, buffer, transform);
// Awaited so that the stream is fully pushed before "endobj" below.
await writeStream(obj, buffer, transform);
}
buffer.push("\nendobj\n");
}
/**
 * Serialize a dictionary into `buffer` as `<< /Key value ... >>`.
 *
 * NOTE(review): this listing is a rendered diff; where a statement appears
 * twice, the first copy looks like the pre-change (synchronous) form and the
 * second the post-change (`async`/`await`) form.
 *
 * @param {Dict} dict - Dictionary whose keys (from `getKeys()`) and raw values
 *   (from `getRaw()`) are written.
 * @param {Array<string>} buffer - Output chunks, appended to in order.
 * @param {Object|null} transform - Forwarded unchanged to `writeValue`.
 */
function writeDict(dict, buffer, transform) {
async function writeDict(dict, buffer, transform) {
buffer.push("<<");
for (const key of dict.getKeys()) {
// Key names are escaped before being emitted.
buffer.push(` /${escapePDFName(key)} `);
writeValue(dict.getRaw(key), buffer, transform);
// Values are awaited one at a time so that each value lands in `buffer`
// right after its own key.
await writeValue(dict.getRaw(key), buffer, transform);
}
buffer.push(">>");
}
/**
 * Serialize a stream object (its dictionary followed by its data) into
 * `buffer`.
 *
 * When `CompressionStream` is available, the data is deflate-compressed if it
 * is at least `MIN_LENGTH_FOR_COMPRESSING` characters long or if the stream
 * dictionary already declares a `FlateDecode` filter. Encryption, when a
 * `transform` is given, is always applied *after* compression: a PDF reader
 * decrypts the stream data first and only then runs the decoding filters, so
 * encrypting before compressing would yield an unreadable stream.
 *
 * Side effects: sets `Length` on `stream.dict`, and sets `Filter` when
 * compression has been added.
 *
 * @param {BaseStream} stream - Stream providing `getString()` and `dict`.
 * @param {Array<string>} buffer - Output chunks, appended to in order.
 * @param {Object|null} transform - Cipher transform exposing
 *   `encryptString()`, or `null` when the document isn't encrypted.
 * @returns {Promise<undefined>}
 */
async function writeStream(stream, buffer, transform) {
  let string = stream.getString();
  const { dict } = stream;

  // eslint-disable-next-line no-undef
  if (typeof CompressionStream !== "undefined") {
    const filter = await dict.getAsync("Filter");
    const flateDecode = Name.get("FlateDecode");
    const hasFlateDecode =
      (Array.isArray(filter) && filter.includes(flateDecode)) ||
      (filter instanceof Name && filter.name === flateDecode.name);

    // If the string is too small there is no real benefit
    // in compressing it.
    // The number 256 is arbitrary, but it should be reasonable.
    const MIN_LENGTH_FOR_COMPRESSING = 256;

    if (string.length >= MIN_LENGTH_FOR_COMPRESSING || hasFlateDecode) {
      try {
        const byteArray = stringToBytes(string);
        // eslint-disable-next-line no-undef
        const cs = new CompressionStream("deflate");
        const writer = cs.writable.getWriter();
        // The write/close promises aren't awaited before reading: with
        // backpressure they may only settle once the readable side is
        // consumed. A failure also errors `cs.readable`, hence it's reported
        // through the `await` below; the handler here only prevents an
        // unhandled rejection.
        Promise.all([writer.write(byteArray), writer.close()]).catch(() => {});
        // Response::text doesn't return the correct data.
        const buf = await new Response(cs.readable).arrayBuffer();
        string = bytesToString(new Uint8Array(buf));

        if (!hasFlateDecode) {
          // The deflate step is the outermost encoding, hence it has to be
          // the first filter applied when decoding: prepend it. A new array is
          // created in order to avoid mutating the one stored in the dict.
          if (!filter) {
            dict.set("Filter", flateDecode);
          } else if (Array.isArray(filter)) {
            dict.set("Filter", [flateDecode, ...filter]);
          } else {
            dict.set("Filter", [flateDecode, filter]);
          }
        }
      } catch (ex) {
        // Best effort: fall back to writing the uncompressed data.
        info(`writeStream - cannot compress data: "${ex}".`);
      }
    }
  }

  // Encrypt last, i.e. once all the encoding filters have been applied.
  if (transform !== null) {
    string = transform.encryptString(string);
  }

  dict.set("Length", string.length);
  await writeDict(dict, buffer, transform);
  buffer.push(" stream\n", string, "\nendstream");
}
function writeArray(array, buffer, transform) {
async function writeArray(array, buffer, transform) {
buffer.push("[");
let first = true;
for (const val of array) {
@ -63,18 +114,18 @@ function writeArray(array, buffer, transform) {
} else {
first = false;
}
writeValue(val, buffer, transform);
await writeValue(val, buffer, transform);
}
buffer.push("]");
}
function writeValue(value, buffer, transform) {
async function writeValue(value, buffer, transform) {
if (value instanceof Name) {
buffer.push(`/${escapePDFName(value.name)}`);
} else if (value instanceof Ref) {
buffer.push(`${value.num} ${value.gen} R`);
} else if (Array.isArray(value)) {
writeArray(value, buffer, transform);
await writeArray(value, buffer, transform);
} else if (typeof value === "string") {
if (transform !== null) {
value = transform.encryptString(value);
@ -85,9 +136,9 @@ function writeValue(value, buffer, transform) {
} else if (typeof value === "boolean") {
buffer.push(value.toString());
} else if (value instanceof Dict) {
writeDict(value, buffer, transform);
await writeDict(value, buffer, transform);
} else if (value instanceof BaseStream) {
writeStream(value, buffer, transform);
await writeStream(value, buffer, transform);
} else if (value === null) {
buffer.push("null");
} else {
@ -160,7 +211,7 @@ function writeXFADataForAcroform(str, newRefs) {
return buffer.join("");
}
function updateAcroform({
async function updateAcroform({
xref,
acroForm,
acroFormRef,
@ -206,7 +257,7 @@ function updateAcroform({
}
const buffer = [];
writeObject(acroFormRef, dict, buffer, transform);
await writeObject(acroFormRef, dict, buffer, transform);
newRefs.push({ ref: acroFormRef, data: buffer.join("") });
}
@ -234,7 +285,7 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
newRefs.push({ ref: xfaDatasetsRef, data });
}
function incrementalUpdate({
async function incrementalUpdate({
originalData,
xrefInfo,
newRefs,
@ -247,7 +298,7 @@ function incrementalUpdate({
acroForm = null,
xfaData = null,
}) {
updateAcroform({
await updateAcroform({
xref,
acroForm,
acroFormRef,
@ -328,7 +379,7 @@ function incrementalUpdate({
newXref.set("Length", tableLength);
buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
writeDict(newXref, buffer, null);
await writeDict(newXref, buffer, null);
buffer.push(" stream\n");
const bufferLen = buffer.reduce((a, str) => a + str.length, 0);

View File

@ -590,3 +590,4 @@
!copy_paste_ligatures.pdf
!issue16316.pdf
!issue14565.pdf
!multiline.pdf

BIN
test/pdfs/multiline.pdf Executable file

Binary file not shown.

View File

@ -7580,5 +7580,19 @@
"rounds": 1,
"annotations": true,
"type": "eq"
},
{
"id": "multiline_compress",
"file": "pdfs/multiline.pdf",
"md5": "4727c7d1e4e5c7d45fded8ab7a2e05e5",
"rounds": 1,
"type": "eq",
"save": true,
"print": true,
"annotationStorage": {
"24R": {
"value": "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
}
}
}
]

View File

@ -2115,6 +2115,58 @@ describe("annotation", function () {
);
});
it("should compress and save text", async function () {
  // Set up a text widget served by a mocked xref table.
  const widgetRef = Ref.get(123, 0);
  const xrefMock = new XRefMock([
    { ref: widgetRef, data: textWidgetDict },
    helvRefObj,
  ]);
  partialEvaluator.xref = xrefMock;

  const workerTask = new WorkerTask("test save");
  const widget = await AnnotationFactory.create(
    xrefMock,
    widgetRef,
    pdfManagerMock,
    idFactoryMock
  );

  // Store a long value for the widget; the expectations below require the
  // saved appearance stream to come out Flate-compressed.
  const fieldValue = "a".repeat(256);
  const storage = new Map([[widget.data.id, { value: fieldValue }]]);

  const saved = await widget.save(partialEvaluator, workerTask, storage);
  expect(saved.length).toEqual(2);

  const [widgetUpdate, appearanceUpdate] = saved;
  expect(widgetUpdate.ref).toEqual(Ref.get(123, 0));
  expect(appearanceUpdate.ref).toEqual(Ref.get(2, 0));

  // The modification date changes on every run, hence it's normalized
  // before comparing.
  widgetUpdate.data = widgetUpdate.data.replace(/\(D:\d+\)/, "(date)");
  const expectedWidget = [
    "123 0 obj\n",
    "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR ",
    "<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] ",
    `/V (${fieldValue}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n`,
  ].join("");
  expect(widgetUpdate.data).toEqual(expectedWidget);

  // The 68 bytes expected as the compressed appearance stream (this matches
  // the "/Length 68" entry below).
  const deflatedBytes = [
    120, 156, 211, 15, 169, 80, 112, 242, 117, 86, 40, 84, 112, 10, 81, 208,
    247, 72, 205, 41, 83, 48, 85, 8, 73, 83, 48, 84, 48, 0, 66, 8, 25, 146,
    171, 96, 164, 96, 172, 103, 96, 174, 16, 146, 162, 160, 145, 56, 194,
    129, 166, 66, 72, 150, 130, 107, 136, 66, 160, 130, 171, 175, 51, 0,
    222, 235, 111, 133,
  ];
  const deflatedString = deflatedBytes
    .map(code => String.fromCharCode(code))
    .join("");
  const expectedAppearance =
    "2 0 obj\n<< /Subtype /Form /Resources " +
    "<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10] /Filter /FlateDecode /Length 68>> stream\n" +
    deflatedString +
    "\nendstream\nendobj\n";
  expect(appearanceUpdate.data).toEqual(expectedAppearance);
});
it("should get field object for usage in JS sandbox", async function () {
const textWidgetRef = Ref.get(123, 0);
const xDictRef = Ref.get(141, 0);

View File

@ -20,7 +20,7 @@ import { StringStream } from "../../src/core/stream.js";
describe("Writer", function () {
describe("Incremental update", function () {
it("should update a file with new objects", function () {
it("should update a file with new objects", async function () {
const originalData = new Uint8Array();
const newRefs = [
{ ref: Ref.get(123, 0x2d), data: "abc\n" },
@ -37,7 +37,7 @@ describe("Writer", function () {
info: {},
};
let data = incrementalUpdate({ originalData, xrefInfo, newRefs });
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
data = bytesToString(data);
const expected =
@ -60,7 +60,7 @@ describe("Writer", function () {
expect(data).toEqual(expected);
});
it("should update a file, missing the /ID-entry, with new objects", function () {
it("should update a file, missing the /ID-entry, with new objects", async function () {
const originalData = new Uint8Array();
const newRefs = [{ ref: Ref.get(123, 0x2d), data: "abc\n" }];
const xrefInfo = {
@ -74,7 +74,7 @@ describe("Writer", function () {
info: {},
};
let data = incrementalUpdate({ originalData, xrefInfo, newRefs });
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
data = bytesToString(data);
const expected =
@ -96,7 +96,7 @@ describe("Writer", function () {
});
describe("writeDict", function () {
it("should write a Dict", function () {
it("should write a Dict", async function () {
const dict = new Dict(null);
dict.set("A", Name.get("B"));
dict.set("B", Ref.get(123, 456));
@ -121,7 +121,7 @@ describe("Writer", function () {
dict.set("NullVal", null);
const buffer = [];
writeDict(dict, buffer, null);
await writeDict(dict, buffer, null);
const expected =
"<< /A /B /B 123 456 R /C 789 /D (hello world) " +
@ -134,14 +134,14 @@ describe("Writer", function () {
expect(buffer.join("")).toEqual(expected);
});
it("should write a Dict in escaping PDF names", function () {
it("should write a Dict in escaping PDF names", async function () {
const dict = new Dict(null);
dict.set("\xfeA#", Name.get("hello"));
dict.set("B", Name.get("#hello"));
dict.set("C", Name.get("he\xfello\xff"));
const buffer = [];
writeDict(dict, buffer, null);
await writeDict(dict, buffer, null);
const expected = "<< /#feA#23 /hello /B /#23hello /C /he#fello#ff>>";
@ -150,7 +150,7 @@ describe("Writer", function () {
});
describe("XFA", function () {
it("should update AcroForm when no datasets in XFA array", function () {
it("should update AcroForm when no datasets in XFA array", async function () {
const originalData = new Uint8Array();
const newRefs = [];
@ -176,7 +176,7 @@ describe("Writer", function () {
info: {},
};
let data = incrementalUpdate({
let data = await incrementalUpdate({
originalData,
xrefInfo,
newRefs,