Compress the data when saving annotations
The CompressionStream API was added in Firefox 113 (see https://bugzilla.mozilla.org/show_bug.cgi?id=1823619), so we can use it to compress the streams of added/modified annotations.
This commit is contained in:
parent
8f2d8f62f3
commit
2486536843
@ -273,7 +273,7 @@ class AnnotationFactory {
|
||||
baseFont.set("Encoding", Name.get("WinAnsiEncoding"));
|
||||
const buffer = [];
|
||||
baseFontRef = xref.getNewTemporaryRef();
|
||||
writeObject(baseFontRef, baseFont, buffer, null);
|
||||
await writeObject(baseFontRef, baseFont, buffer, null);
|
||||
dependencies.push({ ref: baseFontRef, data: buffer.join("") });
|
||||
}
|
||||
promises.push(
|
||||
@ -1479,7 +1479,7 @@ class MarkupAnnotation extends Annotation {
|
||||
const transform = xref.encrypt
|
||||
? xref.encrypt.createCipherTransform(apRef.num, apRef.gen)
|
||||
: null;
|
||||
writeObject(apRef, ap, buffer, transform);
|
||||
await writeObject(apRef, ap, buffer, transform);
|
||||
dependencies.push({ ref: apRef, data: buffer.join("") });
|
||||
} else {
|
||||
annotationDict = this.createNewDict(annotation, xref, {});
|
||||
@ -1489,7 +1489,7 @@ class MarkupAnnotation extends Annotation {
|
||||
const transform = xref.encrypt
|
||||
? xref.encrypt.createCipherTransform(annotationRef.num, annotationRef.gen)
|
||||
: null;
|
||||
writeObject(annotationRef, annotationDict, buffer, transform);
|
||||
await writeObject(annotationRef, annotationDict, buffer, transform);
|
||||
|
||||
return { ref: annotationRef, data: buffer.join("") };
|
||||
}
|
||||
@ -1922,7 +1922,7 @@ class WidgetAnnotation extends Annotation {
|
||||
appearanceDict.set("Matrix", rotationMatrix);
|
||||
}
|
||||
|
||||
writeObject(newRef, appearanceStream, buffer, newTransform);
|
||||
await writeObject(newRef, appearanceStream, buffer, newTransform);
|
||||
|
||||
changes.push(
|
||||
// data for the new AP
|
||||
@ -1937,7 +1937,7 @@ class WidgetAnnotation extends Annotation {
|
||||
}
|
||||
|
||||
dict.set("M", `D:${getModificationDate()}`);
|
||||
writeObject(this.ref, dict, buffer, originalTransform);
|
||||
await writeObject(this.ref, dict, buffer, originalTransform);
|
||||
|
||||
changes[0].data = buffer.join("");
|
||||
|
||||
@ -2814,7 +2814,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
|
||||
}
|
||||
|
||||
const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`];
|
||||
writeDict(dict, buffer, originalTransform);
|
||||
await writeDict(dict, buffer, originalTransform);
|
||||
buffer.push("\nendobj\n");
|
||||
|
||||
return [{ ref: this.ref, data: buffer.join(""), xfa }];
|
||||
@ -2873,7 +2873,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
|
||||
}
|
||||
parent.set("V", name);
|
||||
parentBuffer = [`${this.parent.num} ${this.parent.gen} obj\n`];
|
||||
writeDict(parent, parentBuffer, parentTransform);
|
||||
await writeDict(parent, parentBuffer, parentTransform);
|
||||
parentBuffer.push("\nendobj\n");
|
||||
} else if (this.parent instanceof Dict) {
|
||||
this.parent.set("V", name);
|
||||
@ -2897,7 +2897,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation {
|
||||
}
|
||||
|
||||
const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`];
|
||||
writeDict(dict, buffer, originalTransform);
|
||||
await writeDict(dict, buffer, originalTransform);
|
||||
buffer.push("\nendobj\n");
|
||||
|
||||
const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }];
|
||||
|
@ -297,7 +297,7 @@ class Page {
|
||||
);
|
||||
}
|
||||
|
||||
writeObject(this.ref, pageDict, buffer, transform);
|
||||
await writeObject(this.ref, pageDict, buffer, transform);
|
||||
if (savedDict) {
|
||||
pageDict.set("Annots", savedDict);
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { bytesToString, warn } from "../shared/util.js";
|
||||
import { bytesToString, info, stringToBytes, warn } from "../shared/util.js";
|
||||
import { Dict, Name, Ref } from "./primitives.js";
|
||||
import {
|
||||
escapePDFName,
|
||||
@ -25,36 +25,87 @@ import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { calculateMD5 } from "./crypto.js";
|
||||
|
||||
function writeObject(ref, obj, buffer, transform) {
|
||||
async function writeObject(ref, obj, buffer, transform) {
|
||||
buffer.push(`${ref.num} ${ref.gen} obj\n`);
|
||||
if (obj instanceof Dict) {
|
||||
writeDict(obj, buffer, transform);
|
||||
await writeDict(obj, buffer, transform);
|
||||
} else if (obj instanceof BaseStream) {
|
||||
writeStream(obj, buffer, transform);
|
||||
await writeStream(obj, buffer, transform);
|
||||
}
|
||||
buffer.push("\nendobj\n");
|
||||
}
|
||||
|
||||
function writeDict(dict, buffer, transform) {
|
||||
async function writeDict(dict, buffer, transform) {
|
||||
buffer.push("<<");
|
||||
for (const key of dict.getKeys()) {
|
||||
buffer.push(` /${escapePDFName(key)} `);
|
||||
writeValue(dict.getRaw(key), buffer, transform);
|
||||
await writeValue(dict.getRaw(key), buffer, transform);
|
||||
}
|
||||
buffer.push(">>");
|
||||
}
|
||||
|
||||
function writeStream(stream, buffer, transform) {
|
||||
async function writeStream(stream, buffer, transform) {
|
||||
let string = stream.getString();
|
||||
if (transform !== null) {
|
||||
string = transform.encryptString(string);
|
||||
}
|
||||
|
||||
// eslint-disable-next-line no-undef
|
||||
if (typeof CompressionStream === "undefined") {
|
||||
stream.dict.set("Length", string.length);
|
||||
await writeDict(stream.dict, buffer, transform);
|
||||
buffer.push(" stream\n", string, "\nendstream");
|
||||
return;
|
||||
}
|
||||
|
||||
const filter = await stream.dict.getAsync("Filter");
|
||||
const flateDecode = Name.get("FlateDecode");
|
||||
|
||||
// If the string is too small there is no real benefit
|
||||
// in compressing it.
|
||||
// The number 256 is arbitrary, but it should be reasonable.
|
||||
const MIN_LENGTH_FOR_COMPRESSING = 256;
|
||||
|
||||
if (
|
||||
string.length >= MIN_LENGTH_FOR_COMPRESSING ||
|
||||
(Array.isArray(filter) && filter.includes(flateDecode)) ||
|
||||
(filter instanceof Name && filter.name === flateDecode.name)
|
||||
) {
|
||||
try {
|
||||
const byteArray = stringToBytes(string);
|
||||
// eslint-disable-next-line no-undef
|
||||
const cs = new CompressionStream("deflate");
|
||||
const writer = cs.writable.getWriter();
|
||||
writer.write(byteArray);
|
||||
writer.close();
|
||||
|
||||
// Response::text doesn't return the correct data.
|
||||
const buf = await new Response(cs.readable).arrayBuffer();
|
||||
string = bytesToString(new Uint8Array(buf));
|
||||
|
||||
if (Array.isArray(filter)) {
|
||||
if (!filter.includes(flateDecode)) {
|
||||
filter.push(flateDecode);
|
||||
}
|
||||
} else if (!filter) {
|
||||
stream.dict.set("Filter", flateDecode);
|
||||
} else if (
|
||||
!(filter instanceof Name) ||
|
||||
filter.name !== flateDecode.name
|
||||
) {
|
||||
stream.dict.set("Filter", [filter, flateDecode]);
|
||||
}
|
||||
} catch (ex) {
|
||||
info(`writeStream - cannot compress data: "${ex}".`);
|
||||
}
|
||||
}
|
||||
|
||||
stream.dict.set("Length", string.length);
|
||||
writeDict(stream.dict, buffer, transform);
|
||||
await writeDict(stream.dict, buffer, transform);
|
||||
buffer.push(" stream\n", string, "\nendstream");
|
||||
}
|
||||
|
||||
function writeArray(array, buffer, transform) {
|
||||
async function writeArray(array, buffer, transform) {
|
||||
buffer.push("[");
|
||||
let first = true;
|
||||
for (const val of array) {
|
||||
@ -63,18 +114,18 @@ function writeArray(array, buffer, transform) {
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
writeValue(val, buffer, transform);
|
||||
await writeValue(val, buffer, transform);
|
||||
}
|
||||
buffer.push("]");
|
||||
}
|
||||
|
||||
function writeValue(value, buffer, transform) {
|
||||
async function writeValue(value, buffer, transform) {
|
||||
if (value instanceof Name) {
|
||||
buffer.push(`/${escapePDFName(value.name)}`);
|
||||
} else if (value instanceof Ref) {
|
||||
buffer.push(`${value.num} ${value.gen} R`);
|
||||
} else if (Array.isArray(value)) {
|
||||
writeArray(value, buffer, transform);
|
||||
await writeArray(value, buffer, transform);
|
||||
} else if (typeof value === "string") {
|
||||
if (transform !== null) {
|
||||
value = transform.encryptString(value);
|
||||
@ -85,9 +136,9 @@ function writeValue(value, buffer, transform) {
|
||||
} else if (typeof value === "boolean") {
|
||||
buffer.push(value.toString());
|
||||
} else if (value instanceof Dict) {
|
||||
writeDict(value, buffer, transform);
|
||||
await writeDict(value, buffer, transform);
|
||||
} else if (value instanceof BaseStream) {
|
||||
writeStream(value, buffer, transform);
|
||||
await writeStream(value, buffer, transform);
|
||||
} else if (value === null) {
|
||||
buffer.push("null");
|
||||
} else {
|
||||
@ -160,7 +211,7 @@ function writeXFADataForAcroform(str, newRefs) {
|
||||
return buffer.join("");
|
||||
}
|
||||
|
||||
function updateAcroform({
|
||||
async function updateAcroform({
|
||||
xref,
|
||||
acroForm,
|
||||
acroFormRef,
|
||||
@ -206,7 +257,7 @@ function updateAcroform({
|
||||
}
|
||||
|
||||
const buffer = [];
|
||||
writeObject(acroFormRef, dict, buffer, transform);
|
||||
await writeObject(acroFormRef, dict, buffer, transform);
|
||||
|
||||
newRefs.push({ ref: acroFormRef, data: buffer.join("") });
|
||||
}
|
||||
@ -234,7 +285,7 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
|
||||
newRefs.push({ ref: xfaDatasetsRef, data });
|
||||
}
|
||||
|
||||
function incrementalUpdate({
|
||||
async function incrementalUpdate({
|
||||
originalData,
|
||||
xrefInfo,
|
||||
newRefs,
|
||||
@ -247,7 +298,7 @@ function incrementalUpdate({
|
||||
acroForm = null,
|
||||
xfaData = null,
|
||||
}) {
|
||||
updateAcroform({
|
||||
await updateAcroform({
|
||||
xref,
|
||||
acroForm,
|
||||
acroFormRef,
|
||||
@ -328,7 +379,7 @@ function incrementalUpdate({
|
||||
newXref.set("Length", tableLength);
|
||||
|
||||
buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
|
||||
writeDict(newXref, buffer, null);
|
||||
await writeDict(newXref, buffer, null);
|
||||
buffer.push(" stream\n");
|
||||
|
||||
const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -590,3 +590,4 @@
|
||||
!copy_paste_ligatures.pdf
|
||||
!issue16316.pdf
|
||||
!issue14565.pdf
|
||||
!multiline.pdf
|
||||
|
BIN
test/pdfs/multiline.pdf
Executable file
BIN
test/pdfs/multiline.pdf
Executable file
Binary file not shown.
@ -7580,5 +7580,19 @@
|
||||
"rounds": 1,
|
||||
"annotations": true,
|
||||
"type": "eq"
|
||||
},
|
||||
{
|
||||
"id": "multiline_compress",
|
||||
"file": "pdfs/multiline.pdf",
|
||||
"md5": "4727c7d1e4e5c7d45fded8ab7a2e05e5",
|
||||
"rounds": 1,
|
||||
"type": "eq",
|
||||
"save": true,
|
||||
"print": true,
|
||||
"annotationStorage": {
|
||||
"24R": {
|
||||
"value": "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
@ -2115,6 +2115,58 @@ describe("annotation", function () {
|
||||
);
|
||||
});
|
||||
|
||||
it("should compress and save text", async function () {
|
||||
const textWidgetRef = Ref.get(123, 0);
|
||||
const xref = new XRefMock([
|
||||
{ ref: textWidgetRef, data: textWidgetDict },
|
||||
helvRefObj,
|
||||
]);
|
||||
partialEvaluator.xref = xref;
|
||||
const task = new WorkerTask("test save");
|
||||
|
||||
const annotation = await AnnotationFactory.create(
|
||||
xref,
|
||||
textWidgetRef,
|
||||
pdfManagerMock,
|
||||
idFactoryMock
|
||||
);
|
||||
const annotationStorage = new Map();
|
||||
const value = "a".repeat(256);
|
||||
annotationStorage.set(annotation.data.id, { value });
|
||||
|
||||
const data = await annotation.save(
|
||||
partialEvaluator,
|
||||
task,
|
||||
annotationStorage
|
||||
);
|
||||
expect(data.length).toEqual(2);
|
||||
const [oldData, newData] = data;
|
||||
expect(oldData.ref).toEqual(Ref.get(123, 0));
|
||||
expect(newData.ref).toEqual(Ref.get(2, 0));
|
||||
|
||||
oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)");
|
||||
expect(oldData.data).toEqual(
|
||||
"123 0 obj\n" +
|
||||
"<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " +
|
||||
"<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " +
|
||||
`/V (${value}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n`
|
||||
);
|
||||
|
||||
const compressedData = [
|
||||
120, 156, 211, 15, 169, 80, 112, 242, 117, 86, 40, 84, 112, 10, 81, 208,
|
||||
247, 72, 205, 41, 83, 48, 85, 8, 73, 83, 48, 84, 48, 0, 66, 8, 25, 146,
|
||||
171, 96, 164, 96, 172, 103, 96, 174, 16, 146, 162, 160, 145, 56, 194,
|
||||
129, 166, 66, 72, 150, 130, 107, 136, 66, 160, 130, 171, 175, 51, 0,
|
||||
222, 235, 111, 133,
|
||||
];
|
||||
const compressedStream = String.fromCharCode(...compressedData);
|
||||
expect(newData.data).toEqual(
|
||||
"2 0 obj\n<< /Subtype /Form /Resources " +
|
||||
"<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10] /Filter /FlateDecode /Length 68>> stream\n" +
|
||||
`${compressedStream}\nendstream\nendobj\n`
|
||||
);
|
||||
});
|
||||
|
||||
it("should get field object for usage in JS sandbox", async function () {
|
||||
const textWidgetRef = Ref.get(123, 0);
|
||||
const xDictRef = Ref.get(141, 0);
|
||||
|
@ -20,7 +20,7 @@ import { StringStream } from "../../src/core/stream.js";
|
||||
|
||||
describe("Writer", function () {
|
||||
describe("Incremental update", function () {
|
||||
it("should update a file with new objects", function () {
|
||||
it("should update a file with new objects", async function () {
|
||||
const originalData = new Uint8Array();
|
||||
const newRefs = [
|
||||
{ ref: Ref.get(123, 0x2d), data: "abc\n" },
|
||||
@ -37,7 +37,7 @@ describe("Writer", function () {
|
||||
info: {},
|
||||
};
|
||||
|
||||
let data = incrementalUpdate({ originalData, xrefInfo, newRefs });
|
||||
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
|
||||
data = bytesToString(data);
|
||||
|
||||
const expected =
|
||||
@ -60,7 +60,7 @@ describe("Writer", function () {
|
||||
expect(data).toEqual(expected);
|
||||
});
|
||||
|
||||
it("should update a file, missing the /ID-entry, with new objects", function () {
|
||||
it("should update a file, missing the /ID-entry, with new objects", async function () {
|
||||
const originalData = new Uint8Array();
|
||||
const newRefs = [{ ref: Ref.get(123, 0x2d), data: "abc\n" }];
|
||||
const xrefInfo = {
|
||||
@ -74,7 +74,7 @@ describe("Writer", function () {
|
||||
info: {},
|
||||
};
|
||||
|
||||
let data = incrementalUpdate({ originalData, xrefInfo, newRefs });
|
||||
let data = await incrementalUpdate({ originalData, xrefInfo, newRefs });
|
||||
data = bytesToString(data);
|
||||
|
||||
const expected =
|
||||
@ -96,7 +96,7 @@ describe("Writer", function () {
|
||||
});
|
||||
|
||||
describe("writeDict", function () {
|
||||
it("should write a Dict", function () {
|
||||
it("should write a Dict", async function () {
|
||||
const dict = new Dict(null);
|
||||
dict.set("A", Name.get("B"));
|
||||
dict.set("B", Ref.get(123, 456));
|
||||
@ -121,7 +121,7 @@ describe("Writer", function () {
|
||||
dict.set("NullVal", null);
|
||||
|
||||
const buffer = [];
|
||||
writeDict(dict, buffer, null);
|
||||
await writeDict(dict, buffer, null);
|
||||
|
||||
const expected =
|
||||
"<< /A /B /B 123 456 R /C 789 /D (hello world) " +
|
||||
@ -134,14 +134,14 @@ describe("Writer", function () {
|
||||
expect(buffer.join("")).toEqual(expected);
|
||||
});
|
||||
|
||||
it("should write a Dict in escaping PDF names", function () {
|
||||
it("should write a Dict in escaping PDF names", async function () {
|
||||
const dict = new Dict(null);
|
||||
dict.set("\xfeA#", Name.get("hello"));
|
||||
dict.set("B", Name.get("#hello"));
|
||||
dict.set("C", Name.get("he\xfello\xff"));
|
||||
|
||||
const buffer = [];
|
||||
writeDict(dict, buffer, null);
|
||||
await writeDict(dict, buffer, null);
|
||||
|
||||
const expected = "<< /#feA#23 /hello /B /#23hello /C /he#fello#ff>>";
|
||||
|
||||
@ -150,7 +150,7 @@ describe("Writer", function () {
|
||||
});
|
||||
|
||||
describe("XFA", function () {
|
||||
it("should update AcroForm when no datasets in XFA array", function () {
|
||||
it("should update AcroForm when no datasets in XFA array", async function () {
|
||||
const originalData = new Uint8Array();
|
||||
const newRefs = [];
|
||||
|
||||
@ -176,7 +176,7 @@ describe("Writer", function () {
|
||||
info: {},
|
||||
};
|
||||
|
||||
let data = incrementalUpdate({
|
||||
let data = await incrementalUpdate({
|
||||
originalData,
|
||||
xrefInfo,
|
||||
newRefs,
|
||||
|
Loading…
Reference in New Issue
Block a user