diff --git a/src/core/annotation.js b/src/core/annotation.js index 9521ec53c..e642b3d83 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -273,7 +273,7 @@ class AnnotationFactory { baseFont.set("Encoding", Name.get("WinAnsiEncoding")); const buffer = []; baseFontRef = xref.getNewTemporaryRef(); - writeObject(baseFontRef, baseFont, buffer, null); + await writeObject(baseFontRef, baseFont, buffer, null); dependencies.push({ ref: baseFontRef, data: buffer.join("") }); } promises.push( @@ -1479,7 +1479,7 @@ class MarkupAnnotation extends Annotation { const transform = xref.encrypt ? xref.encrypt.createCipherTransform(apRef.num, apRef.gen) : null; - writeObject(apRef, ap, buffer, transform); + await writeObject(apRef, ap, buffer, transform); dependencies.push({ ref: apRef, data: buffer.join("") }); } else { annotationDict = this.createNewDict(annotation, xref, {}); @@ -1489,7 +1489,7 @@ class MarkupAnnotation extends Annotation { const transform = xref.encrypt ? xref.encrypt.createCipherTransform(annotationRef.num, annotationRef.gen) : null; - writeObject(annotationRef, annotationDict, buffer, transform); + await writeObject(annotationRef, annotationDict, buffer, transform); return { ref: annotationRef, data: buffer.join("") }; } @@ -1922,7 +1922,7 @@ class WidgetAnnotation extends Annotation { appearanceDict.set("Matrix", rotationMatrix); } - writeObject(newRef, appearanceStream, buffer, newTransform); + await writeObject(newRef, appearanceStream, buffer, newTransform); changes.push( // data for the new AP @@ -1937,7 +1937,7 @@ class WidgetAnnotation extends Annotation { } dict.set("M", `D:${getModificationDate()}`); - writeObject(this.ref, dict, buffer, originalTransform); + await writeObject(this.ref, dict, buffer, originalTransform); changes[0].data = buffer.join(""); @@ -2814,7 +2814,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { } const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`]; - writeDict(dict, buffer, originalTransform); + await writeDict(dict, buffer, originalTransform); buffer.push("\nendobj\n"); return [{ ref: this.ref, data: buffer.join(""), xfa }]; @@ -2873,7 +2873,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { } parent.set("V", name); parentBuffer = [`${this.parent.num} ${this.parent.gen} obj\n`]; - writeDict(parent, parentBuffer, parentTransform); + await writeDict(parent, parentBuffer, parentTransform); parentBuffer.push("\nendobj\n"); } else if (this.parent instanceof Dict) { this.parent.set("V", name); @@ -2897,7 +2897,7 @@ class ButtonWidgetAnnotation extends WidgetAnnotation { } const buffer = [`${this.ref.num} ${this.ref.gen} obj\n`]; - writeDict(dict, buffer, originalTransform); + await writeDict(dict, buffer, originalTransform); buffer.push("\nendobj\n"); const newRefs = [{ ref: this.ref, data: buffer.join(""), xfa }]; diff --git a/src/core/document.js b/src/core/document.js index f36e77d50..be6f0d728 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -297,7 +297,7 @@ class Page { ); } - writeObject(this.ref, pageDict, buffer, transform); + await writeObject(this.ref, pageDict, buffer, transform); if (savedDict) { pageDict.set("Annots", savedDict); } diff --git a/src/core/writer.js b/src/core/writer.js index 2f3e629c3..6c30acf64 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -13,7 +13,7 @@ * limitations under the License. */ -import { bytesToString, warn } from "../shared/util.js"; +import { bytesToString, info, stringToBytes, warn } from "../shared/util.js"; import { Dict, Name, Ref } from "./primitives.js"; import { escapePDFName, @@ -25,36 +25,87 @@ import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./crypto.js"; -function writeObject(ref, obj, buffer, transform) { +async function writeObject(ref, obj, buffer, transform) { buffer.push(`${ref.num} ${ref.gen} obj\n`); if (obj instanceof Dict) { - writeDict(obj, buffer, transform); + await writeDict(obj, buffer, transform); } else if (obj instanceof BaseStream) { - writeStream(obj, buffer, transform); + await writeStream(obj, buffer, transform); } buffer.push("\nendobj\n"); } -function writeDict(dict, buffer, transform) { +async function writeDict(dict, buffer, transform) { buffer.push("<<"); for (const key of dict.getKeys()) { buffer.push(` /${escapePDFName(key)} `); - writeValue(dict.getRaw(key), buffer, transform); + await writeValue(dict.getRaw(key), buffer, transform); } buffer.push(">>"); } -function writeStream(stream, buffer, transform) { +async function writeStream(stream, buffer, transform) { let string = stream.getString(); if (transform !== null) { string = transform.encryptString(string); } + + // eslint-disable-next-line no-undef + if (typeof CompressionStream === "undefined") { + stream.dict.set("Length", string.length); + await writeDict(stream.dict, buffer, transform); + buffer.push(" stream\n", string, "\nendstream"); + return; + } + + const filter = await stream.dict.getAsync("Filter"); + const flateDecode = Name.get("FlateDecode"); + + // If the string is too small there is no real benefit + // in compressing it. + // The number 256 is arbitrary, but it should be reasonable. + const MIN_LENGTH_FOR_COMPRESSING = 256; + + if ( + string.length >= MIN_LENGTH_FOR_COMPRESSING || + (Array.isArray(filter) && filter.includes(flateDecode)) || + (filter instanceof Name && filter.name === flateDecode.name) + ) { + try { + const byteArray = stringToBytes(string); + // eslint-disable-next-line no-undef + const cs = new CompressionStream("deflate"); + const writer = cs.writable.getWriter(); + writer.write(byteArray); + writer.close(); + + // Response::text doesn't return the correct data. + const buf = await new Response(cs.readable).arrayBuffer(); + string = bytesToString(new Uint8Array(buf)); + + if (Array.isArray(filter)) { + if (!filter.includes(flateDecode)) { + filter.push(flateDecode); + } + } else if (!filter) { + stream.dict.set("Filter", flateDecode); + } else if ( + !(filter instanceof Name) || + filter.name !== flateDecode.name + ) { + stream.dict.set("Filter", [filter, flateDecode]); + } + } catch (ex) { + info(`writeStream - cannot compress data: "${ex}".`); + } + } + stream.dict.set("Length", string.length); - writeDict(stream.dict, buffer, transform); + await writeDict(stream.dict, buffer, transform); buffer.push(" stream\n", string, "\nendstream"); } -function writeArray(array, buffer, transform) { +async function writeArray(array, buffer, transform) { buffer.push("["); let first = true; for (const val of array) { @@ -63,18 +114,18 @@ function writeArray(array, buffer, transform) { } else { first = false; } - writeValue(val, buffer, transform); + await writeValue(val, buffer, transform); } buffer.push("]"); } -function writeValue(value, buffer, transform) { +async function writeValue(value, buffer, transform) { if (value instanceof Name) { buffer.push(`/${escapePDFName(value.name)}`); } else if (value instanceof Ref) { buffer.push(`${value.num} ${value.gen} R`); } else if (Array.isArray(value)) { - writeArray(value, buffer, transform); + await writeArray(value, buffer, transform); } else if (typeof value === "string") { if (transform !== null) { value = transform.encryptString(value); @@ -85,9 +136,9 @@ function writeValue(value, buffer, transform) { } else if (typeof value === "boolean") { buffer.push(value.toString()); } else if (value instanceof Dict) { - writeDict(value, buffer, transform); + await writeDict(value, buffer, transform); } else if (value instanceof BaseStream) { - writeStream(value, buffer, transform); + await writeStream(value, buffer, transform); } else if (value === null) { buffer.push("null"); } else { @@ -160,7 +211,7 @@ function writeXFADataForAcroform(str, newRefs) { return buffer.join(""); } -function updateAcroform({ +async function updateAcroform({ xref, acroForm, acroFormRef, @@ -206,7 +257,7 @@ function updateAcroform({ } const buffer = []; - writeObject(acroFormRef, dict, buffer, transform); + await writeObject(acroFormRef, dict, buffer, transform); newRefs.push({ ref: acroFormRef, data: buffer.join("") }); } @@ -234,7 +285,7 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) { newRefs.push({ ref: xfaDatasetsRef, data }); } -function incrementalUpdate({ +async function incrementalUpdate({ originalData, xrefInfo, newRefs, @@ -247,7 +298,7 @@ function incrementalUpdate({ acroForm = null, xfaData = null, }) { - updateAcroform({ + await updateAcroform({ xref, acroForm, acroFormRef, @@ -328,7 +379,7 @@ function incrementalUpdate({ newXref.set("Length", tableLength); buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`); - writeDict(newXref, buffer, null); + await writeDict(newXref, buffer, null); buffer.push(" stream\n"); const bufferLen = buffer.reduce((a, str) => a + str.length, 0); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index e8becca1e..98d799d1b 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -590,3 +590,4 @@ !copy_paste_ligatures.pdf !issue16316.pdf !issue14565.pdf +!multiline.pdf diff --git a/test/pdfs/multiline.pdf b/test/pdfs/multiline.pdf new file mode 100755 index 000000000..762b4a78e Binary files /dev/null and b/test/pdfs/multiline.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index e9cfceab0..8951d3d23 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -7580,5 +7580,19 @@ "rounds": 1, "annotations": true, "type": "eq" + }, + { + "id": "multiline_compress", + "file": "pdfs/multiline.pdf", + "md5": "4727c7d1e4e5c7d45fded8ab7a2e05e5", + "rounds": 1, + "type": "eq", + "save": true, + "print": true, + "annotationStorage": { + "24R": { + "value": "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\nabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" + } + } } ] diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index b90d18a83..615de6ddd 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -2115,6 +2115,58 @@ describe("annotation", function () { ); }); + it("should compress and save text", async function () { + const textWidgetRef = Ref.get(123, 0); + const xref = new XRefMock([ + { ref: textWidgetRef, data: textWidgetDict }, + helvRefObj, + ]); + partialEvaluator.xref = xref; + const task = new WorkerTask("test save"); + + const annotation = await AnnotationFactory.create( + xref, + textWidgetRef, + pdfManagerMock, + idFactoryMock + ); + const annotationStorage = new Map(); + const value = "a".repeat(256); + annotationStorage.set(annotation.data.id, { value }); + + const data = await annotation.save( + partialEvaluator, + task, + annotationStorage + ); + expect(data.length).toEqual(2); + const [oldData, newData] = data; + expect(oldData.ref).toEqual(Ref.get(123, 0)); + expect(newData.ref).toEqual(Ref.get(2, 0)); + + oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)"); + expect(oldData.data).toEqual( + "123 0 obj\n" + + "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " + + "<< /Font << /Helv 314 0 R>>>> /Rect [0 0 32 10] " + + `/V (${value}) /AP << /N 2 0 R>> /M (date)>>\nendobj\n` + ); + + const compressedData = [ + 120, 156, 211, 15, 169, 80, 112, 242, 117, 86, 40, 84, 112, 10, 81, 208, + 247, 72, 205, 41, 83, 48, 85, 8, 73, 83, 48, 84, 48, 0, 66, 8, 25, 146, + 171, 96, 164, 96, 172, 103, 96, 174, 16, 146, 162, 160, 145, 56, 194, + 129, 166, 66, 72, 150, 130, 107, 136, 66, 160, 130, 171, 175, 51, 0, + 222, 235, 111, 133, + ]; + const compressedStream = String.fromCharCode(...compressedData); + expect(newData.data).toEqual( + "2 0 obj\n<< /Subtype /Form /Resources " + + "<< /Font << /Helv 314 0 R>>>> /BBox [0 0 32 10] /Filter /FlateDecode /Length 68>> stream\n" + + `${compressedStream}\nendstream\nendobj\n` + ); + }); + it("should get field object for usage in JS sandbox", async function () { const textWidgetRef = Ref.get(123, 0); const xDictRef = Ref.get(141, 0); diff --git a/test/unit/writer_spec.js b/test/unit/writer_spec.js index c201c6dfc..6d3101277 100644 --- a/test/unit/writer_spec.js +++ b/test/unit/writer_spec.js @@ -20,7 +20,7 @@ import { StringStream } from "../../src/core/stream.js"; describe("Writer", function () { describe("Incremental update", function () { - it("should update a file with new objects", function () { + it("should update a file with new objects", async function () { const originalData = new Uint8Array(); const newRefs = [ { ref: Ref.get(123, 0x2d), data: "abc\n" }, @@ -37,7 +37,7 @@ describe("Writer", function () { info: {}, }; - let data = incrementalUpdate({ originalData, xrefInfo, newRefs }); + let data = await incrementalUpdate({ originalData, xrefInfo, newRefs }); data = bytesToString(data); const expected = @@ -60,7 +60,7 @@ describe("Writer", function () { expect(data).toEqual(expected); }); - it("should update a file, missing the /ID-entry, with new objects", function () { + it("should update a file, missing the /ID-entry, with new objects", async function () { const originalData = new Uint8Array(); const newRefs = [{ ref: Ref.get(123, 0x2d), data: "abc\n" }]; const xrefInfo = { @@ -74,7 +74,7 @@ describe("Writer", function () { info: {}, }; - let data = incrementalUpdate({ originalData, xrefInfo, newRefs }); + let data = await incrementalUpdate({ originalData, xrefInfo, newRefs }); data = bytesToString(data); const expected = @@ -96,7 +96,7 @@ describe("Writer", function () { }); describe("writeDict", function () { - it("should write a Dict", function () { + it("should write a Dict", async function () { const dict = new Dict(null); dict.set("A", Name.get("B")); dict.set("B", Ref.get(123, 456)); @@ -121,7 +121,7 @@ describe("Writer", function () { dict.set("NullVal", null); const buffer = []; - writeDict(dict, buffer, null); + await writeDict(dict, buffer, null); const expected = "<< /A /B /B 123 456 R /C 789 /D (hello world) " + @@ -134,14 +134,14 @@ describe("Writer", function () { expect(buffer.join("")).toEqual(expected); }); - it("should write a Dict in escaping PDF names", function () { + it("should write a Dict in escaping PDF names", async function () { const dict = new Dict(null); dict.set("\xfeA#", Name.get("hello")); dict.set("B", Name.get("#hello")); dict.set("C", Name.get("he\xfello\xff")); const buffer = []; - writeDict(dict, buffer, null); + await writeDict(dict, buffer, null); const expected = "<< /#feA#23 /hello /B /#23hello /C /he#fello#ff>>"; @@ -150,7 +150,7 @@ describe("Writer", function () { }); describe("XFA", function () { - it("should update AcroForm when no datasets in XFA array", function () { + it("should update AcroForm when no datasets in XFA array", async function () { const originalData = new Uint8Array(); const newRefs = []; @@ -176,7 +176,7 @@ describe("Writer", function () { info: {}, }; - let data = incrementalUpdate({ + let data = await incrementalUpdate({ originalData, xrefInfo, newRefs,