From 77b9657e578a2bc209b605498f6ada5d6770ac5c Mon Sep 17 00:00:00 2001 From: Calixte Denizet <calixte.denizet@gmail.com> Date: Fri, 3 Sep 2021 14:28:31 +0200 Subject: [PATCH] XFA - Overwrite AcroForm dictionary when saving if no datasets in XFA (bug 1720179) - aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1720179 - in some PDFs the XFA array in the AcroForm dictionary doesn't contain an entry for 'datasets' (which contains saved data), so this patch allows overwriting the AcroForm dictionary with an updated XFA array when doing an incremental update. --- src/core/catalog.js | 5 ++++ src/core/worker.js | 14 ++++++++- src/core/writer.js | 62 +++++++++++++++++++++++++++++++++++++-- test/unit/writer_spec.js | 63 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 4 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 37cc32b28..3ca66bace 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -130,6 +130,11 @@ class Catalog { return shadow(this, "acroForm", acroForm); } + get acroFormRef() { + const value = this._catDict.getRaw("AcroForm"); + return shadow(this, "acroFormRef", isRef(value) ? 
value : null); + } + get metadata() { const streamRef = this._catDict.getRaw("Metadata"); if (!isRef(streamRef)) { diff --git a/src/core/worker.js b/src/core/worker.js index 6a98cb5ef..023d5307c 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -573,6 +573,7 @@ class WorkerMessageHandler { const promises = [ pdfManager.onLoadedStream(), pdfManager.ensureCatalog("acroForm"), + pdfManager.ensureCatalog("acroFormRef"), pdfManager.ensureDoc("xref"), pdfManager.ensureDoc("startXRef"), ]; @@ -597,6 +598,7 @@ class WorkerMessageHandler { return Promise.all(promises).then(function ([ stream, acroForm, + acroFormRef, xref, startXRef, ...refs @@ -621,15 +623,22 @@ class WorkerMessageHandler { } } - const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || []; + const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null; let xfaDatasets = null; + let hasDatasets = false; if (Array.isArray(xfa)) { for (let i = 0, ii = xfa.length; i < ii; i += 2) { if (xfa[i] === "datasets") { xfaDatasets = xfa[i + 1]; + acroFormRef = null; + hasDatasets = true; } } + if (xfaDatasets === null) { + xfaDatasets = xref.getNewRef(); + } } else { + acroFormRef = null; // TODO: Support XFA streams. 
warn("Unsupported XFA type."); } @@ -666,6 +675,9 @@ class WorkerMessageHandler { newRefs, xref, datasetsRef: xfaDatasets, + hasDatasets, + acroFormRef, + acroForm, xfaData, }); }); diff --git a/src/core/writer.js b/src/core/writer.js index 1f598b7a0..c6531979d 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -146,10 +146,54 @@ function writeXFADataForAcroform(str, newRefs) { return buffer.join(""); } -function updateXFA(xfaData, datasetsRef, newRefs, xref) { - if (datasetsRef === null || xref === null) { +function updateXFA({ + xfaData, + datasetsRef, + hasDatasets, + acroFormRef, + acroForm, + newRefs, + xref, + xrefInfo, +}) { + if (xref === null) { return; } + + if (!hasDatasets) { + if (!acroFormRef) { + warn("XFA - Cannot save it"); + return; + } + + // We've a XFA array which doesn't contain a datasets entry. + // So we'll update the AcroForm dictionary to have an XFA containing + // the datasets. + const oldXfa = acroForm.get("XFA"); + const newXfa = oldXfa.slice(); + newXfa.splice(2, 0, "datasets"); + newXfa.splice(3, 0, datasetsRef); + + acroForm.set("XFA", newXfa); + + const encrypt = xref.encrypt; + let transform = null; + if (encrypt) { + transform = encrypt.createCipherTransform( + acroFormRef.num, + acroFormRef.gen + ); + } + + const buffer = [`${acroFormRef.num} ${acroFormRef.gen} obj\n`]; + writeDict(acroForm, buffer, transform); + buffer.push("\n"); + + acroForm.set("XFA", oldXfa); + + newRefs.push({ ref: acroFormRef, data: buffer.join("") }); + } + if (xfaData === null) { const datasets = xref.fetchIfRef(datasetsRef); xfaData = writeXFADataForAcroform(datasets.getString(), newRefs); @@ -178,9 +222,21 @@ function incrementalUpdate({ newRefs, xref = null, datasetsRef = null, + hasDatasets = false, + acroFormRef = null, + acroForm = null, xfaData = null, }) { - updateXFA(xfaData, datasetsRef, newRefs, xref); + updateXFA({ + xfaData, + datasetsRef, + hasDatasets, + acroFormRef, + acroForm, + newRefs, + xref, + xrefInfo, + }); const newXref 
= new Dict(null); const refForXrefTable = xrefInfo.newRef; diff --git a/test/unit/writer_spec.js b/test/unit/writer_spec.js index 767349aed..81d978547 100644 --- a/test/unit/writer_spec.js +++ b/test/unit/writer_spec.js @@ -142,4 +142,67 @@ describe("Writer", function () { expect(buffer.join("")).toEqual(expected); }); }); + + describe("XFA", function () { + it("should update AcroForm when no datasets in XFA array", function () { + const originalData = new Uint8Array(); + const newRefs = []; + + const acroForm = new Dict(null); + acroForm.set("XFA", [ + "preamble", + Ref.get(123, 0), + "postamble", + Ref.get(456, 0), + ]); + const acroFormRef = Ref.get(789, 0); + const datasetsRef = Ref.get(101112, 0); + const xfaData = "<hello>world</hello>"; + + const xrefInfo = { + newRef: Ref.get(131415, 0), + startXRef: 314, + fileIds: null, + rootRef: null, + infoRef: null, + encryptRef: null, + filename: "foo.pdf", + info: {}, + }; + + let data = incrementalUpdate({ + originalData, + xrefInfo, + newRefs, + datasetsRef, + hasDatasets: false, + acroFormRef, + acroForm, + xfaData, + xref: {}, + }); + data = bytesToString(data); + + const expected = + "\n" + + "789 0 obj\n" + + "<< /XFA [(preamble) 123 0 R (datasets) 101112 0 R (postamble) 456 0 R]>>\n" + + "101112 0 obj\n" + + "<< /Type /EmbeddedFile /Length 20>>\n" + + "stream\n" + + "<hello>world</hello>\n" + + "endstream\n" + + "endobj\n" + + "131415 0 obj\n" + + "<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" + + "\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001T\u0000\u0000\u0001²\u0000\u0000\n" + + "endstream\n" + + "endobj\n" + + "startxref\n" + + "178\n" + + "%%EOF\n"; + + expect(data).toEqual(expected); + }); + }); });