pdf.js/test/unit/document_spec.js

154 lines
5.0 KiB
JavaScript
Raw Normal View History

/* Copyright 2017 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
Redo the form type detection logic and include unit tests Good form type detection is important to get reliable telemetry and to only show the fallback bar if a form cannot be filled out by the user. PDF.js only supports AcroForm data, so XFA data is explicitly unsupported (tracked in issue #2373). However, the previous form type detection couldn't separate AcroForm and XFA well enough, causing form type telemetry to be incorrect sometimes and the fallback bar to be shown for forms that could in fact be filled out by the user. The solution in this commit is found by studying the specification and the form documents that are available to us. In a nutshell the rules are: - There is XFA data if the `XFA` entry is a non-empty array or stream. - There is AcroForm data if the `Fields` entry is a non-empty array and it doesn't consist of only document signatures. The document signatures part was not handled in the old code, causing a document with only XFA data to also be marked as having AcroForm data. Moreover, the old code didn't check all the data types. Now that AcroForm and XFA can be distinguished, the viewer is configured to only show the fallback bar for documents that only have XFA data. If a document also has AcroForm data, the viewer can use that to render the form. We have not found documents where the XFA data was necessary in that case. Finally, we include unit tests to ensure that all cases are covered and move the form type detection out of the `parse` function so that it's only executed if the document information is actually requested (potentially making initial parsing a tiny bit faster).
2020-08-23 21:04:49 +09:00
import { createIdFactory, XRefMock } from "./test_utils.js";
import { Dict, Name, Ref } from "../../src/core/primitives.js";
import { PDFDocument } from "../../src/core/document.js";
import { StringStream } from "../../src/core/stream.js";
describe("document", function () {
describe("Page", function () {
it("should create correct objId/fontId using the idFactory", function () {
const idFactory1 = createIdFactory(/* pageIndex = */ 0);
const idFactory2 = createIdFactory(/* pageIndex = */ 1);
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory1.createObjId()).toEqual("p0_1");
expect(idFactory1.createObjId()).toEqual("p0_2");
expect(idFactory1.createFontId()).toEqual("f1");
expect(idFactory1.createFontId()).toEqual("f2");
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory1.getDocId()).toEqual("g_d0");
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory2.createObjId()).toEqual("p1_1");
expect(idFactory2.createObjId()).toEqual("p1_2");
expect(idFactory2.createFontId()).toEqual("f1");
expect(idFactory2.createFontId()).toEqual("f2");
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory2.getDocId()).toEqual("g_d0");
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory1.createObjId()).toEqual("p0_3");
expect(idFactory1.createObjId()).toEqual("p0_4");
expect(idFactory1.createFontId()).toEqual("f3");
expect(idFactory1.createFontId()).toEqual("f4");
Enable auto-formatting of the entire code-base using Prettier (issue 11444) Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes). Prettier is being used for a couple of reasons: - To be consistent with `mozilla-central`, where Prettier is already in use across the tree. - To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters. Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some). Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long. *Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit. (On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
expect(idFactory1.getDocId()).toEqual("g_d0");
});
});
Redo the form type detection logic and include unit tests Good form type detection is important to get reliable telemetry and to only show the fallback bar if a form cannot be filled out by the user. PDF.js only supports AcroForm data, so XFA data is explicitly unsupported (tracked in issue #2373). However, the previous form type detection couldn't separate AcroForm and XFA well enough, causing form type telemetry to be incorrect sometimes and the fallback bar to be shown for forms that could in fact be filled out by the user. The solution in this commit is found by studying the specification and the form documents that are available to us. In a nutshell the rules are: - There is XFA data if the `XFA` entry is a non-empty array or stream. - There is AcroForm data if the `Fields` entry is a non-empty array and it doesn't consist of only document signatures. The document signatures part was not handled in the old code, causing a document with only XFA data to also be marked as having AcroForm data. Moreover, the old code didn't check all the data types. Now that AcroForm and XFA can be distinguished, the viewer is configured to only show the fallback bar for documents that only have XFA data. If a document also has AcroForm data, the viewer can use that to render the form. We have not found documents where the XFA data was necessary in that case. Finally, we include unit tests to ensure that all cases are covered and move the form type detection out of the `parse` function so that it's only executed if the document information is actually requested (potentially making initial parsing a tiny bit faster).
2020-08-23 21:04:49 +09:00
describe("PDFDocument", function () {
const pdfManager = {
get docId() {
return "d0";
},
};
const stream = new StringStream("Dummy_PDF_data");
function getDocument(acroForm) {
const pdfDocument = new PDFDocument(pdfManager, stream);
pdfDocument.catalog = { acroForm };
return pdfDocument;
}
it("should get form info when no form data is present", function () {
const pdfDocument = getDocument(null);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
});
});
it("should get form info when XFA is present", function () {
const acroForm = new Dict();
// The `XFA` entry can only be a non-empty array or stream.
acroForm.set("XFA", []);
let pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
});
acroForm.set("XFA", ["foo", "bar"]);
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: true,
});
acroForm.set("XFA", new StringStream(""));
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
});
acroForm.set("XFA", new StringStream("non-empty"));
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: true,
});
});
it("should get form info when AcroForm is present", function () {
const acroForm = new Dict();
// The `Fields` entry can only be a non-empty array.
acroForm.set("Fields", []);
let pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
});
acroForm.set("Fields", ["foo", "bar"]);
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: true,
hasXfa: false,
});
// If the first bit of the `SigFlags` entry is set and the `Fields` array
// only contains document signatures, then there is no AcroForm data.
acroForm.set("Fields", ["foo", "bar"]);
acroForm.set("SigFlags", 2);
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: true,
hasXfa: false,
});
const annotationDict = new Dict();
annotationDict.set("FT", Name.get("Sig"));
annotationDict.set("Rect", [0, 0, 0, 0]);
const annotationRef = Ref.get(11, 0);
const kidsDict = new Dict();
kidsDict.set("Kids", [annotationRef]);
const kidsRef = Ref.get(10, 0);
pdfDocument.xref = new XRefMock([
{ ref: annotationRef, data: annotationDict },
{ ref: kidsRef, data: kidsDict },
]);
acroForm.set("Fields", [kidsRef]);
acroForm.set("SigFlags", 3);
pdfDocument = getDocument(acroForm);
expect(pdfDocument.formInfo).toEqual({
hasAcroForm: false,
hasXfa: false,
});
});
});
});