Improve handling of named destinations in out-of-order NameTrees (PR 10274 follow-up)

According to the specification, see https://web.archive.org/web/20210404042322if_/https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.2384179, the keys of a NameTree/NumberTree should be ordered.
For corrupt PDF files, which violate this assumption, it's thus possible that trying to lookup a single entry fails.

Previously, in PR 10274, we implemented a fallback that only applies to the "bottom" node of a NameTree/NumberTree, which in general might not actually help for sufficiently corrupt NameTree/NumberTree data.
Instead we remove the current *limited* fallback from `NameOrNumberTree.get`, and defer to the call-site to handle this case explicitly e.g. by using `NameOrNumberTree.getAll` for data where that makes sense. For well-formed documents, these changes should *not* lead to any additional data fetching/parsing.

Finally, as part of these changes, the validation of named destination data is improved in the `Catalog` and a new unit-test is also added.
This commit is contained in:
Jonas Jenwald 2021-05-21 13:58:33 +02:00
parent faf6b10939
commit 8d5689387b
6 changed files with 58 additions and 27 deletions

View File

@ -53,7 +53,10 @@ import { MetadataParser } from "./metadata_parser.js";
import { StructTreeRoot } from "./struct_tree.js";
function fetchDestination(dest) {
return isDict(dest) ? dest.get("D") : dest;
if (dest instanceof Dict) {
dest = dest.get("D");
}
return Array.isArray(dest) ? dest : null;
}
class Catalog {
@ -515,22 +518,41 @@ class Catalog {
dests = Object.create(null);
if (obj instanceof NameTree) {
for (const [key, value] of obj.getAll()) {
dests[key] = fetchDestination(value);
const dest = fetchDestination(value);
if (dest) {
dests[key] = dest;
}
}
} else if (obj instanceof Dict) {
obj.forEach(function (key, value) {
if (value) {
dests[key] = fetchDestination(value);
const dest = fetchDestination(value);
if (dest) {
dests[key] = dest;
}
});
}
return shadow(this, "destinations", dests);
}
getDestination(destinationId) {
getDestination(id) {
const obj = this._readDests();
if (obj instanceof NameTree || obj instanceof Dict) {
return fetchDestination(obj.get(destinationId) || null);
if (obj instanceof NameTree) {
const dest = fetchDestination(obj.get(id));
if (dest) {
return dest;
}
// Fallback to checking the *entire* NameTree, in an attempt to handle
// corrupt PDF documents with out-of-order NameTrees (fixes issue 10272).
const allDest = this.destinations[id];
if (allDest) {
warn(`Found "${id}" at an incorrect position in the NameTree.`);
return allDest;
}
} else if (obj instanceof Dict) {
const dest = fetchDestination(obj.get(id));
if (dest) {
return dest;
}
}
return null;
}

View File

@ -13,7 +13,7 @@
* limitations under the License.
*/
import { FormatError, info, unreachable, warn } from "../shared/util.js";
import { FormatError, unreachable, warn } from "../shared/util.js";
import { isDict, RefSet } from "./primitives.js";
/**
@ -133,23 +133,6 @@ class NameOrNumberTree {
return xref.fetchIfRef(entries[m + 1]);
}
}
// Fallback to an exhaustive search, in an attempt to handle corrupt
// PDF files where keys are not correctly ordered (fixes issue 10272).
info(
`Falling back to an exhaustive search, for key "${key}", ` +
`in "${this._type}" tree.`
);
for (let m = 0, mm = entries.length; m < mm; m += 2) {
const currentKey = xref.fetchIfRef(entries[m]);
if (currentKey === key) {
warn(
`The "${key}" key was found at an incorrect, ` +
`i.e. out-of-order, position in "${this._type}" tree.`
);
return xref.fetchIfRef(entries[m + 1]);
}
}
}
return null;
}

View File

@ -730,8 +730,9 @@ class PDFDocumentProxy {
/**
* @param {string} id - The named destination to get.
* @returns {Promise<Array<any>>} A promise that is resolved with all
* information of the given named destination.
* @returns {Promise<Array<any> | null>} A promise that is resolved with all
* information of the given named destination, or `null` when the named
* destination is not present in the PDF file.
*/
getDestination(id) {
return this._transport.getDestination(id);

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/2598691/ATS_TEST_PVC_2_1.pdf

View File

@ -917,6 +917,12 @@
"link": true,
"type": "load"
},
{ "id": "issue10272",
"file": "pdfs/issue10272.pdf",
"md5": "bf3b2f74c6878d38a70dc0825f1b9a02",
"link": true,
"type": "other"
},
{ "id": "issue10388",
"file": "pdfs/issue10388_reduced.pdf",
"md5": "62a6b2adbea1535432bd94a3516e2d4c",

View File

@ -669,6 +669,24 @@ describe("api", function () {
await loadingTask.destroy();
});
it("gets a destination, from out-of-order /Names (NameTree) dictionary (issue 10272)", async function () {
if (isNodeJS) {
pending("Linked test-cases are not supported in Node.js.");
}
const loadingTask = getDocument(buildGetDocumentParams("issue10272.pdf"));
const pdfDoc = await loadingTask.promise;
const destination = await pdfDoc.getDestination("link_1");
expect(destination).toEqual([
{ num: 17, gen: 0 },
{ name: "XYZ" },
69,
125,
0,
]);
await loadingTask.destroy();
});
it("gets non-string destination", async function () {
let numberPromise = pdfDocument.getDestination(4.3);
let booleanPromise = pdfDocument.getDestination(true);