Merge pull request #14340 from Snuffleupagus/Metadata-fetch-error

Handle errors when fetching the raw /Metadata (issue 14305)
This commit is contained in:
Tim van der Meij 2021-12-04 13:19:37 +01:00 committed by GitHub
commit 3117985c55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 3624 additions and 19 deletions

View File

@ -50,6 +50,7 @@ import {
warn,
} from "../shared/util.js";
import { NameTree, NumberTree } from "./name_number_tree.js";
import { BaseStream } from "./base_stream.js";
import { ColorSpace } from "./colorspace.js";
import { FileSpec } from "./file_spec.js";
import { GlobalImageCache } from "./image_utils.js";
@ -153,37 +154,37 @@ class Catalog {
get metadata() {
const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) {
if (!(streamRef instanceof Ref)) {
return shadow(this, "metadata", null);
}
const suppressEncryption = !(
this.xref.encrypt && this.xref.encrypt.encryptMetadata
);
const stream = this.xref.fetch(streamRef, suppressEncryption);
let metadata = null;
try {
const suppressEncryption = !(
this.xref.encrypt && this.xref.encrypt.encryptMetadata
);
const stream = this.xref.fetch(streamRef, suppressEncryption);
if (isStream(stream) && isDict(stream.dict)) {
const type = stream.dict.get("Type");
const subtype = stream.dict.get("Subtype");
if (stream instanceof BaseStream && stream.dict instanceof Dict) {
const type = stream.dict.get("Type");
const subtype = stream.dict.get("Subtype");
if (isName(type, "Metadata") && isName(subtype, "XML")) {
// XXX: This should examine the charset the XML document defines,
// however since there are currently no real means to decode arbitrary
// charsets, let's just hope that the author of the PDF was reasonable
// enough to stick with the XML default charset, which is UTF-8.
try {
if (isName(type, "Metadata") && isName(subtype, "XML")) {
// XXX: This should examine the charset the XML document defines,
// however since there are currently no real means to decode arbitrary
// charsets, let's just hope that the author of the PDF was reasonable
// enough to stick with the XML default charset, which is UTF-8.
const data = stringToUTF8String(stream.getString());
if (data) {
metadata = new MetadataParser(data).serializable;
}
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
info("Skipping invalid metadata.");
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info(`Skipping invalid Metadata: "${ex}".`);
}
return shadow(this, "metadata", metadata);
}

View File

@ -498,3 +498,4 @@
!poppler-91414-0-54.pdf
!poppler-742-0-fuzzed.pdf
!poppler-937-0-fuzzed.pdf
!PDFBOX-3148-2-fuzzed.pdf

File diff suppressed because it is too large Load Diff

View File

@ -1439,6 +1439,8 @@ describe("api", function () {
const { info, metadata, contentDispositionFilename, contentLength } =
await pdfDoc.getMetadata();
// Custom, non-standard, information dictionary entries.
expect(info.Custom).toEqual(undefined);
// The following are PDF.js specific, non-standard, properties.
expect(info.PDFFormatVersion).toEqual(null);
expect(info.Language).toEqual(null);
@ -1456,6 +1458,33 @@ describe("api", function () {
await loadingTask.destroy();
});
it("gets metadata, with corrupt /Metadata XRef entry", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("PDFBOX-3148-2-fuzzed.pdf")
);
const pdfDoc = await loadingTask.promise;
const { info, metadata, contentDispositionFilename, contentLength } =
await pdfDoc.getMetadata();
// Custom, non-standard, information dictionary entries.
expect(info.Custom).toEqual(undefined);
// The following are PDF.js specific, non-standard, properties.
expect(info.PDFFormatVersion).toEqual("1.6");
expect(info.Language).toEqual(null);
expect(info.EncryptFilterName).toEqual(null);
expect(info.IsLinearized).toEqual(false);
expect(info.IsAcroFormPresent).toEqual(true);
expect(info.IsXFAPresent).toEqual(false);
expect(info.IsCollectionPresent).toEqual(false);
expect(info.IsSignaturesPresent).toEqual(false);
expect(metadata).toEqual(null);
expect(contentDispositionFilename).toEqual(null);
expect(contentLength).toEqual(244351);
await loadingTask.destroy();
});
it("gets markInfo", async function () {
const loadingTask = getDocument(
buildGetDocumentParams("annotation-line.pdf")