Merge pull request #14340 from Snuffleupagus/Metadata-fetch-error

Handle errors when fetching the raw /Metadata (issue 14305)
This commit is contained in:
Tim van der Meij 2021-12-04 13:19:37 +01:00 committed by GitHub
commit 3117985c55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 3624 additions and 19 deletions

View File

@@ -50,6 +50,7 @@ import {
warn, warn,
} from "../shared/util.js"; } from "../shared/util.js";
import { NameTree, NumberTree } from "./name_number_tree.js"; import { NameTree, NumberTree } from "./name_number_tree.js";
import { BaseStream } from "./base_stream.js";
import { ColorSpace } from "./colorspace.js"; import { ColorSpace } from "./colorspace.js";
import { FileSpec } from "./file_spec.js"; import { FileSpec } from "./file_spec.js";
import { GlobalImageCache } from "./image_utils.js"; import { GlobalImageCache } from "./image_utils.js";
@@ -153,37 +154,37 @@ class Catalog {
get metadata() { get metadata() {
const streamRef = this._catDict.getRaw("Metadata"); const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) { if (!(streamRef instanceof Ref)) {
return shadow(this, "metadata", null); return shadow(this, "metadata", null);
} }
const suppressEncryption = !(
this.xref.encrypt && this.xref.encrypt.encryptMetadata
);
const stream = this.xref.fetch(streamRef, suppressEncryption);
let metadata = null; let metadata = null;
try {
const suppressEncryption = !(
this.xref.encrypt && this.xref.encrypt.encryptMetadata
);
const stream = this.xref.fetch(streamRef, suppressEncryption);
if (isStream(stream) && isDict(stream.dict)) { if (stream instanceof BaseStream && stream.dict instanceof Dict) {
const type = stream.dict.get("Type"); const type = stream.dict.get("Type");
const subtype = stream.dict.get("Subtype"); const subtype = stream.dict.get("Subtype");
if (isName(type, "Metadata") && isName(subtype, "XML")) { if (isName(type, "Metadata") && isName(subtype, "XML")) {
// XXX: This should examine the charset the XML document defines, // XXX: This should examine the charset the XML document defines,
// however since there are currently no real means to decode arbitrary // however since there are currently no real means to decode arbitrary
// charsets, let's just hope that the author of the PDF was reasonable // charsets, let's just hope that the author of the PDF was reasonable
// enough to stick with the XML default charset, which is UTF-8. // enough to stick with the XML default charset, which is UTF-8.
try {
const data = stringToUTF8String(stream.getString()); const data = stringToUTF8String(stream.getString());
if (data) { if (data) {
metadata = new MetadataParser(data).serializable; metadata = new MetadataParser(data).serializable;
} }
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
}
info("Skipping invalid metadata.");
} }
} }
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info(`Skipping invalid Metadata: "${ex}".`);
} }
return shadow(this, "metadata", metadata); return shadow(this, "metadata", metadata);
} }

View File

@@ -498,3 +498,4 @@
!poppler-91414-0-54.pdf !poppler-91414-0-54.pdf
!poppler-742-0-fuzzed.pdf !poppler-742-0-fuzzed.pdf
!poppler-937-0-fuzzed.pdf !poppler-937-0-fuzzed.pdf
!PDFBOX-3148-2-fuzzed.pdf

File diff suppressed because it is too large Load Diff

View File

@@ -1439,6 +1439,8 @@ describe("api", function () {
const { info, metadata, contentDispositionFilename, contentLength } = const { info, metadata, contentDispositionFilename, contentLength } =
await pdfDoc.getMetadata(); await pdfDoc.getMetadata();
// Custom, non-standard, information dictionary entries.
expect(info.Custom).toEqual(undefined);
// The following are PDF.js specific, non-standard, properties. // The following are PDF.js specific, non-standard, properties.
expect(info.PDFFormatVersion).toEqual(null); expect(info.PDFFormatVersion).toEqual(null);
expect(info.Language).toEqual(null); expect(info.Language).toEqual(null);
@@ -1456,6 +1458,33 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
// Regression test for a document whose /Metadata XRef entry is corrupt:
// loading the document and calling `getMetadata()` must both succeed, and
// the returned `metadata` must be `null` instead of the call rejecting.
it("gets metadata, with corrupt /Metadata XRef entry", async function () {
// Fuzzed fixture exercising the corrupt /Metadata entry named in the title.
const loadingTask = getDocument(
buildGetDocumentParams("PDFBOX-3148-2-fuzzed.pdf")
);
const pdfDoc = await loadingTask.promise;
// A rejection here would fail the test, so simply awaiting the call also
// verifies that the broken /Metadata entry does not surface as an error.
const { info, metadata, contentDispositionFilename, contentLength } =
await pdfDoc.getMetadata();
// Custom, non-standard, information dictionary entries.
expect(info.Custom).toEqual(undefined);
// The following are PDF.js specific, non-standard, properties.
expect(info.PDFFormatVersion).toEqual("1.6");
expect(info.Language).toEqual(null);
expect(info.EncryptFilterName).toEqual(null);
expect(info.IsLinearized).toEqual(false);
expect(info.IsAcroFormPresent).toEqual(true);
expect(info.IsXFAPresent).toEqual(false);
expect(info.IsCollectionPresent).toEqual(false);
expect(info.IsSignaturesPresent).toEqual(false);
// The key assertion: no parsed metadata is exposed for this document.
expect(metadata).toEqual(null);
expect(contentDispositionFilename).toEqual(null);
// NOTE(review): presumably the byte length of the fixture file; confirm
// against the checked-in PDF if the fixture is ever regenerated.
expect(contentLength).toEqual(244351);
// Tear down the loading task now that all expectations have been recorded.
await loadingTask.destroy();
});
it("gets markInfo", async function () { it("gets markInfo", async function () {
const loadingTask = getDocument( const loadingTask = getDocument(
buildGetDocumentParams("annotation-line.pdf") buildGetDocumentParams("annotation-line.pdf")