Merge pull request #14340 from Snuffleupagus/Metadata-fetch-error
Handle errors when fetching the raw /Metadata (issue 14305)
This commit is contained in:
commit
3117985c55
@ -50,6 +50,7 @@ import {
|
|||||||
warn,
|
warn,
|
||||||
} from "../shared/util.js";
|
} from "../shared/util.js";
|
||||||
import { NameTree, NumberTree } from "./name_number_tree.js";
|
import { NameTree, NumberTree } from "./name_number_tree.js";
|
||||||
|
import { BaseStream } from "./base_stream.js";
|
||||||
import { ColorSpace } from "./colorspace.js";
|
import { ColorSpace } from "./colorspace.js";
|
||||||
import { FileSpec } from "./file_spec.js";
|
import { FileSpec } from "./file_spec.js";
|
||||||
import { GlobalImageCache } from "./image_utils.js";
|
import { GlobalImageCache } from "./image_utils.js";
|
||||||
@ -153,37 +154,37 @@ class Catalog {
|
|||||||
|
|
||||||
get metadata() {
|
get metadata() {
|
||||||
const streamRef = this._catDict.getRaw("Metadata");
|
const streamRef = this._catDict.getRaw("Metadata");
|
||||||
if (!isRef(streamRef)) {
|
if (!(streamRef instanceof Ref)) {
|
||||||
return shadow(this, "metadata", null);
|
return shadow(this, "metadata", null);
|
||||||
}
|
}
|
||||||
|
|
||||||
const suppressEncryption = !(
|
|
||||||
this.xref.encrypt && this.xref.encrypt.encryptMetadata
|
|
||||||
);
|
|
||||||
const stream = this.xref.fetch(streamRef, suppressEncryption);
|
|
||||||
let metadata = null;
|
let metadata = null;
|
||||||
|
try {
|
||||||
|
const suppressEncryption = !(
|
||||||
|
this.xref.encrypt && this.xref.encrypt.encryptMetadata
|
||||||
|
);
|
||||||
|
const stream = this.xref.fetch(streamRef, suppressEncryption);
|
||||||
|
|
||||||
if (isStream(stream) && isDict(stream.dict)) {
|
if (stream instanceof BaseStream && stream.dict instanceof Dict) {
|
||||||
const type = stream.dict.get("Type");
|
const type = stream.dict.get("Type");
|
||||||
const subtype = stream.dict.get("Subtype");
|
const subtype = stream.dict.get("Subtype");
|
||||||
|
|
||||||
if (isName(type, "Metadata") && isName(subtype, "XML")) {
|
if (isName(type, "Metadata") && isName(subtype, "XML")) {
|
||||||
// XXX: This should examine the charset the XML document defines,
|
// XXX: This should examine the charset the XML document defines,
|
||||||
// however since there are currently no real means to decode arbitrary
|
// however since there are currently no real means to decode arbitrary
|
||||||
// charsets, let's just hope that the author of the PDF was reasonable
|
// charsets, let's just hope that the author of the PDF was reasonable
|
||||||
// enough to stick with the XML default charset, which is UTF-8.
|
// enough to stick with the XML default charset, which is UTF-8.
|
||||||
try {
|
|
||||||
const data = stringToUTF8String(stream.getString());
|
const data = stringToUTF8String(stream.getString());
|
||||||
if (data) {
|
if (data) {
|
||||||
metadata = new MetadataParser(data).serializable;
|
metadata = new MetadataParser(data).serializable;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
if (e instanceof MissingDataException) {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
info("Skipping invalid metadata.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
info(`Skipping invalid Metadata: "${ex}".`);
|
||||||
}
|
}
|
||||||
return shadow(this, "metadata", metadata);
|
return shadow(this, "metadata", metadata);
|
||||||
}
|
}
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -498,3 +498,4 @@
|
|||||||
!poppler-91414-0-54.pdf
|
!poppler-91414-0-54.pdf
|
||||||
!poppler-742-0-fuzzed.pdf
|
!poppler-742-0-fuzzed.pdf
|
||||||
!poppler-937-0-fuzzed.pdf
|
!poppler-937-0-fuzzed.pdf
|
||||||
|
!PDFBOX-3148-2-fuzzed.pdf
|
||||||
|
3574
test/pdfs/PDFBOX-3148-2-fuzzed.pdf
Normal file
3574
test/pdfs/PDFBOX-3148-2-fuzzed.pdf
Normal file
File diff suppressed because it is too large
Load Diff
@ -1439,6 +1439,8 @@ describe("api", function () {
|
|||||||
const { info, metadata, contentDispositionFilename, contentLength } =
|
const { info, metadata, contentDispositionFilename, contentLength } =
|
||||||
await pdfDoc.getMetadata();
|
await pdfDoc.getMetadata();
|
||||||
|
|
||||||
|
// Custom, non-standard, information dictionary entries.
|
||||||
|
expect(info.Custom).toEqual(undefined);
|
||||||
// The following are PDF.js specific, non-standard, properties.
|
// The following are PDF.js specific, non-standard, properties.
|
||||||
expect(info.PDFFormatVersion).toEqual(null);
|
expect(info.PDFFormatVersion).toEqual(null);
|
||||||
expect(info.Language).toEqual(null);
|
expect(info.Language).toEqual(null);
|
||||||
@ -1456,6 +1458,33 @@ describe("api", function () {
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets metadata, with corrupt /Metadata XRef entry", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("PDFBOX-3148-2-fuzzed.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const { info, metadata, contentDispositionFilename, contentLength } =
|
||||||
|
await pdfDoc.getMetadata();
|
||||||
|
|
||||||
|
// Custom, non-standard, information dictionary entries.
|
||||||
|
expect(info.Custom).toEqual(undefined);
|
||||||
|
// The following are PDF.js specific, non-standard, properties.
|
||||||
|
expect(info.PDFFormatVersion).toEqual("1.6");
|
||||||
|
expect(info.Language).toEqual(null);
|
||||||
|
expect(info.EncryptFilterName).toEqual(null);
|
||||||
|
expect(info.IsLinearized).toEqual(false);
|
||||||
|
expect(info.IsAcroFormPresent).toEqual(true);
|
||||||
|
expect(info.IsXFAPresent).toEqual(false);
|
||||||
|
expect(info.IsCollectionPresent).toEqual(false);
|
||||||
|
expect(info.IsSignaturesPresent).toEqual(false);
|
||||||
|
|
||||||
|
expect(metadata).toEqual(null);
|
||||||
|
expect(contentDispositionFilename).toEqual(null);
|
||||||
|
expect(contentLength).toEqual(244351);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets markInfo", async function () {
|
it("gets markInfo", async function () {
|
||||||
const loadingTask = getDocument(
|
const loadingTask = getDocument(
|
||||||
buildGetDocumentParams("annotation-line.pdf")
|
buildGetDocumentParams("annotation-line.pdf")
|
||||||
|
Loading…
Reference in New Issue
Block a user