Fallback gracefully when encountering corrupt PDF files with empty /MediaBox and /CropBox entries
This is based on a real-world PDF file I encountered very recently[1], although I'm currently unable to recall where I saw it. Note that different PDF viewers handle these sort of errors differently, with Adobe Reader outright failing to render the attached PDF file whereas PDFium mostly handles it "correctly". The patch makes the following notable changes: - Refactor the `cropBox` and `mediaBox` getters, on the `Page`, to reduce unnecessary duplication. (This will also help in the future, if support for extracting additional page bounding boxes are added to the API.) - Ensure that the page bounding boxes, i.e. `cropBox` and `mediaBox`, are never empty to prevent issues/weirdness in the viewer. - Ensure that the `view` getter on the `Page` will never return an empty intersection of the `cropBox` and `mediaBox`. - Add an *optional* parameter to `Util.intersect`, to allow checking that the computed intersection isn't actually empty. - Change `Util.intersect` to have consistent return types, since Arrays are of type `Object` and falling back to returning a `Boolean` thus seem strange. --- [1] In that case I believe that only the `cropBox` was empty, but it seemed like a good idea to attempt to fix a bunch of related cases all at once.
This commit is contained in:
parent
aaef00ce5d
commit
d637b25e36
@ -95,24 +95,28 @@ class Page {
|
||||
this._getInheritableProperty('Resources') || Dict.empty);
|
||||
}
|
||||
|
||||
get mediaBox() {
|
||||
const mediaBox = this._getInheritableProperty('MediaBox',
|
||||
/* getArray = */ true);
|
||||
// Reset invalid media box to letter size.
|
||||
if (!Array.isArray(mediaBox) || mediaBox.length !== 4) {
|
||||
return shadow(this, 'mediaBox', LETTER_SIZE_MEDIABOX);
|
||||
_getBoundingBox(name) {
|
||||
const box = this._getInheritableProperty(name, /* getArray = */ true);
|
||||
|
||||
if (Array.isArray(box) && box.length === 4) {
|
||||
if ((box[2] - box[0]) !== 0 && (box[3] - box[1]) !== 0) {
|
||||
return box;
|
||||
}
|
||||
warn(`Empty /${name} entry.`);
|
||||
}
|
||||
return shadow(this, 'mediaBox', mediaBox);
|
||||
return null;
|
||||
}
|
||||
|
||||
get mediaBox() {
|
||||
// Reset invalid media box to letter size.
|
||||
return shadow(this, 'mediaBox',
|
||||
this._getBoundingBox('MediaBox') || LETTER_SIZE_MEDIABOX);
|
||||
}
|
||||
|
||||
get cropBox() {
|
||||
const cropBox = this._getInheritableProperty('CropBox',
|
||||
/* getArray = */ true);
|
||||
// Reset invalid crop box to media box.
|
||||
if (!Array.isArray(cropBox) || cropBox.length !== 4) {
|
||||
return shadow(this, 'cropBox', this.mediaBox);
|
||||
}
|
||||
return shadow(this, 'cropBox', cropBox);
|
||||
return shadow(this, 'cropBox',
|
||||
this._getBoundingBox('CropBox') || this.mediaBox);
|
||||
}
|
||||
|
||||
get userUnit() {
|
||||
@ -129,12 +133,18 @@ class Page {
|
||||
// extend beyond the boundaries of the media box. If they do, they are
|
||||
// effectively reduced to their intersection with the media box."
|
||||
const { cropBox, mediaBox, } = this;
|
||||
let view;
|
||||
if (cropBox === mediaBox || isArrayEqual(cropBox, mediaBox)) {
|
||||
return shadow(this, 'view', mediaBox);
|
||||
view = mediaBox;
|
||||
} else {
|
||||
const box = Util.intersect(cropBox, mediaBox, /* skipEmpty = */ true);
|
||||
if (box) {
|
||||
view = box;
|
||||
} else {
|
||||
warn('Empty /CropBox and /MediaBox intersection.');
|
||||
}
|
||||
}
|
||||
|
||||
const intersection = Util.intersect(cropBox, mediaBox);
|
||||
return shadow(this, 'view', intersection || mediaBox);
|
||||
return shadow(this, 'view', view || mediaBox);
|
||||
}
|
||||
|
||||
get rotate() {
|
||||
|
@ -748,7 +748,7 @@ var Util = (function UtilClosure() {
|
||||
// Returns a rectangle [x1, y1, x2, y2] corresponding to the
|
||||
// intersection of rect1 and rect2. If no intersection, returns 'false'
|
||||
// The rectangle coordinates of rect1, rect2 should be [x1, y1, x2, y2]
|
||||
Util.intersect = function Util_intersect(rect1, rect2) {
|
||||
Util.intersect = function Util_intersect(rect1, rect2, skipEmpty = false) {
|
||||
function compare(a, b) {
|
||||
return a - b;
|
||||
}
|
||||
@ -768,7 +768,7 @@ var Util = (function UtilClosure() {
|
||||
result[0] = orderedX[1];
|
||||
result[2] = orderedX[2];
|
||||
} else {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Y: first and second points belong to different rectangles?
|
||||
@ -778,9 +778,13 @@ var Util = (function UtilClosure() {
|
||||
result[1] = orderedY[1];
|
||||
result[3] = orderedY[2];
|
||||
} else {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
if (skipEmpty &&
|
||||
((result[2] - result[0]) === 0 || (result[3] - result[1]) === 0)) {
|
||||
return null;
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -1,6 +1,7 @@
|
||||
*.pdf
|
||||
*.error
|
||||
|
||||
!boundingBox_invalid.pdf
|
||||
!tracemonkey.pdf
|
||||
!TrueType_without_cmap.pdf
|
||||
!franz.pdf
|
||||
|
130
test/pdfs/boundingBox_invalid.pdf
Normal file
130
test/pdfs/boundingBox_invalid.pdf
Normal file
@ -0,0 +1,130 @@
|
||||
%PDF-1.7
|
||||
%âãÏÓ
|
||||
1 0 obj
|
||||
<<
|
||||
/Pages 2 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Kids [3 0 R 4 0 R 5 0 R]
|
||||
/Type /Pages
|
||||
/Count 3
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/Parent 2 0 R
|
||||
/Resources
|
||||
<<
|
||||
/Font
|
||||
<<
|
||||
/F1 6 0 R
|
||||
>>
|
||||
>>
|
||||
/MediaBox [0 0 0 0]
|
||||
/Type /Page
|
||||
/Contents 7 0 R
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/CropBox [0 0 0 0]
|
||||
/Parent 2 0 R
|
||||
/Resources
|
||||
<<
|
||||
/Font
|
||||
<<
|
||||
/F1 6 0 R
|
||||
>>
|
||||
>>
|
||||
/MediaBox [0 0 800 600]
|
||||
/Type /Page
|
||||
/Contents 8 0 R
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/CropBox [600 800 1000 1000]
|
||||
/Parent 2 0 R
|
||||
/Resources
|
||||
<<
|
||||
/Font
|
||||
<<
|
||||
/F1 6 0 R
|
||||
>>
|
||||
>>
|
||||
/MediaBox [0 0 600 800]
|
||||
/Type /Page
|
||||
/Contents 9 0 R
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/BaseFont /Times-Roman
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
/Encoding /WinAnsiEncoding
|
||||
>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<<
|
||||
/Length 48
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
25 700 TD
|
||||
/F1 30 Tf
|
||||
(Empty /MediaBox) Tj
|
||||
ET
|
||||
|
||||
|
||||
endstream
|
||||
endobj
|
||||
8 0 obj
|
||||
<<
|
||||
/Length 46
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
25 500 TD
|
||||
/F1 30 Tf
|
||||
(Empty /CropBox) Tj
|
||||
ET
|
||||
|
||||
endstream
|
||||
endobj
|
||||
9 0 obj
|
||||
<<
|
||||
/Length 73
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
25 700 TD
|
||||
/F1 30 Tf
|
||||
(Empty /CropBox and /MediaBox intersection) Tj
|
||||
ET
|
||||
|
||||
endstream
|
||||
endobj xref
|
||||
0 10
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000066 00000 n
|
||||
0000000137 00000 n
|
||||
0000000263 00000 n
|
||||
0000000412 00000 n
|
||||
0000000571 00000 n
|
||||
0000000672 00000 n
|
||||
0000000773 00000 n
|
||||
0000000872 00000 n
|
||||
trailer
|
||||
|
||||
<<
|
||||
/Root 1 0 R
|
||||
/Size 10
|
||||
>>
|
||||
startxref
|
||||
997
|
||||
%%EOF
|
@ -39,6 +39,13 @@
|
||||
"lastPage": 1,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "boundingBox_invalid",
|
||||
"file": "pdfs/boundingBox_invalid.pdf",
|
||||
"md5": "f6dfc471bf43abac00cdcf05d81cb070",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue11016",
|
||||
"file": "pdfs/issue11016_reduced.pdf",
|
||||
"md5": "b75578bd052d2e6acdcc85b615eab6b1",
|
||||
|
@ -1065,9 +1065,35 @@ describe('api', function() {
|
||||
it('gets userUnit', function () {
|
||||
expect(page.userUnit).toEqual(1.0);
|
||||
});
|
||||
it('gets view', function () {
|
||||
|
||||
it('gets view', function() {
|
||||
expect(page.view).toEqual([0, 0, 595.28, 841.89]);
|
||||
});
|
||||
it('gets view, with empty/invalid bounding boxes', function(done) {
|
||||
const viewLoadingTask = getDocument(buildGetDocumentParams(
|
||||
'boundingBox_invalid.pdf'));
|
||||
|
||||
viewLoadingTask.promise.then((pdfDoc) => {
|
||||
const numPages = pdfDoc.numPages;
|
||||
expect(numPages).toEqual(3);
|
||||
|
||||
const viewPromises = [];
|
||||
for (let i = 0; i < numPages; i++) {
|
||||
viewPromises[i] = pdfDoc.getPage(i + 1).then((pdfPage) => {
|
||||
return pdfPage.view;
|
||||
});
|
||||
}
|
||||
|
||||
Promise.all(viewPromises).then(([page1, page2, page3]) => {
|
||||
expect(page1).toEqual([0, 0, 612, 792]);
|
||||
expect(page2).toEqual([0, 0, 800, 600]);
|
||||
expect(page3).toEqual([0, 0, 600, 800]);
|
||||
|
||||
viewLoadingTask.destroy().then(done);
|
||||
});
|
||||
}).catch(done.fail);
|
||||
});
|
||||
|
||||
it('gets viewport', function () {
|
||||
var viewport = page.getViewport({ scale: 1.5, rotation: 90, });
|
||||
expect(viewport.viewBox).toEqual(page.view);
|
||||
|
Loading…
Reference in New Issue
Block a user