Merge pull request #11059 from Snuffleupagus/boundingBox-more-validation

Fallback gracefully when encountering corrupt PDF files with empty /MediaBox and /CropBox entries
This commit is contained in:
Tim van der Meij 2019-08-09 22:39:01 +02:00 committed by GitHub
commit fbe8c6127c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 199 additions and 21 deletions

View File

@ -95,24 +95,28 @@ class Page {
this._getInheritableProperty('Resources') || Dict.empty);
}
get mediaBox() {
const mediaBox = this._getInheritableProperty('MediaBox',
/* getArray = */ true);
// Reset invalid media box to letter size.
if (!Array.isArray(mediaBox) || mediaBox.length !== 4) {
return shadow(this, 'mediaBox', LETTER_SIZE_MEDIABOX);
_getBoundingBox(name) {
const box = this._getInheritableProperty(name, /* getArray = */ true);
if (Array.isArray(box) && box.length === 4) {
if ((box[2] - box[0]) !== 0 && (box[3] - box[1]) !== 0) {
return box;
}
warn(`Empty /${name} entry.`);
}
return shadow(this, 'mediaBox', mediaBox);
return null;
}
get mediaBox() {
// Reset invalid media box to letter size.
return shadow(this, 'mediaBox',
this._getBoundingBox('MediaBox') || LETTER_SIZE_MEDIABOX);
}
get cropBox() {
const cropBox = this._getInheritableProperty('CropBox',
/* getArray = */ true);
// Reset invalid crop box to media box.
if (!Array.isArray(cropBox) || cropBox.length !== 4) {
return shadow(this, 'cropBox', this.mediaBox);
}
return shadow(this, 'cropBox', cropBox);
return shadow(this, 'cropBox',
this._getBoundingBox('CropBox') || this.mediaBox);
}
get userUnit() {
@ -129,12 +133,18 @@ class Page {
// extend beyond the boundaries of the media box. If they do, they are
// effectively reduced to their intersection with the media box."
const { cropBox, mediaBox, } = this;
let view;
if (cropBox === mediaBox || isArrayEqual(cropBox, mediaBox)) {
return shadow(this, 'view', mediaBox);
view = mediaBox;
} else {
const box = Util.intersect(cropBox, mediaBox, /* skipEmpty = */ true);
if (box) {
view = box;
} else {
warn('Empty /CropBox and /MediaBox intersection.');
}
}
const intersection = Util.intersect(cropBox, mediaBox);
return shadow(this, 'view', intersection || mediaBox);
return shadow(this, 'view', view || mediaBox);
}
get rotate() {

View File

@ -748,7 +748,7 @@ var Util = (function UtilClosure() {
// Returns a rectangle [x1, y1, x2, y2] corresponding to the
// intersection of rect1 and rect2. If no intersection, returns 'false'
// The rectangle coordinates of rect1, rect2 should be [x1, y1, x2, y2]
Util.intersect = function Util_intersect(rect1, rect2) {
Util.intersect = function Util_intersect(rect1, rect2, skipEmpty = false) {
function compare(a, b) {
return a - b;
}
@ -768,7 +768,7 @@ var Util = (function UtilClosure() {
result[0] = orderedX[1];
result[2] = orderedX[2];
} else {
return false;
return null;
}
// Y: first and second points belong to different rectangles?
@ -778,9 +778,13 @@ var Util = (function UtilClosure() {
result[1] = orderedY[1];
result[3] = orderedY[2];
} else {
return false;
return null;
}
if (skipEmpty &&
((result[2] - result[0]) === 0 || (result[3] - result[1]) === 0)) {
return null;
}
return result;
};

View File

@ -1,6 +1,7 @@
*.pdf
*.error
!boundingBox_invalid.pdf
!tracemonkey.pdf
!TrueType_without_cmap.pdf
!franz.pdf

View File

@ -0,0 +1,130 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Kids [3 0 R 4 0 R 5 0 R]
/Type /Pages
/Count 3
>>
endobj
3 0 obj
<<
/Parent 2 0 R
/Resources
<<
/Font
<<
/F1 6 0 R
>>
>>
/MediaBox [0 0 0 0]
/Type /Page
/Contents 7 0 R
>>
endobj
4 0 obj
<<
/CropBox [0 0 0 0]
/Parent 2 0 R
/Resources
<<
/Font
<<
/F1 6 0 R
>>
>>
/MediaBox [0 0 800 600]
/Type /Page
/Contents 8 0 R
>>
endobj
5 0 obj
<<
/CropBox [600 800 1000 1000]
/Parent 2 0 R
/Resources
<<
/Font
<<
/F1 6 0 R
>>
>>
/MediaBox [0 0 600 800]
/Type /Page
/Contents 9 0 R
>>
endobj
6 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Type /Font
/Encoding /WinAnsiEncoding
>>
endobj
7 0 obj
<<
/Length 48
>>
stream
BT
25 700 TD
/F1 30 Tf
(Empty /MediaBox) Tj
ET
endstream
endobj
8 0 obj
<<
/Length 46
>>
stream
BT
25 500 TD
/F1 30 Tf
(Empty /CropBox) Tj
ET
endstream
endobj
9 0 obj
<<
/Length 73
>>
stream
BT
25 700 TD
/F1 30 Tf
(Empty /CropBox and /MediaBox intersection) Tj
ET
endstream
endobj xref
0 10
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000137 00000 n
0000000263 00000 n
0000000412 00000 n
0000000571 00000 n
0000000672 00000 n
0000000773 00000 n
0000000872 00000 n
trailer
<<
/Root 1 0 R
/Size 10
>>
startxref
997
%%EOF

View File

@ -39,6 +39,13 @@
"lastPage": 1,
"type": "text"
},
{ "id": "boundingBox_invalid",
"file": "pdfs/boundingBox_invalid.pdf",
"md5": "f6dfc471bf43abac00cdcf05d81cb070",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue11016",
"file": "pdfs/issue11016_reduced.pdf",
"md5": "b75578bd052d2e6acdcc85b615eab6b1",

View File

@ -1065,9 +1065,35 @@ describe('api', function() {
it('gets userUnit', function () {
expect(page.userUnit).toEqual(1.0);
});
it('gets view', function () {
it('gets view', function() {
expect(page.view).toEqual([0, 0, 595.28, 841.89]);
});
it('gets view, with empty/invalid bounding boxes', function(done) {
const viewLoadingTask = getDocument(buildGetDocumentParams(
'boundingBox_invalid.pdf'));
viewLoadingTask.promise.then((pdfDoc) => {
const numPages = pdfDoc.numPages;
expect(numPages).toEqual(3);
const viewPromises = [];
for (let i = 0; i < numPages; i++) {
viewPromises[i] = pdfDoc.getPage(i + 1).then((pdfPage) => {
return pdfPage.view;
});
}
Promise.all(viewPromises).then(([page1, page2, page3]) => {
expect(page1).toEqual([0, 0, 612, 792]);
expect(page2).toEqual([0, 0, 800, 600]);
expect(page3).toEqual([0, 0, 600, 800]);
viewLoadingTask.destroy().then(done);
});
}).catch(done.fail);
});
it('gets viewport', function () {
var viewport = page.getViewport({ scale: 1.5, rotation: 90, });
expect(viewport.viewBox).toEqual(page.view);