From 3ce420131f174548e648cd49bc9d59b90afbf22e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 15 Jul 2018 18:53:08 +0200 Subject: [PATCH] Prefer the Width/Height of the image data, rather than the image dictionary, for JPEG 2000 images (issue 9650) According to the PDF specification, see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#page=45 > When using the JPXDecode filter with image XObjects, the following changes to and constraints on some entries in the image dictionary shall apply (see 8.9.5, "Image Dictionaries" for details on these entries): > > - Width and Height shall match the corresponding width and height values in the JPEG2000 data. > > - . . . Hence it seems reasonable to use the Width/Height of the image data *itself*, rather than the image dictionary when there's a mismatch. Given that JPEG 2000 images are already being parsed, in order to obtain basic parameters, the actual Width/Height is readily available in the `PDFImage` constructor. --- src/core/image.js | 23 ++++++++++++++++++----- test/pdfs/issue9650.pdf.link | 1 + test/test_manifest.json | 8 ++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 test/pdfs/issue9650.pdf.link diff --git a/src/core/image.js b/src/core/image.js index fd87b4271..1509a3e1e 100644 --- a/src/core/image.js +++ b/src/core/image.js @@ -91,6 +91,9 @@ var PDFImage = (function PDFImageClosure() { var jpxImage = new JpxImage(); jpxImage.parseImageProperties(image.stream); image.stream.reset(); + + image.width = jpxImage.width; + image.height = jpxImage.height; image.bitsPerComponent = jpxImage.bitsPerComponent; image.numComps = jpxImage.componentsCount; break; @@ -105,13 +108,23 @@ var PDFImage = (function PDFImageClosure() { } // TODO cache rendered images? - this.width = dict.get('Width', 'W'); - this.height = dict.get('Height', 'H'); + let width = dict.get('Width', 'W'); + let height = dict.get('Height', 'H'); - if (this.width < 1 || this.height < 1) { - throw new FormatError(`Invalid image width: ${this.width} or ` + - `height: ${this.height}`); + if ((Number.isInteger(image.width) && image.width > 0) && + (Number.isInteger(image.height) && image.height > 0) && + (image.width !== width || image.height !== height)) { + warn('PDFImage - using the Width/Height of the image data, ' + + 'rather than the image dictionary.'); + width = image.width; + height = image.height; } + if (width < 1 || height < 1) { + throw new FormatError(`Invalid image width: ${width} or ` + + `height: ${height}`); + } + this.width = width; + this.height = height; this.interpolate = dict.get('Interpolate', 'I') || false; this.imageMask = dict.get('ImageMask', 'IM') || false; diff --git a/test/pdfs/issue9650.pdf.link b/test/pdfs/issue9650.pdf.link new file mode 100644 index 000000000..b76a808e1 --- /dev/null +++ b/test/pdfs/issue9650.pdf.link @@ -0,0 +1 @@ +https://github.com/mozilla/pdf.js/files/1898753/Kred.Eingangsrechnungen.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index defb40f4f..0cd02a1a8 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -3259,6 +3259,14 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue9650", + "file": "pdfs/issue9650.pdf", + "md5": "20d50bda6b1080b6d9088811299c791e", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "eq" + }, { "id": "issue9679", "file": "pdfs/issue9679.pdf", "md5": "3077d06add3875705aa1021c7b116023",