62a9c26cda
When JPEG images are decoded by the browser, on the main-thread, there's a handful of short-lived copies of the image data; see c3f4690bde/src/display/api.js (L2364-L2408)
That code thus becomes quite problematic for very big JPEG images, since it increases peak memory usage a lot during decoding. In the referenced issue there's a couple of JPEG images whose dimensions are `10006 x 7088` (i.e. ~68 mega-pixels), which causes the *peak* memory usage to increase by close to `1 GB` (i.e. one giga-byte) in my testing.
By letting the PDF.js JPEG decoder, rather than the browser, handle very large images the *peak* memory usage is considerably reduced and the allocated memory also seems to be reclaimed faster.
*Please note:* This will lead to movement in some existing `eq` tests.
260 lines
9.0 KiB
JavaScript
260 lines
9.0 KiB
JavaScript
/* Copyright 2012 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import { createObjectURL, shadow } from "../shared/util.js";
|
|
import { DecodeStream } from "./stream.js";
|
|
import { isDict } from "./primitives.js";
|
|
import { JpegImage } from "./jpg.js";
|
|
|
|
/**
 * Depending on the type of JPEG a JpegStream is handled in different ways. For
 * JPEG's that are supported natively such as DeviceGray and DeviceRGB the image
 * data is stored and then loaded by the browser. For unsupported JPEG's we use
 * a library to decode these images and the stream behaves like all the other
 * DecodeStreams.
 */
const JpegStream = (function JpegStreamClosure() {
  // Pixel-count threshold (SOF `scanLines * samplesPerLine`) above which
  // `maybeValidDimensions` reports `false`, so that very big images are
  // handled by the PDF.js JPEG decoder rather than the browser.
  const MAX_NATIVE_DECODE_PIXELS = 1e6;

  /**
   * @param {Object} stream - The underlying stream holding the raw JPEG data;
   *   this file uses its `pos`, `getByte`, `getUint16`, `skip` and `getBytes`
   *   members.
   * @param {number} [maybeLength] - Forwarded to `DecodeStream` and to
   *   `stream.getBytes` when the raw bytes are fetched.
   * @param {Object} dict - The image dictionary; queried through `get` and
   *   `getArray`.
   * @param {Object} [params] - Only used if `isDict(params)` is true, in which
   *   case its "ColorTransform" entry is read.
   */
  function JpegStream(stream, maybeLength, dict, params) {
    // Some images may contain 'junk' before the SOI (start-of-image) marker.
    // Note: this seems to mainly affect inline images.
    let ch;
    while ((ch = stream.getByte()) !== -1) {
      // Find the first byte of the SOI marker (0xFFD8).
      if (ch === 0xff) {
        stream.skip(-1); // Reset the stream position to the SOI.
        break;
      }
    }
    this.stream = stream;
    this.maybeLength = maybeLength;
    this.dict = dict;
    this.params = params;

    DecodeStream.call(this, maybeLength);
  }

  JpegStream.prototype = Object.create(DecodeStream.prototype);

  /**
   * The raw (still encoded) JPEG bytes, read from the underlying stream on
   * first access and then cached on the instance through `shadow`.
   */
  Object.defineProperty(JpegStream.prototype, "bytes", {
    get: function JpegStream_bytes() {
      // If `this.maybeLength` is null, we'll get the entire stream.
      return shadow(this, "bytes", this.stream.getBytes(this.maybeLength));
    },
    configurable: true,
  });

  /**
   * Intentionally empty override.
   * @param {number} requested - Ignored.
   */
  JpegStream.prototype.ensureBuffer = function(requested) {
    // No-op, since `this.readBlock` will always parse the entire image and
    // directly insert all of its data into `this.buffer`.
  };

  /**
   * Decodes the whole image with `JpegImage` in a single step and stores the
   * result in `this.buffer`/`this.bufferLength`, then marks the stream as
   * finished (`this.eof = true`). Does nothing if `this.eof` is already set.
   *
   * Reads `this.forceRGB`, `this.drawWidth` and `this.drawHeight`, none of
   * which are assigned in this file.
   * NOTE(review): presumably set by the consumer before decoding -- confirm.
   */
  JpegStream.prototype.readBlock = function() {
    if (this.eof) {
      return;
    }
    const jpegOptions = {
      decodeTransform: undefined,
      colorTransform: undefined,
    };

    // Checking if values need to be transformed before conversion.
    const decodeArr = this.dict.getArray("Decode", "D");
    if (this.forceRGB && Array.isArray(decodeArr)) {
      const bitsPerComponent = this.dict.get("BitsPerComponent") || 8;
      const decodeArrLength = decodeArr.length;
      const transform = new Int32Array(decodeArrLength);
      let transformNeeded = false;
      const maxValue = (1 << bitsPerComponent) - 1;
      // Each [min, max] pair becomes a (scale, offset) pair; the identity
      // mapping is (256, 0), anything else requires a decode transform.
      for (let i = 0; i < decodeArrLength; i += 2) {
        transform[i] = ((decodeArr[i + 1] - decodeArr[i]) * 256) | 0;
        transform[i + 1] = (decodeArr[i] * maxValue) | 0;
        if (transform[i] !== 256 || transform[i + 1] !== 0) {
          transformNeeded = true;
        }
      }
      if (transformNeeded) {
        jpegOptions.decodeTransform = transform;
      }
    }
    // Fetching the 'ColorTransform' entry, if it exists.
    if (isDict(this.params)) {
      const colorTransform = this.params.get("ColorTransform");
      if (Number.isInteger(colorTransform)) {
        jpegOptions.colorTransform = colorTransform;
      }
    }
    const jpegImage = new JpegImage(jpegOptions);

    jpegImage.parse(this.bytes);
    const data = jpegImage.getData({
      width: this.drawWidth,
      height: this.drawHeight,
      forceRGB: this.forceRGB,
      isSourcePDF: true,
    });
    this.buffer = data;
    this.bufferLength = data.length;
    this.eof = true;
  };

  /**
   * Scans the JPEG markers up to the first SOF (or EOI) marker and reports
   * whether the image dimensions look suitable for native browser decoding.
   *
   * Returns `false` when a SOF0/SOF1/SOF2 header describes more than
   * `MAX_NATIVE_DECODE_PIXELS` pixels, has `scanLines === 0`, or has
   * `scanLines` more than ten times the dictionary "Height"; `true` otherwise.
   * The underlying stream position is always restored, even if reading from
   * the stream throws. The result is cached on the instance through `shadow`
   * (only when the scan completes without throwing).
   */
  Object.defineProperty(JpegStream.prototype, "maybeValidDimensions", {
    get: function JpegStream_maybeValidDimensions() {
      const { dict, stream } = this;
      const dictHeight = dict.get("Height", "H");
      const startPos = stream.pos;

      let validDimensions = true,
        foundSOF = false,
        b;
      try {
        while ((b = stream.getByte()) !== -1) {
          if (b !== 0xff) {
            // Not a valid marker.
            continue;
          }
          switch (stream.getByte()) {
            case 0xc0: // SOF0
            case 0xc1: // SOF1
            case 0xc2: {
              // SOF2
              // These three SOF{n} markers are the only ones that the built-in
              // PDF.js JPEG decoder currently supports.
              foundSOF = true;

              stream.pos += 2; // Skip marker length.
              stream.pos += 1; // Skip precision.
              const scanLines = stream.getUint16();
              const samplesPerLine = stream.getUint16();

              // Letting the browser handle the JPEG decoding, on the
              // main-thread, will cause a *large* increase in peak memory
              // usage since there's a handful of short-lived copies of the
              // image data. For very big JPEG images, always let the PDF.js
              // image decoder handle them to reduce overall memory usage
              // during decoding (see issue 11694).
              if (scanLines * samplesPerLine > MAX_NATIVE_DECODE_PIXELS) {
                validDimensions = false;
                break;
              }

              // The "normal" case, where the image data and dictionary agrees.
              if (scanLines === dictHeight) {
                break;
              }
              // A DNL (Define Number of Lines) marker is expected,
              // which browsers (usually) cannot decode natively.
              if (scanLines === 0) {
                validDimensions = false;
                break;
              }
              // The dimensions of the image, among other properties, should
              // always be taken from the image data *itself* rather than the
              // XObject dictionary. However there's cases of corrupt images
              // that browsers cannot decode natively, for example:
              //  - JPEG images with DNL markers, where the SOF `scanLines`
              //    parameter has an unexpected value (see issue 8614).
              //  - JPEG images with too large SOF `scanLines` parameter, where
              //    the EOI marker is encountered prematurely (see issue 10880).
              // In an attempt to handle these kinds of corrupt images, compare
              // the dimensions in the image data with the dictionary and
              // *always* let the PDF.js JPEG decoder (rather than the browser)
              // handle the image if the difference is larger than one order of
              // magnitude (since that would generally suggest that something
              // is off).
              if (scanLines > dictHeight * 10) {
                validDimensions = false;
                break;
              }
              break;
            }

            case 0xc3: // SOF3
            /* falls through */
            case 0xc5: // SOF5
            case 0xc6: // SOF6
            case 0xc7: // SOF7
            /* falls through */
            case 0xc9: // SOF9
            case 0xca: // SOF10
            case 0xcb: // SOF11
            /* falls through */
            case 0xcd: // SOF13
            case 0xce: // SOF14
            case 0xcf: // SOF15
              // A SOF marker was found, but its dimensions are not inspected.
              foundSOF = true;
              break;

            case 0xc4: // DHT
            case 0xcc: // DAC
            /* falls through */
            case 0xda: // SOS
            case 0xdb: // DQT
            case 0xdc: // DNL
            case 0xdd: // DRI
            case 0xde: // DHP
            case 0xdf: // EXP
            /* falls through */
            case 0xe0: // APP0
            case 0xe1: // APP1
            case 0xe2: // APP2
            case 0xe3: // APP3
            case 0xe4: // APP4
            case 0xe5: // APP5
            case 0xe6: // APP6
            case 0xe7: // APP7
            case 0xe8: // APP8
            case 0xe9: // APP9
            case 0xea: // APP10
            case 0xeb: // APP11
            case 0xec: // APP12
            case 0xed: // APP13
            case 0xee: // APP14
            case 0xef: // APP15
            /* falls through */
            case 0xfe: {
              // COM
              const markerLength = stream.getUint16();
              if (markerLength > 2) {
                stream.skip(markerLength - 2); // Jump to the next marker.
              } else {
                // The marker length is invalid, resetting the stream position.
                stream.skip(-2);
              }
              break;
            }

            case 0xff: // Fill byte.
              // Avoid skipping a valid marker, resetting the stream position.
              stream.skip(-1);
              break;

            case 0xd9: // EOI
              // End of image: stop scanning, even though no SOF was seen.
              foundSOF = true;
              break;
          }
          if (foundSOF) {
            break;
          }
        }
      } finally {
        // Finally, don't forget to reset the stream position; this must also
        // happen when a stream read throws, since the stream is shared with
        // the code that later fetches/decodes the image data.
        stream.pos = startPos;
      }

      return shadow(this, "maybeValidDimensions", validDimensions);
    },
    configurable: true,
  });

  /**
   * Returns the result of `createObjectURL` for the raw JPEG bytes, using the
   * "image/jpeg" content type.
   * @param {boolean} [forceDataSchema=false] - Forwarded unchanged to
   *   `createObjectURL`.
   */
  JpegStream.prototype.getIR = function(forceDataSchema = false) {
    return createObjectURL(this.bytes, "image/jpeg", forceDataSchema);
  };

  return JpegStream;
})();
|
|
|
|
export { JpegStream };
|