Estimate the size of decoded streams in advance.
When decoding a stream, the decode buffer is often grown multiple times, with its byte size doubling each time: 512, 1024, 2048, etc. This patch estimates the minimum size in advance (using the length of the encoded stream), often allowing the smaller sizes to be skipped. It also renames numerous |length| variables to |maybeLength| to make it clear that they can be |null|. I measured this change on eight documents. It reduces the cumulative size of decode buffer allocations by 0–32%, with 10–20% being typical, and it reduces peak RSS by 10 or 20 MiB for several of them.
This commit is contained in:
parent
c3ed71c9c5
commit
b3024db677
@ -431,9 +431,9 @@ var CipherTransform = (function CipherTransformClosure() {
|
||||
this.streamCipherConstructor = streamCipherConstructor;
|
||||
}
|
||||
CipherTransform.prototype = {
|
||||
createStream: function CipherTransform_createStream(stream) {
|
||||
createStream: function CipherTransform_createStream(stream, length) {
|
||||
var cipher = new this.streamCipherConstructor();
|
||||
return new DecryptStream(stream,
|
||||
return new DecryptStream(stream, length,
|
||||
function cipherTransformDecryptStream(data, finalize) {
|
||||
return cipher.decryptBlock(data, finalize);
|
||||
}
|
||||
|
@ -643,7 +643,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
var preprocessor = new EvaluatorPreprocessor(stream, xref);
|
||||
var res = resources;
|
||||
|
||||
var chunk = '';
|
||||
var chunkBuf = [];
|
||||
var font = null;
|
||||
var charSpace = 0, wordSpace = 0;
|
||||
var operation;
|
||||
@ -692,37 +692,37 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
var items = args[0];
|
||||
for (var j = 0, jj = items.length; j < jj; j++) {
|
||||
if (typeof items[j] === 'string') {
|
||||
chunk += fontCharsToUnicode(items[j], font);
|
||||
chunkBuf.push(fontCharsToUnicode(items[j], font));
|
||||
} else if (items[j] < 0 && font.spaceWidth > 0) {
|
||||
var fakeSpaces = -items[j] / font.spaceWidth;
|
||||
if (fakeSpaces > MULTI_SPACE_FACTOR) {
|
||||
fakeSpaces = Math.round(fakeSpaces);
|
||||
while (fakeSpaces--) {
|
||||
chunk += ' ';
|
||||
chunkBuf.push(' ');
|
||||
}
|
||||
} else if (fakeSpaces > SPACE_FACTOR) {
|
||||
chunk += ' ';
|
||||
chunkBuf.push(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OPS.showText:
|
||||
chunk += fontCharsToUnicode(args[0], font);
|
||||
chunkBuf.push(fontCharsToUnicode(args[0], font));
|
||||
break;
|
||||
case OPS.nextLineShowText:
|
||||
// For search, adding an extra white space for line breaks would be
|
||||
// better here, but that causes too many spaces in the
|
||||
// text-selection divs.
|
||||
chunk += fontCharsToUnicode(args[0], font);
|
||||
chunkBuf.push(fontCharsToUnicode(args[0], font));
|
||||
break;
|
||||
case OPS.nextLineSetSpacingShowText:
|
||||
// Note comment in "'"
|
||||
chunk += fontCharsToUnicode(args[2], font);
|
||||
chunkBuf.push(fontCharsToUnicode(args[2], font));
|
||||
break;
|
||||
case OPS.paintXObject:
|
||||
// Set the chunk such that the following if won't add something
|
||||
// to the state.
|
||||
chunk = '';
|
||||
chunkBuf.length = 0;
|
||||
|
||||
if (args[0].code) {
|
||||
break;
|
||||
@ -771,7 +771,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
break;
|
||||
} // switch
|
||||
|
||||
if (chunk !== '') {
|
||||
if (chunkBuf.length > 0) {
|
||||
var chunk = chunkBuf.join('');
|
||||
var bidiResult = PDFJS.bidi(chunk, -1, font.vertical);
|
||||
var bidiText = {
|
||||
str: bidiResult.str,
|
||||
@ -793,7 +794,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
bidiText.size = fontHeight;
|
||||
bidiTexts.push(bidiText);
|
||||
|
||||
chunk = '';
|
||||
chunkBuf.length = 0;
|
||||
}
|
||||
} // while
|
||||
|
||||
|
@ -170,7 +170,7 @@ var Parser = (function ParserClosure() {
|
||||
var length = (stream.pos - 4) - startPos;
|
||||
var imageStream = stream.makeSubStream(startPos, length, dict);
|
||||
if (cipherTransform)
|
||||
imageStream = cipherTransform.createStream(imageStream);
|
||||
imageStream = cipherTransform.createStream(imageStream, length);
|
||||
imageStream = this.filter(imageStream, dict, length);
|
||||
imageStream.dict = dict;
|
||||
|
||||
@ -251,7 +251,7 @@ var Parser = (function ParserClosure() {
|
||||
|
||||
stream = stream.makeSubStream(pos, length, dict);
|
||||
if (cipherTransform)
|
||||
stream = cipherTransform.createStream(stream);
|
||||
stream = cipherTransform.createStream(stream, length);
|
||||
stream = this.filter(stream, dict, length);
|
||||
stream.dict = dict;
|
||||
return stream;
|
||||
@ -261,6 +261,8 @@ var Parser = (function ParserClosure() {
|
||||
var params = this.fetchIfRef(dict.get('DecodeParms', 'DP'));
|
||||
if (isName(filter))
|
||||
return this.makeFilter(stream, filter.name, length, params);
|
||||
|
||||
var maybeLength = length;
|
||||
if (isArray(filter)) {
|
||||
var filterArray = filter;
|
||||
var paramsArray = params;
|
||||
@ -272,22 +274,23 @@ var Parser = (function ParserClosure() {
|
||||
params = null;
|
||||
if (isArray(paramsArray) && (i in paramsArray))
|
||||
params = paramsArray[i];
|
||||
stream = this.makeFilter(stream, filter.name, length, params);
|
||||
stream = this.makeFilter(stream, filter.name, maybeLength, params);
|
||||
// after the first stream the length variable is invalid
|
||||
length = null;
|
||||
maybeLength = null;
|
||||
}
|
||||
}
|
||||
return stream;
|
||||
},
|
||||
makeFilter: function Parser_makeFilter(stream, name, length, params) {
|
||||
makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
|
||||
if (stream.dict.get('Length') === 0) {
|
||||
return new NullStream(stream);
|
||||
}
|
||||
if (name == 'FlateDecode' || name == 'Fl') {
|
||||
if (params) {
|
||||
return new PredictorStream(new FlateStream(stream), params);
|
||||
return new PredictorStream(new FlateStream(stream, maybeLength),
|
||||
maybeLength, params);
|
||||
}
|
||||
return new FlateStream(stream);
|
||||
return new FlateStream(stream, maybeLength);
|
||||
}
|
||||
if (name == 'LZWDecode' || name == 'LZW') {
|
||||
var earlyChange = 1;
|
||||
@ -295,30 +298,31 @@ var Parser = (function ParserClosure() {
|
||||
if (params.has('EarlyChange'))
|
||||
earlyChange = params.get('EarlyChange');
|
||||
return new PredictorStream(
|
||||
new LZWStream(stream, earlyChange), params);
|
||||
new LZWStream(stream, maybeLength, earlyChange),
|
||||
maybeLength, params);
|
||||
}
|
||||
return new LZWStream(stream, earlyChange);
|
||||
return new LZWStream(stream, maybeLength, earlyChange);
|
||||
}
|
||||
if (name == 'DCTDecode' || name == 'DCT') {
|
||||
return new JpegStream(stream, length, stream.dict, this.xref);
|
||||
return new JpegStream(stream, maybeLength, stream.dict, this.xref);
|
||||
}
|
||||
if (name == 'JPXDecode' || name == 'JPX') {
|
||||
return new JpxStream(stream, length, stream.dict);
|
||||
return new JpxStream(stream, maybeLength, stream.dict);
|
||||
}
|
||||
if (name == 'ASCII85Decode' || name == 'A85') {
|
||||
return new Ascii85Stream(stream);
|
||||
return new Ascii85Stream(stream, maybeLength);
|
||||
}
|
||||
if (name == 'ASCIIHexDecode' || name == 'AHx') {
|
||||
return new AsciiHexStream(stream);
|
||||
return new AsciiHexStream(stream, maybeLength);
|
||||
}
|
||||
if (name == 'CCITTFaxDecode' || name == 'CCF') {
|
||||
return new CCITTFaxStream(stream, params);
|
||||
return new CCITTFaxStream(stream, maybeLength, params);
|
||||
}
|
||||
if (name == 'RunLengthDecode' || name == 'RL') {
|
||||
return new RunLengthStream(stream);
|
||||
return new RunLengthStream(stream, maybeLength);
|
||||
}
|
||||
if (name == 'JBIG2Decode') {
|
||||
return new Jbig2Stream(stream, length, stream.dict);
|
||||
return new Jbig2Stream(stream, maybeLength, stream.dict);
|
||||
}
|
||||
warn('filter "' + name + '" not supported yet');
|
||||
return stream;
|
||||
|
@ -98,11 +98,18 @@ var StringStream = (function StringStreamClosure() {
|
||||
|
||||
// super class for the decoding streams
|
||||
var DecodeStream = (function DecodeStreamClosure() {
|
||||
function DecodeStream() {
|
||||
function DecodeStream(maybeMinBufferLength) {
|
||||
this.pos = 0;
|
||||
this.bufferLength = 0;
|
||||
this.eof = false;
|
||||
this.buffer = null;
|
||||
this.minBufferLength = 512;
|
||||
if (maybeMinBufferLength) {
|
||||
// Compute the first power of two that is at least as big as maybeMinBufferLength.
|
||||
while (this.minBufferLength < maybeMinBufferLength) {
|
||||
this.minBufferLength *= 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DecodeStream.prototype = {
|
||||
@ -117,7 +124,7 @@ var DecodeStream = (function DecodeStreamClosure() {
|
||||
} else {
|
||||
current = 0;
|
||||
}
|
||||
var size = 512;
|
||||
var size = this.minBufferLength;
|
||||
while (size < requested) {
|
||||
size *= 2;
|
||||
}
|
||||
@ -197,7 +204,7 @@ var DecodeStream = (function DecodeStreamClosure() {
|
||||
var StreamsSequenceStream = (function StreamsSequenceStreamClosure() {
|
||||
function StreamsSequenceStream(streams) {
|
||||
this.streams = streams;
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, /* maybeLength = */ null);
|
||||
}
|
||||
|
||||
StreamsSequenceStream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -328,7 +335,7 @@ var FlateStream = (function FlateStreamClosure() {
|
||||
0x50003, 0x50013, 0x5000b, 0x5001b, 0x50007, 0x50017, 0x5000f, 0x00000
|
||||
]), 5];
|
||||
|
||||
function FlateStream(str) {
|
||||
function FlateStream(str, maybeLength) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
|
||||
@ -346,7 +353,7 @@ var FlateStream = (function FlateStreamClosure() {
|
||||
this.codeSize = 0;
|
||||
this.codeBuf = 0;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
FlateStream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -581,7 +588,7 @@ var FlateStream = (function FlateStreamClosure() {
|
||||
})();
|
||||
|
||||
var PredictorStream = (function PredictorStreamClosure() {
|
||||
function PredictorStream(str, params) {
|
||||
function PredictorStream(str, maybeLength, params) {
|
||||
var predictor = this.predictor = params.get('Predictor') || 1;
|
||||
|
||||
if (predictor <= 1)
|
||||
@ -604,7 +611,7 @@ var PredictorStream = (function PredictorStreamClosure() {
|
||||
this.pixBytes = (colors * bits + 7) >> 3;
|
||||
this.rowBytes = (columns * colors * bits + 7) >> 3;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -774,21 +781,22 @@ var PredictorStream = (function PredictorStreamClosure() {
|
||||
* DecodeStreams.
|
||||
*/
|
||||
var JpegStream = (function JpegStreamClosure() {
|
||||
function JpegStream(stream, length, dict, xref) {
|
||||
function JpegStream(stream, maybeLength, dict, xref) {
|
||||
// TODO: per poppler, some images may have 'junk' before that
|
||||
// need to be removed
|
||||
this.stream = stream;
|
||||
this.length = length;
|
||||
this.maybeLength = maybeLength;
|
||||
this.dict = dict;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
JpegStream.prototype = Object.create(DecodeStream.prototype);
|
||||
|
||||
Object.defineProperty(JpegStream.prototype, 'bytes', {
|
||||
get: function JpegStream_bytes() {
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.length));
|
||||
// If this.maybeLength is null, we'll get the entire stream.
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
|
||||
},
|
||||
configurable: true
|
||||
});
|
||||
@ -841,19 +849,20 @@ var JpegStream = (function JpegStreamClosure() {
|
||||
* the stream behaves like all the other DecodeStreams.
|
||||
*/
|
||||
var JpxStream = (function JpxStreamClosure() {
|
||||
function JpxStream(stream, length, dict) {
|
||||
function JpxStream(stream, maybeLength, dict) {
|
||||
this.stream = stream;
|
||||
this.length = length;
|
||||
this.maybeLength = maybeLength;
|
||||
this.dict = dict;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
JpxStream.prototype = Object.create(DecodeStream.prototype);
|
||||
|
||||
Object.defineProperty(JpxStream.prototype, 'bytes', {
|
||||
get: function JpxStream_bytes() {
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.length));
|
||||
// If this.maybeLength is null, we'll get the entire stream.
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
|
||||
},
|
||||
configurable: true
|
||||
});
|
||||
@ -948,19 +957,20 @@ var JpxStream = (function JpxStreamClosure() {
|
||||
* the stream behaves like all the other DecodeStreams.
|
||||
*/
|
||||
var Jbig2Stream = (function Jbig2StreamClosure() {
|
||||
function Jbig2Stream(stream, length, dict) {
|
||||
function Jbig2Stream(stream, maybeLength, dict) {
|
||||
this.stream = stream;
|
||||
this.length = length;
|
||||
this.maybeLength = maybeLength;
|
||||
this.dict = dict;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
Jbig2Stream.prototype = Object.create(DecodeStream.prototype);
|
||||
|
||||
Object.defineProperty(Jbig2Stream.prototype, 'bytes', {
|
||||
get: function Jbig2Stream_bytes() {
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.length));
|
||||
// If this.maybeLength is null, we'll get the entire stream.
|
||||
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
|
||||
},
|
||||
configurable: true
|
||||
});
|
||||
@ -1004,14 +1014,14 @@ var Jbig2Stream = (function Jbig2StreamClosure() {
|
||||
})();
|
||||
|
||||
var DecryptStream = (function DecryptStreamClosure() {
|
||||
function DecryptStream(str, decrypt) {
|
||||
function DecryptStream(str, maybeLength, decrypt) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
this.decrypt = decrypt;
|
||||
this.nextChunk = null;
|
||||
this.initialized = false;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
var chunkSize = 512;
|
||||
@ -1048,12 +1058,17 @@ var DecryptStream = (function DecryptStreamClosure() {
|
||||
})();
|
||||
|
||||
var Ascii85Stream = (function Ascii85StreamClosure() {
|
||||
function Ascii85Stream(str) {
|
||||
function Ascii85Stream(str, maybeLength) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
this.input = new Uint8Array(5);
|
||||
|
||||
DecodeStream.call(this);
|
||||
// Most streams increase in size when decoded, but Ascii85 streams
|
||||
// typically shrink by ~20%.
|
||||
if (maybeLength) {
|
||||
maybeLength = 0.8 * maybeLength;
|
||||
}
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
Ascii85Stream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -1121,13 +1136,18 @@ var Ascii85Stream = (function Ascii85StreamClosure() {
|
||||
})();
|
||||
|
||||
var AsciiHexStream = (function AsciiHexStreamClosure() {
|
||||
function AsciiHexStream(str) {
|
||||
function AsciiHexStream(str, maybeLength) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
|
||||
this.firstDigit = -1;
|
||||
|
||||
DecodeStream.call(this);
|
||||
// Most streams increase in size when decoded, but AsciiHex streams shrink
|
||||
// by 50%.
|
||||
if (maybeLength) {
|
||||
maybeLength = 0.5 * maybeLength;
|
||||
}
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
AsciiHexStream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -1178,11 +1198,11 @@ var AsciiHexStream = (function AsciiHexStreamClosure() {
|
||||
})();
|
||||
|
||||
var RunLengthStream = (function RunLengthStreamClosure() {
|
||||
function RunLengthStream(str) {
|
||||
function RunLengthStream(str, maybeLength) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
RunLengthStream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -1650,7 +1670,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
|
||||
[2, 2], [2, 2], [2, 2], [2, 2]
|
||||
];
|
||||
|
||||
function CCITTFaxStream(str, params) {
|
||||
function CCITTFaxStream(str, maybeLength, params) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
|
||||
@ -1691,7 +1711,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
|
||||
this.eatBits(1);
|
||||
}
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
CCITTFaxStream.prototype = Object.create(DecodeStream.prototype);
|
||||
@ -2186,7 +2206,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
|
||||
})();
|
||||
|
||||
var LZWStream = (function LZWStreamClosure() {
|
||||
function LZWStream(str, earlyChange) {
|
||||
function LZWStream(str, maybeLength, earlyChange) {
|
||||
this.str = str;
|
||||
this.dict = str.dict;
|
||||
this.cachedData = 0;
|
||||
@ -2209,7 +2229,7 @@ var LZWStream = (function LZWStreamClosure() {
|
||||
}
|
||||
this.lzwState = lzwState;
|
||||
|
||||
DecodeStream.call(this);
|
||||
DecodeStream.call(this, maybeLength);
|
||||
}
|
||||
|
||||
LZWStream.prototype = Object.create(DecodeStream.prototype);
|
||||
|
@ -30,7 +30,7 @@ describe('stream', function() {
|
||||
|
||||
var input = new Stream(new Uint8Array([2, 100, 3, 2, 1, 255, 2, 1, 255]),
|
||||
0, 9, dict);
|
||||
var predictor = new PredictorStream(input, dict);
|
||||
var predictor = new PredictorStream(input, /* length = */ 9, dict);
|
||||
var result = predictor.getBytes(6);
|
||||
|
||||
expect(result).toMatchTypedArray(
|
||||
|
Loading…
Reference in New Issue
Block a user