From b3024db67708eee5b9106708406deae87f525eda Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 10 Mar 2014 22:18:30 -0700 Subject: [PATCH] Estimate the size of decoded streams in advance. When decoding a stream, the decode buffer is often grown multiple times, its byte size increasing like so: 512, 1024, 2048, etc. This patch estimates the minimum size in advance (using the length of the encoded stream), often allowing the smaller sizes to be skipped. It also renames numerous |length| variables as |maybeLength| to make it clear that they can be |null|. I measured this change on eight documents. This change reduces the cumulative size of decode buffer allocations by 0--32%, with 10--20% being typical. This reduces peak RSS by 10 or 20 MiB for several of them. --- src/core/crypto.js | 4 +- src/core/evaluator.js | 21 +++++----- src/core/parser.js | 36 ++++++++++-------- src/core/stream.js | 82 +++++++++++++++++++++++++--------------- test/unit/stream_spec.js | 2 +- 5 files changed, 85 insertions(+), 60 deletions(-) diff --git a/src/core/crypto.js b/src/core/crypto.js index cc54f6e37..dca99a06b 100644 --- a/src/core/crypto.js +++ b/src/core/crypto.js @@ -431,9 +431,9 @@ var CipherTransform = (function CipherTransformClosure() { this.streamCipherConstructor = streamCipherConstructor; } CipherTransform.prototype = { - createStream: function CipherTransform_createStream(stream) { + createStream: function CipherTransform_createStream(stream, length) { var cipher = new this.streamCipherConstructor(); - return new DecryptStream(stream, + return new DecryptStream(stream, length, function cipherTransformDecryptStream(data, finalize) { return cipher.decryptBlock(data, finalize); } diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 2cf00d0bb..17ed7f33e 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -643,7 +643,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var preprocessor = new EvaluatorPreprocessor(stream, xref); var res = resources; - var chunk = ''; + var chunkBuf = []; var font = null; var charSpace = 0, wordSpace = 0; var operation; @@ -692,37 +692,37 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var items = args[0]; for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { - chunk += fontCharsToUnicode(items[j], font); + chunkBuf.push(fontCharsToUnicode(items[j], font)); } else if (items[j] < 0 && font.spaceWidth > 0) { var fakeSpaces = -items[j] / font.spaceWidth; if (fakeSpaces > MULTI_SPACE_FACTOR) { fakeSpaces = Math.round(fakeSpaces); while (fakeSpaces--) { - chunk += ' '; + chunkBuf.push(' '); } } else if (fakeSpaces > SPACE_FACTOR) { - chunk += ' '; + chunkBuf.push(' '); } } } break; case OPS.showText: - chunk += fontCharsToUnicode(args[0], font); + chunkBuf.push(fontCharsToUnicode(args[0], font)); break; case OPS.nextLineShowText: // For search, adding a extra white space for line breaks would be // better here, but that causes too much spaces in the // text-selection divs. - chunk += fontCharsToUnicode(args[0], font); + chunkBuf.push(fontCharsToUnicode(args[0], font)); break; case OPS.nextLineSetSpacingShowText: // Note comment in "'" - chunk += fontCharsToUnicode(args[2], font); + chunkBuf.push(fontCharsToUnicode(args[2], font)); break; case OPS.paintXObject: // Set the chunk such that the following if won't add something // to the state. - chunk = ''; + chunkBuf.length = 0; if (args[0].code) { break; @@ -771,7 +771,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; } // switch - if (chunk !== '') { + if (chunkBuf.length > 0) { + var chunk = chunkBuf.join(''); var bidiResult = PDFJS.bidi(chunk, -1, font.vertical); var bidiText = { str: bidiResult.str, @@ -793,7 +794,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { bidiText.size = fontHeight; bidiTexts.push(bidiText); - chunk = ''; + chunkBuf.length = 0; } } // while diff --git a/src/core/parser.js b/src/core/parser.js index 28440eb3f..5d7aba4af 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -170,7 +170,7 @@ var Parser = (function ParserClosure() { var length = (stream.pos - 4) - startPos; var imageStream = stream.makeSubStream(startPos, length, dict); if (cipherTransform) - imageStream = cipherTransform.createStream(imageStream); + imageStream = cipherTransform.createStream(imageStream, length); imageStream = this.filter(imageStream, dict, length); imageStream.dict = dict; @@ -251,7 +251,7 @@ var Parser = (function ParserClosure() { stream = stream.makeSubStream(pos, length, dict); if (cipherTransform) - stream = cipherTransform.createStream(stream); + stream = cipherTransform.createStream(stream, length); stream = this.filter(stream, dict, length); stream.dict = dict; return stream; @@ -261,6 +261,8 @@ var Parser = (function ParserClosure() { var params = this.fetchIfRef(dict.get('DecodeParms', 'DP')); if (isName(filter)) return this.makeFilter(stream, filter.name, length, params); + + var maybeLength = length; if (isArray(filter)) { var filterArray = filter; var paramsArray = params; @@ -272,22 +274,23 @@ var Parser = (function ParserClosure() { params = null; if (isArray(paramsArray) && (i in paramsArray)) params = paramsArray[i]; - stream = this.makeFilter(stream, filter.name, length, params); + stream = this.makeFilter(stream, filter.name, maybeLength, params); // after the first stream the length variable is invalid - length = null; + maybeLength = null; } } return stream; }, - makeFilter: function Parser_makeFilter(stream, name, length, params) { + makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) { if (stream.dict.get('Length') === 0) { return new NullStream(stream); } if (name == 'FlateDecode' || name == 'Fl') { if (params) { - return new PredictorStream(new FlateStream(stream), params); + return new PredictorStream(new FlateStream(stream, maybeLength), + maybeLength, params); } - return new FlateStream(stream); + return new FlateStream(stream, maybeLength); } if (name == 'LZWDecode' || name == 'LZW') { var earlyChange = 1; @@ -295,30 +298,31 @@ var Parser = (function ParserClosure() { if (params.has('EarlyChange')) earlyChange = params.get('EarlyChange'); return new PredictorStream( - new LZWStream(stream, earlyChange), params); + new LZWStream(stream, maybeLength, earlyChange), + maybeLength, params); } - return new LZWStream(stream, earlyChange); + return new LZWStream(stream, maybeLength, earlyChange); } if (name == 'DCTDecode' || name == 'DCT') { - return new JpegStream(stream, length, stream.dict, this.xref); + return new JpegStream(stream, maybeLength, stream.dict, this.xref); } if (name == 'JPXDecode' || name == 'JPX') { - return new JpxStream(stream, length, stream.dict); + return new JpxStream(stream, maybeLength, stream.dict); } if (name == 'ASCII85Decode' || name == 'A85') { - return new Ascii85Stream(stream); + return new Ascii85Stream(stream, maybeLength); } if (name == 'ASCIIHexDecode' || name == 'AHx') { - return new AsciiHexStream(stream); + return new AsciiHexStream(stream, maybeLength); } if (name == 'CCITTFaxDecode' || name == 'CCF') { - return new CCITTFaxStream(stream, params); + return new CCITTFaxStream(stream, maybeLength, params); } if (name == 'RunLengthDecode' || name == 'RL') { - return new RunLengthStream(stream); + return new RunLengthStream(stream, maybeLength); } if (name == 'JBIG2Decode') { - return new Jbig2Stream(stream, length, stream.dict); + return new Jbig2Stream(stream, maybeLength, stream.dict); } warn('filter "' + name + '" not supported yet'); return stream; diff --git a/src/core/stream.js b/src/core/stream.js index 819879506..fe8337b34 100644 --- a/src/core/stream.js +++ b/src/core/stream.js @@ -98,11 +98,18 @@ var StringStream = (function StringStreamClosure() { // super class for the decoding streams var DecodeStream = (function DecodeStreamClosure() { - function DecodeStream() { + function DecodeStream(maybeMinBufferLength) { this.pos = 0; this.bufferLength = 0; this.eof = false; this.buffer = null; + this.minBufferLength = 512; + if (maybeMinBufferLength) { + // Compute the first power of two that is as big as maybeMinBufferLength. + while (this.minBufferLength < maybeMinBufferLength) { + this.minBufferLength *= 2; + } + } } DecodeStream.prototype = { @@ -117,7 +124,7 @@ var DecodeStream = (function DecodeStreamClosure() { } else { current = 0; } - var size = 512; + var size = this.minBufferLength; while (size < requested) { size *= 2; } @@ -197,7 +204,7 @@ var DecodeStream = (function DecodeStreamClosure() { var StreamsSequenceStream = (function StreamsSequenceStreamClosure() { function StreamsSequenceStream(streams) { this.streams = streams; - DecodeStream.call(this); + DecodeStream.call(this, /* maybeLength = */ null); } StreamsSequenceStream.prototype = Object.create(DecodeStream.prototype); @@ -328,7 +335,7 @@ var FlateStream = (function FlateStreamClosure() { 0x50003, 0x50013, 0x5000b, 0x5001b, 0x50007, 0x50017, 0x5000f, 0x00000 ]), 5]; - function FlateStream(str) { + function FlateStream(str, maybeLength) { this.str = str; this.dict = str.dict; @@ -346,7 +353,7 @@ var FlateStream = (function FlateStreamClosure() { this.codeSize = 0; this.codeBuf = 0; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } FlateStream.prototype = Object.create(DecodeStream.prototype); @@ -581,7 +588,7 @@ var FlateStream = (function FlateStreamClosure() { })(); var PredictorStream = (function PredictorStreamClosure() { - function PredictorStream(str, params) { + function PredictorStream(str, maybeLength, params) { var predictor = this.predictor = params.get('Predictor') || 1; if (predictor <= 1) @@ -604,7 +611,7 @@ var PredictorStream = (function PredictorStreamClosure() { this.pixBytes = (colors * bits + 7) >> 3; this.rowBytes = (columns * colors * bits + 7) >> 3; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); return this; } @@ -774,21 +781,22 @@ var PredictorStream = (function PredictorStreamClosure() { * DecodeStreams. */ var JpegStream = (function JpegStreamClosure() { - function JpegStream(stream, length, dict, xref) { + function JpegStream(stream, maybeLength, dict, xref) { // TODO: per poppler, some images may have 'junk' before that // need to be removed this.stream = stream; - this.length = length; + this.maybeLength = maybeLength; this.dict = dict; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } JpegStream.prototype = Object.create(DecodeStream.prototype); Object.defineProperty(JpegStream.prototype, 'bytes', { get: function JpegStream_bytes() { - return shadow(this, 'bytes', this.stream.getBytes(this.length)); + // If this.maybeLength is null, we'll get the entire stream. + return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength)); }, configurable: true }); @@ -841,19 +849,20 @@ var JpegStream = (function JpegStreamClosure() { * the stream behaves like all the other DecodeStreams. */ var JpxStream = (function JpxStreamClosure() { - function JpxStream(stream, length, dict) { + function JpxStream(stream, maybeLength, dict) { this.stream = stream; - this.length = length; + this.maybeLength = maybeLength; this.dict = dict; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } JpxStream.prototype = Object.create(DecodeStream.prototype); Object.defineProperty(JpxStream.prototype, 'bytes', { get: function JpxStream_bytes() { - return shadow(this, 'bytes', this.stream.getBytes(this.length)); + // If this.maybeLength is null, we'll get the entire stream. + return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength)); }, configurable: true }); @@ -948,19 +957,20 @@ var JpxStream = (function JpxStreamClosure() { * the stream behaves like all the other DecodeStreams. */ var Jbig2Stream = (function Jbig2StreamClosure() { - function Jbig2Stream(stream, length, dict) { + function Jbig2Stream(stream, maybeLength, dict) { this.stream = stream; - this.length = length; + this.maybeLength = maybeLength; this.dict = dict; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } Jbig2Stream.prototype = Object.create(DecodeStream.prototype); Object.defineProperty(Jbig2Stream.prototype, 'bytes', { get: function Jbig2Stream_bytes() { - return shadow(this, 'bytes', this.stream.getBytes(this.length)); + // If this.maybeLength is null, we'll get the entire stream. + return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength)); }, configurable: true }); @@ -1004,14 +1014,14 @@ var Jbig2Stream = (function Jbig2StreamClosure() { })(); var DecryptStream = (function DecryptStreamClosure() { - function DecryptStream(str, decrypt) { + function DecryptStream(str, maybeLength, decrypt) { this.str = str; this.dict = str.dict; this.decrypt = decrypt; this.nextChunk = null; this.initialized = false; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } var chunkSize = 512; @@ -1048,12 +1058,17 @@ var DecryptStream = (function DecryptStreamClosure() { })(); var Ascii85Stream = (function Ascii85StreamClosure() { - function Ascii85Stream(str) { + function Ascii85Stream(str, maybeLength) { this.str = str; this.dict = str.dict; this.input = new Uint8Array(5); - DecodeStream.call(this); + // Most streams increase in size when decoded, but Ascii85 streams + // typically shrink by ~20%. + if (maybeLength) { + maybeLength = 0.8 * maybeLength; + } + DecodeStream.call(this, maybeLength); } Ascii85Stream.prototype = Object.create(DecodeStream.prototype); @@ -1121,13 +1136,18 @@ var Ascii85Stream = (function Ascii85StreamClosure() { })(); var AsciiHexStream = (function AsciiHexStreamClosure() { - function AsciiHexStream(str) { + function AsciiHexStream(str, maybeLength) { this.str = str; this.dict = str.dict; this.firstDigit = -1; - DecodeStream.call(this); + // Most streams increase in size when decoded, but AsciiHex streams shrink + // by 50%. + if (maybeLength) { + maybeLength = 0.5 * maybeLength; + } + DecodeStream.call(this, maybeLength); } AsciiHexStream.prototype = Object.create(DecodeStream.prototype); @@ -1178,11 +1198,11 @@ var AsciiHexStream = (function AsciiHexStreamClosure() { })(); var RunLengthStream = (function RunLengthStreamClosure() { - function RunLengthStream(str) { + function RunLengthStream(str, maybeLength) { this.str = str; this.dict = str.dict; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } RunLengthStream.prototype = Object.create(DecodeStream.prototype); @@ -1650,7 +1670,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() { [2, 2], [2, 2], [2, 2], [2, 2] ]; - function CCITTFaxStream(str, params) { + function CCITTFaxStream(str, maybeLength, params) { this.str = str; this.dict = str.dict; @@ -1691,7 +1711,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() { this.eatBits(1); } - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } CCITTFaxStream.prototype = Object.create(DecodeStream.prototype); @@ -2186,7 +2206,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() { })(); var LZWStream = (function LZWStreamClosure() { - function LZWStream(str, earlyChange) { + function LZWStream(str, maybeLength, earlyChange) { this.str = str; this.dict = str.dict; this.cachedData = 0; @@ -2209,7 +2229,7 @@ var LZWStream = (function LZWStreamClosure() { } this.lzwState = lzwState; - DecodeStream.call(this); + DecodeStream.call(this, maybeLength); } LZWStream.prototype = Object.create(DecodeStream.prototype); diff --git a/test/unit/stream_spec.js b/test/unit/stream_spec.js index 05a42d740..443ae6a84 100644 --- a/test/unit/stream_spec.js +++ b/test/unit/stream_spec.js @@ -30,7 +30,7 @@ describe('stream', function() { var input = new Stream(new Uint8Array([2, 100, 3, 2, 1, 255, 2, 1, 255]), 0, 9, dict); - var predictor = new PredictorStream(input, dict); + var predictor = new PredictorStream(input, /* length = */ 9, dict); var result = predictor.getBytes(6); expect(result).toMatchTypedArray(