Merge pull request #4444 from nnethercote/min-length

Estimate the size of decoded streams in advance.
This commit is contained in:
Yury Delendik 2014-03-13 10:58:31 -05:00
commit 1c0e1cc591
5 changed files with 85 additions and 60 deletions

View File

@ -431,9 +431,9 @@ var CipherTransform = (function CipherTransformClosure() {
this.streamCipherConstructor = streamCipherConstructor; this.streamCipherConstructor = streamCipherConstructor;
} }
CipherTransform.prototype = { CipherTransform.prototype = {
createStream: function CipherTransform_createStream(stream) { createStream: function CipherTransform_createStream(stream, length) {
var cipher = new this.streamCipherConstructor(); var cipher = new this.streamCipherConstructor();
return new DecryptStream(stream, return new DecryptStream(stream, length,
function cipherTransformDecryptStream(data, finalize) { function cipherTransformDecryptStream(data, finalize) {
return cipher.decryptBlock(data, finalize); return cipher.decryptBlock(data, finalize);
} }

View File

@ -645,7 +645,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var preprocessor = new EvaluatorPreprocessor(stream, xref); var preprocessor = new EvaluatorPreprocessor(stream, xref);
var res = resources; var res = resources;
var chunk = ''; var chunkBuf = [];
var font = null; var font = null;
var charSpace = 0, wordSpace = 0; var charSpace = 0, wordSpace = 0;
var operation; var operation;
@ -694,37 +694,37 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var items = args[0]; var items = args[0];
for (var j = 0, jj = items.length; j < jj; j++) { for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') { if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font); chunkBuf.push(fontCharsToUnicode(items[j], font));
} else if (items[j] < 0 && font.spaceWidth > 0) { } else if (items[j] < 0 && font.spaceWidth > 0) {
var fakeSpaces = -items[j] / font.spaceWidth; var fakeSpaces = -items[j] / font.spaceWidth;
if (fakeSpaces > MULTI_SPACE_FACTOR) { if (fakeSpaces > MULTI_SPACE_FACTOR) {
fakeSpaces = Math.round(fakeSpaces); fakeSpaces = Math.round(fakeSpaces);
while (fakeSpaces--) { while (fakeSpaces--) {
chunk += ' '; chunkBuf.push(' ');
} }
} else if (fakeSpaces > SPACE_FACTOR) { } else if (fakeSpaces > SPACE_FACTOR) {
chunk += ' '; chunkBuf.push(' ');
} }
} }
} }
break; break;
case OPS.showText: case OPS.showText:
chunk += fontCharsToUnicode(args[0], font); chunkBuf.push(fontCharsToUnicode(args[0], font));
break; break;
case OPS.nextLineShowText: case OPS.nextLineShowText:
// For search, adding a extra white space for line breaks would be // For search, adding a extra white space for line breaks would be
// better here, but that causes too much spaces in the // better here, but that causes too much spaces in the
// text-selection divs. // text-selection divs.
chunk += fontCharsToUnicode(args[0], font); chunkBuf.push(fontCharsToUnicode(args[0], font));
break; break;
case OPS.nextLineSetSpacingShowText: case OPS.nextLineSetSpacingShowText:
// Note comment in "'" // Note comment in "'"
chunk += fontCharsToUnicode(args[2], font); chunkBuf.push(fontCharsToUnicode(args[2], font));
break; break;
case OPS.paintXObject: case OPS.paintXObject:
// Set the chunk such that the following if won't add something // Set the chunk such that the following if won't add something
// to the state. // to the state.
chunk = ''; chunkBuf.length = 0;
if (args[0].code) { if (args[0].code) {
break; break;
@ -773,7 +773,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
break; break;
} // switch } // switch
if (chunk !== '') { if (chunkBuf.length > 0) {
var chunk = chunkBuf.join('');
var bidiResult = PDFJS.bidi(chunk, -1, font.vertical); var bidiResult = PDFJS.bidi(chunk, -1, font.vertical);
var bidiText = { var bidiText = {
str: bidiResult.str, str: bidiResult.str,
@ -795,7 +796,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
bidiText.size = fontHeight; bidiText.size = fontHeight;
bidiTexts.push(bidiText); bidiTexts.push(bidiText);
chunk = ''; chunkBuf.length = 0;
} }
} // while } // while

View File

@ -170,7 +170,7 @@ var Parser = (function ParserClosure() {
var length = (stream.pos - 4) - startPos; var length = (stream.pos - 4) - startPos;
var imageStream = stream.makeSubStream(startPos, length, dict); var imageStream = stream.makeSubStream(startPos, length, dict);
if (cipherTransform) if (cipherTransform)
imageStream = cipherTransform.createStream(imageStream); imageStream = cipherTransform.createStream(imageStream, length);
imageStream = this.filter(imageStream, dict, length); imageStream = this.filter(imageStream, dict, length);
imageStream.dict = dict; imageStream.dict = dict;
@ -251,7 +251,7 @@ var Parser = (function ParserClosure() {
stream = stream.makeSubStream(pos, length, dict); stream = stream.makeSubStream(pos, length, dict);
if (cipherTransform) if (cipherTransform)
stream = cipherTransform.createStream(stream); stream = cipherTransform.createStream(stream, length);
stream = this.filter(stream, dict, length); stream = this.filter(stream, dict, length);
stream.dict = dict; stream.dict = dict;
return stream; return stream;
@ -261,6 +261,8 @@ var Parser = (function ParserClosure() {
var params = this.fetchIfRef(dict.get('DecodeParms', 'DP')); var params = this.fetchIfRef(dict.get('DecodeParms', 'DP'));
if (isName(filter)) if (isName(filter))
return this.makeFilter(stream, filter.name, length, params); return this.makeFilter(stream, filter.name, length, params);
var maybeLength = length;
if (isArray(filter)) { if (isArray(filter)) {
var filterArray = filter; var filterArray = filter;
var paramsArray = params; var paramsArray = params;
@ -272,22 +274,23 @@ var Parser = (function ParserClosure() {
params = null; params = null;
if (isArray(paramsArray) && (i in paramsArray)) if (isArray(paramsArray) && (i in paramsArray))
params = paramsArray[i]; params = paramsArray[i];
stream = this.makeFilter(stream, filter.name, length, params); stream = this.makeFilter(stream, filter.name, maybeLength, params);
// after the first stream the length variable is invalid // after the first stream the length variable is invalid
length = null; maybeLength = null;
} }
} }
return stream; return stream;
}, },
makeFilter: function Parser_makeFilter(stream, name, length, params) { makeFilter: function Parser_makeFilter(stream, name, maybeLength, params) {
if (stream.dict.get('Length') === 0) { if (stream.dict.get('Length') === 0) {
return new NullStream(stream); return new NullStream(stream);
} }
if (name == 'FlateDecode' || name == 'Fl') { if (name == 'FlateDecode' || name == 'Fl') {
if (params) { if (params) {
return new PredictorStream(new FlateStream(stream), params); return new PredictorStream(new FlateStream(stream, maybeLength),
maybeLength, params);
} }
return new FlateStream(stream); return new FlateStream(stream, maybeLength);
} }
if (name == 'LZWDecode' || name == 'LZW') { if (name == 'LZWDecode' || name == 'LZW') {
var earlyChange = 1; var earlyChange = 1;
@ -295,30 +298,31 @@ var Parser = (function ParserClosure() {
if (params.has('EarlyChange')) if (params.has('EarlyChange'))
earlyChange = params.get('EarlyChange'); earlyChange = params.get('EarlyChange');
return new PredictorStream( return new PredictorStream(
new LZWStream(stream, earlyChange), params); new LZWStream(stream, maybeLength, earlyChange),
maybeLength, params);
} }
return new LZWStream(stream, earlyChange); return new LZWStream(stream, maybeLength, earlyChange);
} }
if (name == 'DCTDecode' || name == 'DCT') { if (name == 'DCTDecode' || name == 'DCT') {
return new JpegStream(stream, length, stream.dict, this.xref); return new JpegStream(stream, maybeLength, stream.dict, this.xref);
} }
if (name == 'JPXDecode' || name == 'JPX') { if (name == 'JPXDecode' || name == 'JPX') {
return new JpxStream(stream, length, stream.dict); return new JpxStream(stream, maybeLength, stream.dict);
} }
if (name == 'ASCII85Decode' || name == 'A85') { if (name == 'ASCII85Decode' || name == 'A85') {
return new Ascii85Stream(stream); return new Ascii85Stream(stream, maybeLength);
} }
if (name == 'ASCIIHexDecode' || name == 'AHx') { if (name == 'ASCIIHexDecode' || name == 'AHx') {
return new AsciiHexStream(stream); return new AsciiHexStream(stream, maybeLength);
} }
if (name == 'CCITTFaxDecode' || name == 'CCF') { if (name == 'CCITTFaxDecode' || name == 'CCF') {
return new CCITTFaxStream(stream, params); return new CCITTFaxStream(stream, maybeLength, params);
} }
if (name == 'RunLengthDecode' || name == 'RL') { if (name == 'RunLengthDecode' || name == 'RL') {
return new RunLengthStream(stream); return new RunLengthStream(stream, maybeLength);
} }
if (name == 'JBIG2Decode') { if (name == 'JBIG2Decode') {
return new Jbig2Stream(stream, length, stream.dict); return new Jbig2Stream(stream, maybeLength, stream.dict);
} }
warn('filter "' + name + '" not supported yet'); warn('filter "' + name + '" not supported yet');
return stream; return stream;

View File

@ -98,11 +98,18 @@ var StringStream = (function StringStreamClosure() {
// super class for the decoding streams // super class for the decoding streams
var DecodeStream = (function DecodeStreamClosure() { var DecodeStream = (function DecodeStreamClosure() {
function DecodeStream() { function DecodeStream(maybeMinBufferLength) {
this.pos = 0; this.pos = 0;
this.bufferLength = 0; this.bufferLength = 0;
this.eof = false; this.eof = false;
this.buffer = null; this.buffer = null;
this.minBufferLength = 512;
if (maybeMinBufferLength) {
// Compute the first power of two that is as big as maybeMinBufferLength.
while (this.minBufferLength < maybeMinBufferLength) {
this.minBufferLength *= 2;
}
}
} }
DecodeStream.prototype = { DecodeStream.prototype = {
@ -117,7 +124,7 @@ var DecodeStream = (function DecodeStreamClosure() {
} else { } else {
current = 0; current = 0;
} }
var size = 512; var size = this.minBufferLength;
while (size < requested) { while (size < requested) {
size *= 2; size *= 2;
} }
@ -197,7 +204,7 @@ var DecodeStream = (function DecodeStreamClosure() {
var StreamsSequenceStream = (function StreamsSequenceStreamClosure() { var StreamsSequenceStream = (function StreamsSequenceStreamClosure() {
function StreamsSequenceStream(streams) { function StreamsSequenceStream(streams) {
this.streams = streams; this.streams = streams;
DecodeStream.call(this); DecodeStream.call(this, /* maybeLength = */ null);
} }
StreamsSequenceStream.prototype = Object.create(DecodeStream.prototype); StreamsSequenceStream.prototype = Object.create(DecodeStream.prototype);
@ -328,7 +335,7 @@ var FlateStream = (function FlateStreamClosure() {
0x50003, 0x50013, 0x5000b, 0x5001b, 0x50007, 0x50017, 0x5000f, 0x00000 0x50003, 0x50013, 0x5000b, 0x5001b, 0x50007, 0x50017, 0x5000f, 0x00000
]), 5]; ]), 5];
function FlateStream(str) { function FlateStream(str, maybeLength) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
@ -346,7 +353,7 @@ var FlateStream = (function FlateStreamClosure() {
this.codeSize = 0; this.codeSize = 0;
this.codeBuf = 0; this.codeBuf = 0;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
FlateStream.prototype = Object.create(DecodeStream.prototype); FlateStream.prototype = Object.create(DecodeStream.prototype);
@ -581,7 +588,7 @@ var FlateStream = (function FlateStreamClosure() {
})(); })();
var PredictorStream = (function PredictorStreamClosure() { var PredictorStream = (function PredictorStreamClosure() {
function PredictorStream(str, params) { function PredictorStream(str, maybeLength, params) {
var predictor = this.predictor = params.get('Predictor') || 1; var predictor = this.predictor = params.get('Predictor') || 1;
if (predictor <= 1) if (predictor <= 1)
@ -604,7 +611,7 @@ var PredictorStream = (function PredictorStreamClosure() {
this.pixBytes = (colors * bits + 7) >> 3; this.pixBytes = (colors * bits + 7) >> 3;
this.rowBytes = (columns * colors * bits + 7) >> 3; this.rowBytes = (columns * colors * bits + 7) >> 3;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
return this; return this;
} }
@ -774,21 +781,22 @@ var PredictorStream = (function PredictorStreamClosure() {
* DecodeStreams. * DecodeStreams.
*/ */
var JpegStream = (function JpegStreamClosure() { var JpegStream = (function JpegStreamClosure() {
function JpegStream(stream, length, dict, xref) { function JpegStream(stream, maybeLength, dict, xref) {
// TODO: per poppler, some images may have 'junk' before that // TODO: per poppler, some images may have 'junk' before that
// need to be removed // need to be removed
this.stream = stream; this.stream = stream;
this.length = length; this.maybeLength = maybeLength;
this.dict = dict; this.dict = dict;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
JpegStream.prototype = Object.create(DecodeStream.prototype); JpegStream.prototype = Object.create(DecodeStream.prototype);
Object.defineProperty(JpegStream.prototype, 'bytes', { Object.defineProperty(JpegStream.prototype, 'bytes', {
get: function JpegStream_bytes() { get: function JpegStream_bytes() {
return shadow(this, 'bytes', this.stream.getBytes(this.length)); // If this.maybeLength is null, we'll get the entire stream.
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
}, },
configurable: true configurable: true
}); });
@ -841,19 +849,20 @@ var JpegStream = (function JpegStreamClosure() {
* the stream behaves like all the other DecodeStreams. * the stream behaves like all the other DecodeStreams.
*/ */
var JpxStream = (function JpxStreamClosure() { var JpxStream = (function JpxStreamClosure() {
function JpxStream(stream, length, dict) { function JpxStream(stream, maybeLength, dict) {
this.stream = stream; this.stream = stream;
this.length = length; this.maybeLength = maybeLength;
this.dict = dict; this.dict = dict;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
JpxStream.prototype = Object.create(DecodeStream.prototype); JpxStream.prototype = Object.create(DecodeStream.prototype);
Object.defineProperty(JpxStream.prototype, 'bytes', { Object.defineProperty(JpxStream.prototype, 'bytes', {
get: function JpxStream_bytes() { get: function JpxStream_bytes() {
return shadow(this, 'bytes', this.stream.getBytes(this.length)); // If this.maybeLength is null, we'll get the entire stream.
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
}, },
configurable: true configurable: true
}); });
@ -948,19 +957,20 @@ var JpxStream = (function JpxStreamClosure() {
* the stream behaves like all the other DecodeStreams. * the stream behaves like all the other DecodeStreams.
*/ */
var Jbig2Stream = (function Jbig2StreamClosure() { var Jbig2Stream = (function Jbig2StreamClosure() {
function Jbig2Stream(stream, length, dict) { function Jbig2Stream(stream, maybeLength, dict) {
this.stream = stream; this.stream = stream;
this.length = length; this.maybeLength = maybeLength;
this.dict = dict; this.dict = dict;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
Jbig2Stream.prototype = Object.create(DecodeStream.prototype); Jbig2Stream.prototype = Object.create(DecodeStream.prototype);
Object.defineProperty(Jbig2Stream.prototype, 'bytes', { Object.defineProperty(Jbig2Stream.prototype, 'bytes', {
get: function Jbig2Stream_bytes() { get: function Jbig2Stream_bytes() {
return shadow(this, 'bytes', this.stream.getBytes(this.length)); // If this.maybeLength is null, we'll get the entire stream.
return shadow(this, 'bytes', this.stream.getBytes(this.maybeLength));
}, },
configurable: true configurable: true
}); });
@ -1004,14 +1014,14 @@ var Jbig2Stream = (function Jbig2StreamClosure() {
})(); })();
var DecryptStream = (function DecryptStreamClosure() { var DecryptStream = (function DecryptStreamClosure() {
function DecryptStream(str, decrypt) { function DecryptStream(str, maybeLength, decrypt) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
this.decrypt = decrypt; this.decrypt = decrypt;
this.nextChunk = null; this.nextChunk = null;
this.initialized = false; this.initialized = false;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
var chunkSize = 512; var chunkSize = 512;
@ -1048,12 +1058,17 @@ var DecryptStream = (function DecryptStreamClosure() {
})(); })();
var Ascii85Stream = (function Ascii85StreamClosure() { var Ascii85Stream = (function Ascii85StreamClosure() {
function Ascii85Stream(str) { function Ascii85Stream(str, maybeLength) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
this.input = new Uint8Array(5); this.input = new Uint8Array(5);
DecodeStream.call(this); // Most streams increase in size when decoded, but Ascii85 streams
// typically shrink by ~20%.
if (maybeLength) {
maybeLength = 0.8 * maybeLength;
}
DecodeStream.call(this, maybeLength);
} }
Ascii85Stream.prototype = Object.create(DecodeStream.prototype); Ascii85Stream.prototype = Object.create(DecodeStream.prototype);
@ -1121,13 +1136,18 @@ var Ascii85Stream = (function Ascii85StreamClosure() {
})(); })();
var AsciiHexStream = (function AsciiHexStreamClosure() { var AsciiHexStream = (function AsciiHexStreamClosure() {
function AsciiHexStream(str) { function AsciiHexStream(str, maybeLength) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
this.firstDigit = -1; this.firstDigit = -1;
DecodeStream.call(this); // Most streams increase in size when decoded, but AsciiHex streams shrink
// by 50%.
if (maybeLength) {
maybeLength = 0.5 * maybeLength;
}
DecodeStream.call(this, maybeLength);
} }
AsciiHexStream.prototype = Object.create(DecodeStream.prototype); AsciiHexStream.prototype = Object.create(DecodeStream.prototype);
@ -1178,11 +1198,11 @@ var AsciiHexStream = (function AsciiHexStreamClosure() {
})(); })();
var RunLengthStream = (function RunLengthStreamClosure() { var RunLengthStream = (function RunLengthStreamClosure() {
function RunLengthStream(str) { function RunLengthStream(str, maybeLength) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
RunLengthStream.prototype = Object.create(DecodeStream.prototype); RunLengthStream.prototype = Object.create(DecodeStream.prototype);
@ -1650,7 +1670,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
[2, 2], [2, 2], [2, 2], [2, 2] [2, 2], [2, 2], [2, 2], [2, 2]
]; ];
function CCITTFaxStream(str, params) { function CCITTFaxStream(str, maybeLength, params) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
@ -1691,7 +1711,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
this.eatBits(1); this.eatBits(1);
} }
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
CCITTFaxStream.prototype = Object.create(DecodeStream.prototype); CCITTFaxStream.prototype = Object.create(DecodeStream.prototype);
@ -2186,7 +2206,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
})(); })();
var LZWStream = (function LZWStreamClosure() { var LZWStream = (function LZWStreamClosure() {
function LZWStream(str, earlyChange) { function LZWStream(str, maybeLength, earlyChange) {
this.str = str; this.str = str;
this.dict = str.dict; this.dict = str.dict;
this.cachedData = 0; this.cachedData = 0;
@ -2209,7 +2229,7 @@ var LZWStream = (function LZWStreamClosure() {
} }
this.lzwState = lzwState; this.lzwState = lzwState;
DecodeStream.call(this); DecodeStream.call(this, maybeLength);
} }
LZWStream.prototype = Object.create(DecodeStream.prototype); LZWStream.prototype = Object.create(DecodeStream.prototype);

View File

@ -30,7 +30,7 @@ describe('stream', function() {
var input = new Stream(new Uint8Array([2, 100, 3, 2, 1, 255, 2, 1, 255]), var input = new Stream(new Uint8Array([2, 100, 3, 2, 1, 255, 2, 1, 255]),
0, 9, dict); 0, 9, dict);
var predictor = new PredictorStream(input, dict); var predictor = new PredictorStream(input, /* length = */ 9, dict);
var result = predictor.getBytes(6); var result = predictor.getBytes(6);
expect(result).toMatchTypedArray( expect(result).toMatchTypedArray(