From 3f031f69c270bedd3c404e752dc12a72324aca99 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald <jonas.jenwald@gmail.com> Date: Tue, 7 Jan 2020 19:59:16 +0100 Subject: [PATCH 1/2] Move additional worker-thread only functions from `src/shared/util.js` and into a `src/core/core_utils.js` instead This moves the `log2`, `readInt8`, `readUint16`, `readUint32`, and `isSpace` functions since they are only used in the worker-thread. --- src/core/core_utils.js | 38 ++++++++++++++++++++++++++++++++++++ src/core/document.js | 2 +- src/core/fonts.js | 4 +--- src/core/jbig2.js | 10 ++-------- src/core/jpx.js | 10 ++-------- src/core/parser.js | 3 +-- src/core/ps_parser.js | 3 ++- src/core/stream.js | 8 ++------ src/core/type1_parser.js | 3 ++- src/shared/util.js | 38 ------------------------------------ test/unit/core_utils_spec.js | 31 +++++++++++++++++++++++++++++ test/unit/util_spec.js | 31 ----------------------------- 12 files changed, 82 insertions(+), 99 deletions(-) diff --git a/src/core/core_utils.js b/src/core/core_utils.js index f754e68c7..1be5c978a 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -132,6 +132,39 @@ function toRomanNumerals(number, lowerCase = false) { return lowerCase ? romanStr.toLowerCase() : romanStr; } +// Calculate the base 2 logarithm of the number `x`. This differs from the +// native function in the sense that it returns the ceiling value and that it +// returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0. +function log2(x) { + if (x <= 0) { + return 0; + } + return Math.ceil(Math.log2(x)); +} + +function readInt8(data, offset) { + return (data[offset] << 24) >> 24; +} + +function readUint16(data, offset) { + return (data[offset] << 8) | data[offset + 1]; +} + +function readUint32(data, offset) { + return ( + ((data[offset] << 24) | + (data[offset + 1] << 16) | + (data[offset + 2] << 8) | + data[offset + 3]) >>> + 0 + ); +} + +// Checks if ch is one of the following characters: SPACE, TAB, CR or LF. +function isSpace(ch) { + return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a; +} + export { getLookupTableFactory, MissingDataException, @@ -139,4 +172,9 @@ export { XRefParseException, getInheritableProperty, toRomanNumerals, + log2, + readInt8, + readUint16, + readUint32, + isSpace, }; diff --git a/src/core/document.js b/src/core/document.js index 420fa00c3..b58f8f352 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -23,7 +23,6 @@ import { isArrayEqual, isBool, isNum, - isSpace, isString, OPS, shadow, @@ -36,6 +35,7 @@ import { Catalog, ObjectLoader, XRef } from "./obj.js"; import { Dict, isDict, isName, isStream, Ref } from "./primitives.js"; import { getInheritableProperty, + isSpace, MissingDataException, XRefEntryException, XRefParseException, diff --git a/src/core/fonts.js b/src/core/fonts.js index 6cb730728..1aa2ee32a 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -21,8 +21,6 @@ import { FormatError, info, isNum, - isSpace, - readUint32, shadow, string32, unreachable, @@ -60,9 +58,9 @@ import { getUnicodeRangeFor, mapSpecialUnicodeValues, } from "./unicode.js"; +import { isSpace, MissingDataException, readUint32 } from "./core_utils.js"; import { FontRendererFactory } from "./font_renderer.js"; import { IdentityCMap } from "./cmap.js"; -import { MissingDataException } from "./core_utils.js"; import { Stream } from "./stream.js"; import { Type1Parser } from "./type1_parser.js"; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 2eafb1e73..b5b8f6179 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -13,14 +13,8 @@ * limitations under the License. */ -import { - BaseException, - log2, - readInt8, - readUint16, - readUint32, - shadow, -} from "../shared/util.js"; +import { BaseException, shadow } from "../shared/util.js"; +import { log2, readInt8, readUint16, readUint32 } from "./core_utils.js"; import { ArithmeticDecoder } from "./arithmetic_decoder.js"; import { CCITTFaxDecoder } from "./ccitt.js"; diff --git a/src/core/jpx.js b/src/core/jpx.js index 513d36b31..3e135be8d 100644 --- a/src/core/jpx.js +++ b/src/core/jpx.js @@ -13,14 +13,8 @@ * limitations under the License. */ -import { - BaseException, - info, - log2, - readUint16, - readUint32, - warn, -} from "../shared/util.js"; +import { BaseException, info, warn } from "../shared/util.js"; +import { log2, readUint16, readUint32 } from "./core_utils.js"; import { ArithmeticDecoder } from "./arithmetic_decoder.js"; class JpxError extends BaseException { diff --git a/src/core/parser.js b/src/core/parser.js index 3c9862962..bd92cf4ab 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -29,7 +29,6 @@ import { FormatError, info, isNum, - isSpace, StreamType, warn, } from "../shared/util.js"; @@ -44,11 +43,11 @@ import { Name, Ref, } from "./primitives.js"; +import { isSpace, MissingDataException } from "./core_utils.js"; import { CCITTFaxStream } from "./ccitt_stream.js"; import { Jbig2Stream } from "./jbig2_stream.js"; import { JpegStream } from "./jpeg_stream.js"; import { JpxStream } from "./jpx_stream.js"; -import { MissingDataException } from "./core_utils.js"; const MAX_LENGTH_TO_CACHE = 1000; const MAX_ADLER32_LENGTH = 5552; diff --git a/src/core/ps_parser.js b/src/core/ps_parser.js index 7cc6b3e82..b3e873fcd 100644 --- a/src/core/ps_parser.js +++ b/src/core/ps_parser.js @@ -14,8 +14,9 @@ */ /* eslint no-var: error */ -import { FormatError, isSpace, shadow } from "../shared/util.js"; +import { FormatError, shadow } from "../shared/util.js"; import { EOF } from "./primitives.js"; +import { isSpace } from "./core_utils.js"; class PostScriptParser { constructor(lexer) { diff --git a/src/core/stream.js b/src/core/stream.js index 253b261dd..b0b213011 100644 --- a/src/core/stream.js +++ b/src/core/stream.js @@ -19,13 +19,9 @@ * license. */ -import { - FormatError, - isSpace, - stringToBytes, - unreachable, -} from "../shared/util.js"; +import { FormatError, stringToBytes, unreachable } from "../shared/util.js"; import { isDict } from "./primitives.js"; +import { isSpace } from "./core_utils.js"; var Stream = (function StreamClosure() { function Stream(arrayBuffer, start, length, dict) { diff --git a/src/core/type1_parser.js b/src/core/type1_parser.js index fb84c984d..ad35fa768 100644 --- a/src/core/type1_parser.js +++ b/src/core/type1_parser.js @@ -13,9 +13,10 @@ * limitations under the License. */ -import { isSpace, warn } from "../shared/util.js"; import { getEncoding } from "./encodings.js"; +import { isSpace } from "./core_utils.js"; import { Stream } from "./stream.js"; +import { warn } from "../shared/util.js"; // Hinting is currently disabled due to unknown problems on windows // in tracemonkey and various other pdfs with type1 fonts. diff --git a/src/shared/util.js b/src/shared/util.js index e32617c0b..6df2ee036 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -547,34 +547,6 @@ function string32(value) { ); } -// Calculate the base 2 logarithm of the number `x`. This differs from the -// native function in the sense that it returns the ceiling value and that it -// returns 0 instead of `Infinity`/`NaN` for `x` values smaller than/equal to 0. -function log2(x) { - if (x <= 0) { - return 0; - } - return Math.ceil(Math.log2(x)); -} - -function readInt8(data, start) { - return (data[start] << 24) >> 24; -} - -function readUint16(data, offset) { - return (data[offset] << 8) | data[offset + 1]; -} - -function readUint32(data, offset) { - return ( - ((data[offset] << 24) | - (data[offset + 1] << 16) | - (data[offset + 2] << 8) | - data[offset + 3]) >>> - 0 - ); -} - // Lazy test the endianness of the platform // NOTE: This will be 'true' for simulated TypedArrays function isLittleEndian() { @@ -835,11 +807,6 @@ function isArrayEqual(arr1, arr2) { }); } -// Checks if ch is one of the following characters: SPACE, TAB, CR or LF. -function isSpace(ch) { - return ch === 0x20 || ch === 0x09 || ch === 0x0d || ch === 0x0a; -} - /** * Promise Capability object. * @@ -949,15 +916,10 @@ export { isEmptyObj, isNum, isString, - isSpace, isSameOrigin, createValidAbsoluteUrl, isLittleEndian, isEvalSupported, - log2, - readInt8, - readUint16, - readUint32, removeNullCharacters, setVerbosityLevel, shadow, diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index a4184c9f8..9e0590847 100644 --- a/test/unit/core_utils_spec.js +++ b/test/unit/core_utils_spec.js @@ -16,6 +16,8 @@ import { Dict, Ref } from "../../src/core/primitives.js"; import { getInheritableProperty, + isSpace, + log2, toRomanNumerals, } from "../../src/core/core_utils.js"; import { XRefMock } from "./test_utils.js"; @@ -180,4 +182,33 @@ describe("core_utils", function() { expect(toRomanNumerals(2019, /* lowercase = */ true)).toEqual("mmxix"); }); }); + + describe("log2", function() { + it("handles values smaller than/equal to zero", function() { + expect(log2(0)).toEqual(0); + expect(log2(-1)).toEqual(0); + }); + + it("handles values larger than zero", function() { + expect(log2(1)).toEqual(0); + expect(log2(2)).toEqual(1); + expect(log2(3)).toEqual(2); + expect(log2(3.14)).toEqual(2); + }); + }); + + describe("isSpace", function() { + it("handles space characters", function() { + expect(isSpace(0x20)).toEqual(true); + expect(isSpace(0x09)).toEqual(true); + expect(isSpace(0x0d)).toEqual(true); + expect(isSpace(0x0a)).toEqual(true); + }); + + it("handles non-space characters", function() { + expect(isSpace(0x0b)).toEqual(false); + expect(isSpace(null)).toEqual(false); + expect(isSpace(undefined)).toEqual(false); + }); + }); }); diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index 38f83632e..99fb3fe97 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -22,9 +22,7 @@ import { isEmptyObj, isNum, isSameOrigin, - isSpace, isString, - log2, removeNullCharacters, string32, stringToBytes, @@ -118,21 +116,6 @@ describe("util", function() { }); }); - describe("isSpace", function() { - it("handles space characters", function() { - expect(isSpace(0x20)).toEqual(true); - expect(isSpace(0x09)).toEqual(true); - expect(isSpace(0x0d)).toEqual(true); - expect(isSpace(0x0a)).toEqual(true); - }); - - it("handles non-space characters", function() { - expect(isSpace(0x0b)).toEqual(false); - expect(isSpace(null)).toEqual(false); - expect(isSpace(undefined)).toEqual(false); - }); - }); - describe("isString", function() { it("handles string values", function() { expect(isString("foo")).toEqual(true); @@ -147,20 +130,6 @@ describe("util", function() { }); }); - describe("log2", function() { - it("handles values smaller than/equal to zero", function() { - expect(log2(0)).toEqual(0); - expect(log2(-1)).toEqual(0); - }); - - it("handles values larger than zero", function() { - expect(log2(1)).toEqual(0); - expect(log2(2)).toEqual(1); - expect(log2(3)).toEqual(2); - expect(log2(3.14)).toEqual(2); - }); - }); - describe("string32", function() { it("converts unsigned 32-bit integers to strings", function() { expect(string32(0x74727565)).toEqual("true"); From 188b320e182bc9d81b2daa5176f279fadf3c03d6 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald <jonas.jenwald@gmail.com> Date: Tue, 7 Jan 2020 20:22:14 +0100 Subject: [PATCH 2/2] Convert `src/core/jpg.js` to use the `readUint16` helper function in `src/core/core_utils.js`, rather than re-implementing it twice The other image decoders, i.e. the JBIG2 and JPEG 2000 ones, are using the common helper function `readUint16`. Most likely, the only reason that the JPEG decoder is doing it this way is because it originated *outside* of the PDF.js library. Hence we can simply re-factor `src/core/jpg.js` to use the common `readUint16` helper function, which is especially nice given that the functionality was essentially *duplicated* in the code. --- src/core/jpg.js | 65 +++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/src/core/jpg.js b/src/core/jpg.js index a17948e89..12d732814 100644 --- a/src/core/jpg.js +++ b/src/core/jpg.js @@ -14,6 +14,7 @@ */ import { assert, BaseException, warn } from "../shared/util.js"; +import { readUint16 } from "./core_utils.js"; class JpegError extends BaseException { constructor(msg) { @@ -148,8 +149,10 @@ var JpegImage = (function JpegImageClosure() { var nextByte = data[offset++]; if (nextByte) { if (nextByte === 0xdc && parseDNLMarker) { - offset += 2; // Skip data length. - const scanLines = (data[offset++] << 8) | data[offset++]; + offset += 2; // Skip marker length. + + const scanLines = readUint16(data, offset); + offset += 2; if (scanLines > 0 && scanLines !== frame.scanLines) { throw new DNLMarkerError( "Found DNL marker (0xFFDC) while parsing scan data", @@ -706,17 +709,13 @@ var JpegImage = (function JpegImageClosure() { } function findNextFileMarker(data, currentPos, startPos = currentPos) { - function peekUint16(pos) { - return (data[pos] << 8) | data[pos + 1]; - } - const maxPos = data.length - 1; var newPos = startPos < currentPos ? startPos : currentPos; if (currentPos >= maxPos) { return null; // Don't attempt to read non-existent data and just return. } - var currentMarker = peekUint16(currentPos); + var currentMarker = readUint16(data, currentPos); if (currentMarker >= 0xffc0 && currentMarker <= 0xfffe) { return { invalid: null, @@ -724,12 +723,12 @@ var JpegImage = (function JpegImageClosure() { offset: currentPos, }; } - var newMarker = peekUint16(newPos); + var newMarker = readUint16(data, newPos); while (!(newMarker >= 0xffc0 && newMarker <= 0xfffe)) { if (++newPos >= maxPos) { return null; // Don't attempt to read non-existent data and just return. } - newMarker = peekUint16(newPos); + newMarker = readUint16(data, newPos); } return { invalid: currentMarker.toString(16), @@ -740,15 +739,10 @@ var JpegImage = (function JpegImageClosure() { JpegImage.prototype = { parse(data, { dnlScanLines = null } = {}) { - function readUint16() { - var value = (data[offset] << 8) | data[offset + 1]; - offset += 2; - return value; - } - function readDataBlock() { - var length = readUint16(); - var endOffset = offset + length - 2; + const length = readUint16(data, offset); + offset += 2; + let endOffset = offset + length - 2; var fileMarker = findNextFileMarker(data, endOffset, offset); if (fileMarker && fileMarker.invalid) { @@ -796,12 +790,15 @@ var JpegImage = (function JpegImageClosure() { var quantizationTables = []; var huffmanTablesAC = [], huffmanTablesDC = []; - var fileMarker = readUint16(); + + let fileMarker = readUint16(data, offset); + offset += 2; if (fileMarker !== /* SOI (Start of Image) = */ 0xffd8) { throw new JpegError("SOI not found"); } + fileMarker = readUint16(data, offset); + offset += 2; - fileMarker = readUint16(); markerLoop: while (fileMarker !== /* EOI (End of Image) = */ 0xffd9) { var i, j, l; switch (fileMarker) { @@ -868,7 +865,8 @@ var JpegImage = (function JpegImageClosure() { break; case 0xffdb: // DQT (Define Quantization Tables) - var quantizationTablesLength = readUint16(); + const quantizationTablesLength = readUint16(data, offset); + offset += 2; var quantizationTablesEnd = quantizationTablesLength + offset - 2; var z; while (offset < quantizationTablesEnd) { @@ -884,7 +882,8 @@ var JpegImage = (function JpegImageClosure() { // 16 bit values for (j = 0; j < 64; j++) { z = dctZigZag[j]; - tableData[z] = readUint16(); + tableData[z] = readUint16(data, offset); + offset += 2; } } else { throw new JpegError("DQT - invalid table spec"); @@ -899,14 +898,17 @@ var JpegImage = (function JpegImageClosure() { if (frame) { throw new JpegError("Only single frame JPEGs supported"); } - readUint16(); // skip data length + offset += 2; // Skip marker length. + frame = {}; frame.extended = fileMarker === 0xffc1; frame.progressive = fileMarker === 0xffc2; frame.precision = data[offset++]; - const sofScanLines = readUint16(); + const sofScanLines = readUint16(data, offset); + offset += 2; frame.scanLines = dnlScanLines || sofScanLines; - frame.samplesPerLine = readUint16(); + frame.samplesPerLine = readUint16(data, offset); + offset += 2; frame.components = []; frame.componentIds = {}; var componentsCount = data[offset++], @@ -939,7 +941,8 @@ var JpegImage = (function JpegImageClosure() { break; case 0xffc4: // DHT (Define Huffman Tables) - var huffmanLength = readUint16(); + const huffmanLength = readUint16(data, offset); + offset += 2; for (i = 2; i < huffmanLength; ) { var huffmanTableSpec = data[offset++]; var codeLengths = new Uint8Array(16); @@ -960,8 +963,10 @@ var JpegImage = (function JpegImageClosure() { break; case 0xffdd: // DRI (Define Restart Interval) - readUint16(); // skip data length - resetInterval = readUint16(); + offset += 2; // Skip marker length. + + resetInterval = readUint16(data, offset); + offset += 2; break; case 0xffda: // SOS (Start of Scan) @@ -971,7 +976,8 @@ var JpegImage = (function JpegImageClosure() { // parse DNL markers during re-parsing of the JPEG scan data. const parseDNLMarker = ++numSOSMarkers === 1 && !dnlScanLines; - readUint16(); // scanLength + offset += 2; // Skip marker length. + var selectorsCount = data[offset++]; var components = [], component; @@ -1055,7 +1061,8 @@ var JpegImage = (function JpegImageClosure() { "JpegImage.parse - unknown marker: " + fileMarker.toString(16) ); } - fileMarker = readUint16(); + fileMarker = readUint16(data, offset); + offset += 2; } this.width = frame.samplesPerLine;