diff --git a/make.js b/make.js index 62b71ca26..6ba805b02 100644 --- a/make.js +++ b/make.js @@ -349,7 +349,8 @@ target.bundle = function(args) { 'core/jpx.js', 'core/jbig2.js', 'core/bidi.js', - 'core/cmap.js' + 'core/cmap.js', + 'core/murmurhash3.js' ]; if (!defines.SINGLE_FILE) { diff --git a/src/core/evaluator.js b/src/core/evaluator.js index d8ce942d7..d4ee4543e 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -17,11 +17,11 @@ /* globals assert, assertWellFormed, ColorSpace, DecodeStream, Dict, Encodings, error, ErrorFont, Font, FONT_IDENTITY_MATRIX, fontCharsToUnicode, FontFlags, ImageKind, info, isArray, isCmd, isDict, isEOF, isName, - isNum, isStream, isString, JpegStream, Lexer, Metrics, Name, Parser, - Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts, - getTilingPatternIR, warn, Util, Promise, LegacyPromise, - RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS, - UNSUPPORTED_FEATURES, UnsupportedManager */ + isNum, isStream, isString, JpegStream, Lexer, Metrics, + MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts, + stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise, + LegacyPromise, RefSetCache, isRef, TextRenderingMode, CMapFactory, + OPS, UNSUPPORTED_FEATURES, UnsupportedManager */ 'use strict'; @@ -413,6 +413,36 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (!isDict(font)) { return errorFont(); } + + var preEvaluatedFont = this.preEvaluateFont(font, xref); + var descriptor = preEvaluatedFont.descriptor; + var fontID = fontRef.num + '_' + fontRef.gen; + if (isDict(descriptor)) { + if (!descriptor.fontAliases) { + descriptor.fontAliases = Object.create(null); + } + + var fontAliases = descriptor.fontAliases; + var hash = preEvaluatedFont.hash; + if (fontAliases[hash]) { + var aliasFontRef = fontAliases[hash].aliasRef; + if (aliasFontRef && this.fontCache.has(aliasFontRef)) { + this.fontCache.putAlias(fontRef, aliasFontRef); + var cachedFont = 
this.fontCache.get(fontRef); + return cachedFont; + } + } + + if (!fontAliases[hash]) { + fontAliases[hash] = { + fontID: Font.getFontID() + }; + } + + fontAliases[hash].aliasRef = fontRef; + fontID = fontAliases[hash].fontID; + } + // Workaround for bad PDF generators that don't reference fonts // properly, i.e. by not using an object identifier. // Check if the fontRef is a Dict (as opposed to a standard object), @@ -426,12 +456,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // Keep track of each font we translated so the caller can // load them asynchronously before calling display on a page. font.loadedName = 'g_font_' + (fontRefIsDict ? - fontName.replace(/\W/g, '') : (fontRef.num + '_' + fontRef.gen)); + fontName.replace(/\W/g, '') : fontID); if (!font.translated) { var translated; try { - translated = this.translateFont(font, xref); + translated = this.translateFont(preEvaluatedFont, xref); } catch (e) { UnsupportedManager.notify(UNSUPPORTED_FEATURES.font); translated = new ErrorFont(e instanceof Error ? e.message : e); @@ -1127,7 +1157,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return widths; }, - translateFont: function PartialEvaluator_translateFont(dict, xref) { + preEvaluateFont: function PartialEvaluator_preEvaluateFont(dict, xref) { var baseDict = dict; var type = dict.get('Subtype'); assertWellFormed(isName(type), 'invalid font Subtype'); @@ -1148,9 +1178,55 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { assertWellFormed(isName(type), 'invalid font Subtype'); composite = true; } - var maxCharIndex = (composite ? 
0xFFFF : 0xFF); var descriptor = dict.get('FontDescriptor'); + if (descriptor) { + var hash = new MurmurHash3_64(); + var encoding = baseDict.getRaw('Encoding'); + if (isName(encoding)) { + hash.update(encoding.name); + } else if (isRef(encoding)) { + hash.update(encoding.num + '_' + encoding.gen); + } + + var toUnicode = dict.get('ToUnicode') || baseDict.get('ToUnicode'); + if (isStream(toUnicode)) { + var stream = toUnicode.str || toUnicode; + var uint8array = stream.buffer ? + new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) : + new Uint8Array(stream.bytes.buffer, + stream.start, stream.end - stream.start); + hash.update(uint8array); + + } else if (isName(toUnicode)) { + hash.update(toUnicode.name); + } + + var widths = dict.get('Widths') || baseDict.get('Widths'); + if (widths) { + var uint8array = new Uint8Array(new Uint32Array(widths).buffer); + hash.update(uint8array); + } + } + + return { + descriptor: descriptor, + dict: dict, + baseDict: baseDict, + composite: composite, + hash: hash ? hash.hexdigest() : '' + }; + }, + + translateFont: function PartialEvaluator_translateFont(preEvaluatedFont, + xref) { + var baseDict = preEvaluatedFont.baseDict; + var dict = preEvaluatedFont.dict; + var composite = preEvaluatedFont.composite; + var descriptor = preEvaluatedFont.descriptor; + var type = dict.get('Subtype'); + var maxCharIndex = (composite ? 
0xFFFF : 0xFF);
+
+
       if (!descriptor) {
         if (type.name == 'Type3') {
           // FontDescriptor is only required for Type3 fonts when the document
diff --git a/src/core/fonts.js b/src/core/fonts.js
index e8280739e..9a76e8830 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -2328,6 +2328,13 @@ var Font = (function FontClosure() {
     this.loading = true;
   }
 
+  Font.getFontID = (function () {
+    var ID = 1;
+    return function Font_getFontID() {
+      return String(ID++);
+    };
+  })();
+
   function int16(b0, b1) {
     return (b0 << 8) + b1;
   }
diff --git a/src/core/murmurhash3.js b/src/core/murmurhash3.js
new file mode 100644
index 000000000..c2e716330
--- /dev/null
+++ b/src/core/murmurhash3.js
@@ -0,0 +1,172 @@
+/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
+
+/* Copyright 2014 Opera Software ASA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * Based on https://code.google.com/p/smhasher/wiki/MurmurHash3.
+ * Hashes roughly 100 KB per millisecond on i7 3.4 GHz.
+ */
+
+'use strict';
+
+var MurmurHash3_64 = (function MurmurHash3_64Closure() {
+  // Workaround for missing math precision in JS.
+  var MASK_HIGH = 0xffff0000;
+  var MASK_LOW = 0xffff;
+
+  /**
+   * Incremental hasher that keeps two 32-bit state words (h1 and h2).
+   * @param {number} [seed] - Initial value for both state words; a falsy
+   *   seed (including 0) selects the built-in default.
+   */
+  function MurmurHash3_64 (seed) {
+    var SEED = 0xc3d2e1f0;
+    this.h1 = seed ? seed & 0xffffffff : SEED;
+    this.h2 = seed ? seed & 0xffffffff : SEED;
+  }
+
+  MurmurHash3_64.prototype = {
+    /**
+     * Mixes more data into the hash state.
+     * @param {string|Uint8Array} input - Strings are converted to bytes
+     *   (one byte for char codes <= 0xff, otherwise two, high byte first).
+     * @returns {MurmurHash3_64} This instance, to allow chaining.
+     * @throws {Error} If input is neither a string nor a Uint8Array.
+     */
+    update: function MurmurHash3_64_update(input) {
+      if (typeof input == 'string') {
+        var data = new Uint8Array(input.length * 2);
+        var length = 0;
+        for (var i = 0; i < input.length; i++) {
+          var code = input.charCodeAt(i);
+          if (code <= 0xff) {
+            data[length++] = code;
+          }
+          else {
+            data[length++] = code >>> 8;
+            data[length++] = code & 0xff;
+          }
+        }
+      } else {
+        if (!(input instanceof Uint8Array)) {
+          throw new Error('Wrong data format in MurmurHash3_64_update. ' +
+              'Input must be a string or Uint8Array');
+        }
+        data = input;
+        length = data.length;
+      }
+
+      var blockCounts = length >> 2;
+      var tailLength = length - blockCounts * 4;
+      // The 32-bit view has to cover the same bytes as `data`, so honour
+      // the byte offset of views that do not start at the beginning of
+      // their ArrayBuffer. A Uint32Array offset must be 4-byte aligned;
+      // unaligned views are copied first (the copy starts at offset 0).
+      var byteOffset = data.byteOffset;
+      if (byteOffset & 3) {
+        data = new Uint8Array(data);
+        byteOffset = 0;
+      }
+      var dataUint32 = new Uint32Array(data.buffer, byteOffset, blockCounts);
+      var k1 = 0;
+      var k2 = 0;
+      var h1 = this.h1;
+      var h2 = this.h2;
+      var C1 = 0xcc9e2d51;
+      var C2 = 0x1b873593;
+      var C1_LOW = C1 & MASK_LOW;
+      var C2_LOW = C2 & MASK_LOW;
+
+      for (var i = 0; i < blockCounts; i++) {
+        if (i & 1) {
+          k1 = dataUint32[i];
+          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
+          k1 = k1 << 15 | k1 >>> 17;
+          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
+          h1 ^= k1;
+          h1 = h1 << 13 | h1 >>> 19;
+          h1 = h1 * 5 + 0xe6546b64;
+        } else {
+          k2 = dataUint32[i];
+          k2 = (k2 * C1 & MASK_HIGH) | (k2 * C1_LOW & MASK_LOW);
+          k2 = k2 << 15 | k2 >>> 17;
+          k2 = (k2 * C2 & MASK_HIGH) | (k2 * C2_LOW & MASK_LOW);
+          h2 ^= k2;
+          h2 = h2 << 13 | h2 >>> 19;
+          h2 = h2 * 5 + 0xe6546b64;
+        }
+      }
+
+      k1 = 0;
+
+      switch (tailLength) {
+        case 3:
+          k1 ^= data[blockCounts * 4 + 2] << 16;
+          /* falls through */
+        case 2:
+          k1 ^= data[blockCounts * 4 + 1] << 8;
+          /* falls through */
+        case 1:
+          k1 ^= data[blockCounts * 4];
+          /* falls through */
+          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
+          k1 = k1 << 15 | k1 >>> 17;
+          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
+          if (blockCounts & 1) {
+            h1 ^= k1;
+          } else {
+            h2 ^= k1;
+          }
+      }
+
+      this.h1 = h1;
+      this.h2 = h2;
+      return this;
+    },
+
+    /**
+     * Applies the final mixing steps to a copy of the state; the instance
+     * itself is not modified.
+     * @returns {string} 16 lowercase hex digits (h1 then h2, zero padded).
+     */
+    hexdigest: function MurmurHash3_64_hexdigest () {
+      var h1 = this.h1;
+      var h2 = this.h2;
+
+      h1 ^= h2 >>> 1;
+      h1 = (h1 * 0xed558ccd & MASK_HIGH) | (h1 * 0x8ccd & MASK_LOW);
+      h2 = (h2 * 0xff51afd7 & MASK_HIGH) |
+           (((h2 << 16 | h1 >>> 16) * 0xafd7ed55 & MASK_HIGH) >>> 16);
+      h1 ^= h2 >>> 1;
+      h1 = (h1 * 0x1a85ec53 & MASK_HIGH) | (h1 * 0xec53 & MASK_LOW);
+      h2 = (h2 * 0xc4ceb9fe & MASK_HIGH) |
+           (((h2 << 16 | h1 >>> 16) * 0xb9fe1a85 & MASK_HIGH) >>> 16);
+      h1 ^= h2 >>> 1;
+
+      for (var i = 0, arr = [h1, h2], str = ''; i < arr.length; i++) {
+        var hex = (arr[i] >>> 0).toString(16);
+        while (hex.length < 8) {
+          hex = '0' + hex;
+        }
+        str += hex;
+      }
+
+      return str;
+    }
+  };
+
+  return MurmurHash3_64;
+})();
diff --git a/src/core/obj.js b/src/core/obj.js
index ab4bb04c7..59c8dbf46 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -271,6 +271,10 @@ var RefSetCache = (function RefSetCacheClosure() {
       this.dict['R' + ref.num + '.' + ref.gen] = obj;
     },
 
+    putAlias: function RefSetCache_putAlias(ref, aliasRef) {
+      this.dict['R' + ref.num + '.' + ref.gen] = this.get(aliasRef);
+    },
+
     forEach: function RefSetCache_forEach(fn, thisArg) {
       for (var i in this.dict) {
         fn.call(thisArg, this.dict[i]);
diff --git a/src/worker_loader.js b/src/worker_loader.js
index 3333c3009..7ee5e9d77 100644
--- a/src/worker_loader.js
+++ b/src/worker_loader.js
@@ -52,6 +52,7 @@ var otherFiles = [
   'core/jpx.js',
   'core/jbig2.js',
   'core/bidi.js',
+  'core/murmurhash3.js',
   '../external/jpgjs/jpg.js'
 ];
 