Merge pull request #4423 from chriskr/font-aliases

Treat fonts with the same font descriptor and encoding as aliases
This commit is contained in:
Brendan Dahl 2014-04-09 10:26:09 -07:00
commit a6e5f31ca1
6 changed files with 245 additions and 10 deletions

View File

@ -349,7 +349,8 @@ target.bundle = function(args) {
'core/jpx.js',
'core/jbig2.js',
'core/bidi.js',
'core/cmap.js'
'core/cmap.js',
'core/murmurhash3.js'
];
if (!defines.SINGLE_FILE) {

View File

@ -17,11 +17,11 @@
/* globals assert, assertWellFormed, ColorSpace, DecodeStream, Dict, Encodings,
error, ErrorFont, Font, FONT_IDENTITY_MATRIX, fontCharsToUnicode,
FontFlags, ImageKind, info, isArray, isCmd, isDict, isEOF, isName,
isNum, isStream, isString, JpegStream, Lexer, Metrics, Name, Parser,
Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts,
getTilingPatternIR, warn, Util, Promise, LegacyPromise,
RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS,
UNSUPPORTED_FEATURES, UnsupportedManager */
isNum, isStream, isString, JpegStream, Lexer, Metrics,
MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts,
stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
LegacyPromise, RefSetCache, isRef, TextRenderingMode, CMapFactory,
OPS, UNSUPPORTED_FEATURES, UnsupportedManager */
'use strict';
@ -413,6 +413,36 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (!isDict(font)) {
return errorFont();
}
var preEvaluatedFont = this.preEvaluateFont(font, xref);
var descriptor = preEvaluatedFont.descriptor;
var fontID = fontRef.num + '_' + fontRef.gen;
if (isDict(descriptor)) {
if (!descriptor.fontAliases) {
descriptor.fontAliases = Object.create(null);
}
var fontAliases = descriptor.fontAliases;
var hash = preEvaluatedFont.hash;
if (fontAliases[hash]) {
var aliasFontRef = fontAliases[hash].aliasRef;
if (aliasFontRef && this.fontCache.has(aliasFontRef)) {
this.fontCache.putAlias(fontRef, aliasFontRef);
var cachedFont = this.fontCache.get(fontRef);
return cachedFont;
}
}
if (!fontAliases[hash]) {
fontAliases[hash] = {
fontID: Font.getFontID()
};
}
fontAliases[hash].aliasRef = fontRef;
fontID = fontAliases[hash].fontID;
}
// Workaround for bad PDF generators that don't reference fonts
// properly, i.e. by not using an object identifier.
// Check if the fontRef is a Dict (as opposed to a standard object),
@ -426,12 +456,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Keep track of each font we translated so the caller can
// load them asynchronously before calling display on a page.
font.loadedName = 'g_font_' + (fontRefIsDict ?
fontName.replace(/\W/g, '') : (fontRef.num + '_' + fontRef.gen));
fontName.replace(/\W/g, '') : fontID);
if (!font.translated) {
var translated;
try {
translated = this.translateFont(font, xref);
translated = this.translateFont(preEvaluatedFont, xref);
} catch (e) {
UnsupportedManager.notify(UNSUPPORTED_FEATURES.font);
translated = new ErrorFont(e instanceof Error ? e.message : e);
@ -1127,7 +1157,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return widths;
},
translateFont: function PartialEvaluator_translateFont(dict, xref) {
preEvaluateFont: function PartialEvaluator_preEvaluateFont(dict, xref) {
var baseDict = dict;
var type = dict.get('Subtype');
assertWellFormed(isName(type), 'invalid font Subtype');
@ -1148,9 +1178,55 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
assertWellFormed(isName(type), 'invalid font Subtype');
composite = true;
}
var maxCharIndex = (composite ? 0xFFFF : 0xFF);
var descriptor = dict.get('FontDescriptor');
if (descriptor) {
var hash = new MurmurHash3_64();
var encoding = baseDict.getRaw('Encoding');
if (isName(encoding)) {
hash.update(encoding.name);
} else if (isRef(encoding)) {
hash.update(encoding.num + '_' + encoding.gen);
}
var toUnicode = dict.get('ToUnicode') || baseDict.get('ToUnicode');
if (isStream(toUnicode)) {
var stream = toUnicode.str || toUnicode;
var uint8array = stream.buffer ?
new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) :
new Uint8Array(stream.bytes.buffer,
stream.start, stream.end - stream.start);
hash.update(uint8array);
} else if (isName(toUnicode)) {
hash.update(toUnicode.name);
}
var widths = dict.get('Widths') || baseDict.get('Widths');
if (widths) {
var uint8array = new Uint8Array(new Uint32Array(widths).buffer);
hash.update(uint8array);
}
}
return {
descriptor: descriptor,
dict: dict,
baseDict: baseDict,
composite: composite,
hash: hash ? hash.hexdigest() : ''
};
},
translateFont: function PartialEvaluator_translateFont(preEvaluatedFont,
xref) {
var baseDict = preEvaluatedFont.baseDict;
var dict = preEvaluatedFont.dict;
var composite = preEvaluatedFont.composite;
var descriptor = preEvaluatedFont.descriptor;
var type = dict.get('Subtype');
var maxCharIndex = (composite ? 0xFFFF : 0xFF);
if (!descriptor) {
if (type.name == 'Type3') {
// FontDescriptor is only required for Type3 fonts when the document

View File

@ -2328,6 +2328,13 @@ var Font = (function FontClosure() {
this.loading = true;
}
// Hands out identifiers from a counter private to this closure. The first
// call returns '1' and each later call returns the next integer, always as a
// string.
Font.getFontID = (function FontGetFontIDClosure() {
  var nextID = 1;
  return function Font_getFontID() {
    var id = String(nextID);
    nextID += 1;
    return id;
  };
})();
// Combines two bytes into one number, with b0 as the high byte and b1 as the
// low byte. No sign extension is applied: int16(0xff, 0xff) yields 65535.
function int16(b0, b1) {
  var highPart = b0 << 8;
  return highPart + b1;
}

146
src/core/murmurhash3.js Normal file
View File

@ -0,0 +1,146 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2014 Opera Software ASA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Based on https://code.google.com/p/smhasher/wiki/MurmurHash3.
* Hashes roughly 100 KB per millisecond on i7 3.4 GHz.
*/
'use strict';
/**
 * Incremental hash that keeps two 32-bit lanes (h1, h2) and reports them as
 * one 16-character hexadecimal string.
 *
 * Usage: new MurmurHash3_64(seed).update(a).update(b).hexdigest()
 *
 * NOTE: each update() call mixes its own trailing 1-3 bytes immediately, so
 * splitting the same bytes across several update() calls at positions that
 * are not multiples of four gives a different digest than one single call.
 */
var MurmurHash3_64 = (function MurmurHash3_64Closure() {
  // Workaround for missing math precision in JS: 32-bit products are
  // assembled from a high half and a low half that are masked separately.
  var MASK_HIGH = 0xffff0000;
  var MASK_LOW = 0xffff;

  /**
   * @param {number} [seed] - Optional seed; truncated to 32 bits and used for
   *   both lanes. A missing or falsy seed selects the built-in default.
   */
  function MurmurHash3_64 (seed) {
    var SEED = 0xc3d2e1f0;
    this.h1 = seed ? seed & 0xffffffff : SEED;
    this.h2 = seed ? seed & 0xffffffff : SEED;
  }

  MurmurHash3_64.prototype = {
    /**
     * Mixes more data into the hash state.
     *
     * @param {string|Uint8Array} input - A string contributes one byte per
     *   char code <= 0xff and two bytes (high, then low) otherwise. A
     *   Uint8Array contributes exactly the bytes visible through the view.
     * @returns {MurmurHash3_64} this, to allow chaining.
     * @throws {Error} If input is neither a string nor a Uint8Array.
     */
    update: function MurmurHash3_64_update(input) {
      var data, length, i;
      if (typeof input == 'string') {
        data = new Uint8Array(input.length * 2);
        length = 0;
        for (i = 0; i < input.length; i++) {
          var code = input.charCodeAt(i);
          if (code <= 0xff) {
            data[length++] = code;
          }
          else {
            data[length++] = code >>> 8;
            data[length++] = code & 0xff;
          }
        }
      } else {
        if (!(input instanceof Uint8Array)) {
          throw new Error('Wrong data format in MurmurHash3_64_update. ' +
                          'Input must be a string or Uint8Array');
        }
        // The 32-bit view below is created at offset 0 of the backing
        // buffer. A Uint8Array that starts elsewhere in its buffer (for
        // example a subarray) would therefore be hashed from the wrong
        // bytes, and an offset that is not a multiple of four cannot be
        // wrapped in a Uint32Array at all. Copy such views so the bytes to
        // hash always begin at offset 0.
        data = input.byteOffset === 0 ? input : new Uint8Array(input);
        length = data.length;
      }

      var blockCounts = length >> 2;
      var tailLength = length - blockCounts * 4;
      // Whole 4-byte blocks are read as 32-bit words in the platform's
      // byte order.
      var dataUint32 = new Uint32Array(data.buffer, 0, blockCounts);
      var k1 = 0;
      var k2 = 0;
      var h1 = this.h1;
      var h2 = this.h2;
      var C1 = 0xcc9e2d51;
      var C2 = 0x1b873593;
      var C1_LOW = C1 & MASK_LOW;
      var C2_LOW = C2 & MASK_LOW;

      // Blocks alternate between the lanes: odd indices feed h1, even
      // indices feed h2.
      for (i = 0; i < blockCounts; i++) {
        if (i & 1) {
          k1 = dataUint32[i];
          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
          k1 = k1 << 15 | k1 >>> 17;
          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
          h1 ^= k1;
          h1 = h1 << 13 | h1 >>> 19;
          h1 = h1 * 5 + 0xe6546b64;
        } else {
          k2 = dataUint32[i];
          k2 = (k2 * C1 & MASK_HIGH) | (k2 * C1_LOW & MASK_LOW);
          k2 = k2 << 15 | k2 >>> 17;
          k2 = (k2 * C2 & MASK_HIGH) | (k2 * C2_LOW & MASK_LOW);
          h2 ^= k2;
          h2 = h2 << 13 | h2 >>> 19;
          h2 = h2 * 5 + 0xe6546b64;
        }
      }

      // Remaining 1-3 bytes: gathered into k1, mixed, and folded into the
      // lane that the next full block would have used.
      k1 = 0;
      switch (tailLength) {
        case 3:
          k1 ^= data[blockCounts * 4 + 2] << 16;
          /* falls through */
        case 2:
          k1 ^= data[blockCounts * 4 + 1] << 8;
          /* falls through */
        case 1:
          k1 ^= data[blockCounts * 4];
          /* falls through */
          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
          k1 = k1 << 15 | k1 >>> 17;
          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
          if (blockCounts & 1) {
            h1 ^= k1;
          } else {
            h2 ^= k1;
          }
      }

      this.h1 = h1;
      this.h2 = h2;
      return this;
    },

    /**
     * Finalizes a copy of the current state; the stored h1/h2 are left
     * untouched, so further update() calls remain possible.
     *
     * @returns {string} 16 lowercase hex digits: h1 then h2, each
     *   zero-padded to eight digits.
     */
    hexdigest: function MurmurHash3_64_hexdigest () {
      var h1 = this.h1;
      var h2 = this.h2;

      h1 ^= h2 >>> 1;
      h1 = (h1 * 0xed558ccd & MASK_HIGH) | (h1 * 0x8ccd & MASK_LOW);
      h2 = (h2 * 0xff51afd7 & MASK_HIGH) |
           (((h2 << 16 | h1 >>> 16) * 0xafd7ed55 & MASK_HIGH) >>> 16);
      h1 ^= h2 >>> 1;
      h1 = (h1 * 0x1a85ec53 & MASK_HIGH) | (h1 * 0xec53 & MASK_LOW);
      h2 = (h2 * 0xc4ceb9fe & MASK_HIGH) |
           (((h2 << 16 | h1 >>> 16) * 0xb9fe1a85 & MASK_HIGH) >>> 16);
      h1 ^= h2 >>> 1;

      for (var i = 0, arr = [h1, h2], str = ''; i < arr.length; i++) {
        // >>> 0 reinterprets the lane as unsigned before formatting.
        var hex = (arr[i] >>> 0).toString(16);
        while (hex.length < 8) {
          hex = '0' + hex;
        }
        str += hex;
      }
      return str;
    }
  };

  return MurmurHash3_64;
})();

View File

@ -271,6 +271,10 @@ var RefSetCache = (function RefSetCacheClosure() {
this.dict['R' + ref.num + '.' + ref.gen] = obj;
},
putAlias: function RefSetCache_putAlias(ref, aliasRef) {
this.dict['R' + ref.num + '.' + ref.gen] = this.get(aliasRef);
},
forEach: function RefSetCache_forEach(fn, thisArg) {
for (var i in this.dict) {
fn.call(thisArg, this.dict[i]);

View File

@ -52,6 +52,7 @@ var otherFiles = [
'core/jpx.js',
'core/jbig2.js',
'core/bidi.js',
'core/murmurhash3.js',
'../external/jpgjs/jpg.js'
];