Treat fonts with the same font descriptor, encoding and unicode map as aliases

Different fonts can point to the same font descriptor
(see https://github.com/mozilla/pdf.js/issues/4339 for details). With this
commit, such fonts are treated as aliases if they also have the same encoding
and the same toUnicode map. The corresponding info is stored on the font descriptor.
This change must also ensure that aliases always use the same font name,
because translated fonts can get cleared depending on the CLEANUP_TIMEOUT setting.
This commit is contained in:
Christian Krebs 2014-03-03 18:44:45 +01:00
parent 608c6cea5a
commit 79f34b183c
6 changed files with 245 additions and 10 deletions

View File

@ -349,7 +349,8 @@ target.bundle = function(args) {
'core/jpx.js',
'core/jbig2.js',
'core/bidi.js',
'core/cmap.js'
'core/cmap.js',
'core/murmurhash3.js'
];
if (!defines.SINGLE_FILE) {

View File

@ -17,11 +17,11 @@
/* globals assert, assertWellFormed, ColorSpace, DecodeStream, Dict, Encodings,
error, ErrorFont, Font, FONT_IDENTITY_MATRIX, fontCharsToUnicode,
FontFlags, ImageKind, info, isArray, isCmd, isDict, isEOF, isName,
isNum, isStream, isString, JpegStream, Lexer, Metrics, Name, Parser,
Pattern, PDFImage, PDFJS, serifFonts, stdFontMap, symbolsFonts,
getTilingPatternIR, warn, Util, Promise, LegacyPromise,
RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS,
UNSUPPORTED_FEATURES, UnsupportedManager */
isNum, isStream, isString, JpegStream, Lexer, Metrics,
MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts,
stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
LegacyPromise, RefSetCache, isRef, TextRenderingMode, CMapFactory,
OPS, UNSUPPORTED_FEATURES, UnsupportedManager */
'use strict';
@ -413,6 +413,36 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (!isDict(font)) {
return errorFont();
}
var preEvaluatedFont = this.preEvaluateFont(font, xref);
var descriptor = preEvaluatedFont.descriptor;
var fontID = fontRef.num + '_' + fontRef.gen;
if (isDict(descriptor)) {
if (!descriptor.fontAliases) {
descriptor.fontAliases = Object.create(null);
}
var fontAliases = descriptor.fontAliases;
var hash = preEvaluatedFont.hash;
if (fontAliases[hash]) {
var aliasFontRef = fontAliases[hash].aliasRef;
if (aliasFontRef && this.fontCache.has(aliasFontRef)) {
this.fontCache.putAlias(fontRef, aliasFontRef);
var cachedFont = this.fontCache.get(fontRef);
return cachedFont;
}
}
if (!fontAliases[hash]) {
fontAliases[hash] = {
fontID: Font.getFontID()
};
}
fontAliases[hash].aliasRef = fontRef;
fontID = fontAliases[hash].fontID;
}
// Workaround for bad PDF generators that don't reference fonts
// properly, i.e. by not using an object identifier.
// Check if the fontRef is a Dict (as opposed to a standard object),
@ -426,12 +456,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// Keep track of each font we translated so the caller can
// load them asynchronously before calling display on a page.
font.loadedName = 'g_font_' + (fontRefIsDict ?
fontName.replace(/\W/g, '') : (fontRef.num + '_' + fontRef.gen));
fontName.replace(/\W/g, '') : fontID);
if (!font.translated) {
var translated;
try {
translated = this.translateFont(font, xref);
translated = this.translateFont(preEvaluatedFont, xref);
} catch (e) {
UnsupportedManager.notify(UNSUPPORTED_FEATURES.font);
translated = new ErrorFont(e instanceof Error ? e.message : e);
@ -1127,7 +1157,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return widths;
},
translateFont: function PartialEvaluator_translateFont(dict, xref) {
preEvaluateFont: function PartialEvaluator_preEvaluateFont(dict, xref) {
var baseDict = dict;
var type = dict.get('Subtype');
assertWellFormed(isName(type), 'invalid font Subtype');
@ -1148,9 +1178,55 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
assertWellFormed(isName(type), 'invalid font Subtype');
composite = true;
}
var maxCharIndex = (composite ? 0xFFFF : 0xFF);
var descriptor = dict.get('FontDescriptor');
if (descriptor) {
var hash = new MurmurHash3_64();
var encoding = baseDict.getRaw('Encoding');
if (isName(encoding)) {
hash.update(encoding.name);
} else if (isRef(encoding)) {
hash.update(encoding.num + '_' + encoding.gen);
}
var toUnicode = dict.get('ToUnicode') || baseDict.get('ToUnicode');
if (isStream(toUnicode)) {
var stream = toUnicode.str || toUnicode;
var uint8array = stream.buffer ?
new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength) :
new Uint8Array(stream.bytes.buffer,
stream.start, stream.end - stream.start);
hash.update(uint8array);
} else if (isName(toUnicode)) {
hash.update(toUnicode.name);
}
var widths = dict.get('Widths') || baseDict.get('Widths');
if (widths) {
var uint8array = new Uint8Array(new Uint32Array(widths).buffer);
hash.update(uint8array);
}
}
return {
descriptor: descriptor,
dict: dict,
baseDict: baseDict,
composite: composite,
hash: hash ? hash.hexdigest() : ''
};
},
translateFont: function PartialEvaluator_translateFont(preEvaluatedFont,
xref) {
var baseDict = preEvaluatedFont.baseDict;
var dict = preEvaluatedFont.dict;
var composite = preEvaluatedFont.composite;
var descriptor = preEvaluatedFont.descriptor;
var type = dict.get('Subtype');
var maxCharIndex = (composite ? 0xFFFF : 0xFF);
if (!descriptor) {
if (type.name == 'Type3') {
// FontDescriptor is only required for Type3 fonts when the document

View File

@ -2328,6 +2328,13 @@ var Font = (function FontClosure() {
this.loading = true;
}
// Returns a new sequential font ID on every call, as a decimal string
// ('1', '2', '3', ...). The counter is private to the closure below.
Font.getFontID = (function FontGetFontIDClosure() {
  var lastID = 0;
  return function Font_getFontID() {
    lastID += 1;
    return String(lastID);
  };
})();
// Combines two byte values into one number, with b0 as the high byte and b1
// as the low byte.
function int16(b0, b1) {
  var high = b0 << 8;
  return high + b1;
}

146
src/core/murmurhash3.js Normal file
View File

@ -0,0 +1,146 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2014 Opera Software ASA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Based on https://code.google.com/p/smhasher/wiki/MurmurHash3.
* Hashes roughly 100 KB per millisecond on i7 3.4 GHz.
*/
'use strict';
/**
 * Incremental 64-bit hash built from two interleaved 32-bit MurmurHash3-style
 * lanes (h1 and h2).
 *
 * Usage: new MurmurHash3_64(seed).update(a).update(b).hexdigest()
 *
 * Notes:
 *  - Blocks are read through a Uint32Array view, so the result depends on the
 *    platform's byte order.
 *  - Trailing bytes (length % 4) are mixed in at the end of every update()
 *    call, so the way data is split across update() calls affects the result.
 */
var MurmurHash3_64 = (function MurmurHash3_64Closure() {
  // Workaround for missing math precision in JS: 32-bit multiplications are
  // done as two partial products whose high and low 16 bits are recombined.
  var MASK_HIGH = 0xffff0000;
  var MASK_LOW = 0xffff;

  /**
   * @param {number} [seed] - Optional seed; truncated to 32 bits. A falsy
   *   value (including 0) selects the built-in default seed.
   */
  function MurmurHash3_64(seed) {
    var SEED = 0xc3d2e1f0;
    this.h1 = seed ? seed & 0xffffffff : SEED;
    this.h2 = seed ? seed & 0xffffffff : SEED;
  }

  MurmurHash3_64.prototype = {
    /**
     * Mixes more data into the hash state.
     *
     * @param {string|Uint8Array} input - Strings are converted to bytes: char
     *   codes <= 0xff become one byte, larger codes become two bytes (high
     *   byte first).
     * @returns {MurmurHash3_64} this, to allow chaining.
     * @throws {Error} If input is neither a string nor a Uint8Array.
     */
    update: function MurmurHash3_64_update(input) {
      var data, length, i;
      if (typeof input == 'string') {
        data = new Uint8Array(input.length * 2);
        length = 0;
        for (i = 0; i < input.length; i++) {
          var code = input.charCodeAt(i);
          if (code <= 0xff) {
            data[length++] = code;
          } else {
            data[length++] = code >>> 8;
            data[length++] = code & 0xff;
          }
        }
      } else {
        if (!(input instanceof Uint8Array)) {
          throw new Error('Wrong data format in MurmurHash3_64_update. ' +
                          'Input must be a string or Uint8Array');
        }
        data = input;
        // The block view below is created over data.buffer starting at byte
        // 0. A Uint8Array that is itself a view starting part-way into its
        // buffer would therefore have the wrong bytes hashed (and an
        // unaligned offset cannot be used for a Uint32Array at all), so such
        // inputs are copied into a fresh, zero-offset buffer first.
        if (data.byteOffset !== 0) {
          data = new Uint8Array(data);
        }
        length = data.length;
      }

      var blockCounts = length >> 2;
      var tailLength = length - blockCounts * 4;
      var dataUint32 = new Uint32Array(data.buffer, 0, blockCounts);
      var k1 = 0;
      var k2 = 0;
      var h1 = this.h1;
      var h2 = this.h2;
      var C1 = 0xcc9e2d51;
      var C2 = 0x1b873593;
      var C1_LOW = C1 & MASK_LOW;
      var C2_LOW = C2 & MASK_LOW;

      // Odd-indexed blocks feed lane h1, even-indexed blocks feed lane h2.
      for (i = 0; i < blockCounts; i++) {
        if (i & 1) {
          k1 = dataUint32[i];
          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
          k1 = k1 << 15 | k1 >>> 17;
          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
          h1 ^= k1;
          h1 = h1 << 13 | h1 >>> 19;
          h1 = h1 * 5 + 0xe6546b64;
        } else {
          k2 = dataUint32[i];
          k2 = (k2 * C1 & MASK_HIGH) | (k2 * C1_LOW & MASK_LOW);
          k2 = k2 << 15 | k2 >>> 17;
          k2 = (k2 * C2 & MASK_HIGH) | (k2 * C2_LOW & MASK_LOW);
          h2 ^= k2;
          h2 = h2 << 13 | h2 >>> 19;
          h2 = h2 * 5 + 0xe6546b64;
        }
      }

      // Mix the 1-3 trailing bytes into the lane the next block would use.
      k1 = 0;
      switch (tailLength) {
        case 3:
          k1 ^= data[blockCounts * 4 + 2] << 16;
          /* falls through */
        case 2:
          k1 ^= data[blockCounts * 4 + 1] << 8;
          /* falls through */
        case 1:
          k1 ^= data[blockCounts * 4];

          k1 = (k1 * C1 & MASK_HIGH) | (k1 * C1_LOW & MASK_LOW);
          k1 = k1 << 15 | k1 >>> 17;
          k1 = (k1 * C2 & MASK_HIGH) | (k1 * C2_LOW & MASK_LOW);
          if (blockCounts & 1) {
            h1 ^= k1;
          } else {
            h2 ^= k1;
          }
      }

      this.h1 = h1;
      this.h2 = h2;
      return this;
    },

    /**
     * Runs the final mixing steps on a copy of the current state and returns
     * the result. The stored state (this.h1 / this.h2) is left untouched.
     *
     * @returns {string} 16 lowercase hex digits: h1 then h2, each zero-padded
     *   to 8 digits.
     */
    hexdigest: function MurmurHash3_64_hexdigest() {
      var h1 = this.h1;
      var h2 = this.h2;

      h1 ^= h2 >>> 1;
      h1 = (h1 * 0xed558ccd & MASK_HIGH) | (h1 * 0x8ccd & MASK_LOW);
      h2 = (h2 * 0xff51afd7 & MASK_HIGH) |
           (((h2 << 16 | h1 >>> 16) * 0xafd7ed55 & MASK_HIGH) >>> 16);
      h1 ^= h2 >>> 1;
      h1 = (h1 * 0x1a85ec53 & MASK_HIGH) | (h1 * 0xec53 & MASK_LOW);
      h2 = (h2 * 0xc4ceb9fe & MASK_HIGH) |
           (((h2 << 16 | h1 >>> 16) * 0xb9fe1a85 & MASK_HIGH) >>> 16);
      h1 ^= h2 >>> 1;

      var lanes = [h1, h2];
      var str = '';
      for (var i = 0; i < lanes.length; i++) {
        // >>> 0 reinterprets the signed 32-bit value as unsigned before
        // formatting.
        var hex = (lanes[i] >>> 0).toString(16);
        while (hex.length < 8) {
          hex = '0' + hex;
        }
        str += hex;
      }
      return str;
    }
  };

  return MurmurHash3_64;
})();

View File

@ -271,6 +271,10 @@ var RefSetCache = (function RefSetCacheClosure() {
this.dict['R' + ref.num + '.' + ref.gen] = obj;
},
putAlias: function RefSetCache_putAlias(ref, aliasRef) {
this.dict['R' + ref.num + '.' + ref.gen] = this.get(aliasRef);
},
forEach: function RefSetCache_forEach(fn, thisArg) {
for (var i in this.dict) {
fn.call(thisArg, this.dict[i]);

View File

@ -52,6 +52,7 @@ var otherFiles = [
'core/jpx.js',
'core/jbig2.js',
'core/bidi.js',
'core/murmurhash3.js',
'../external/jpgjs/jpg.js'
];