Merge pull request #5101 from nnethercote/CMap-forEach

Avoid expensive for..in loops involving CMaps
This commit is contained in:
Yury Delendik 2014-07-31 23:03:25 -05:00
commit ad2ea78280
3 changed files with 59 additions and 30 deletions

View File

@ -199,7 +199,7 @@ var CMap = (function CMapClosure() {
// where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...] // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
this.codespaceRanges = [[], [], [], []]; this.codespaceRanges = [[], [], [], []];
this.numCodespaceRanges = 0; this.numCodespaceRanges = 0;
this.map = []; this._map = [];
this.vertical = false; this.vertical = false;
this.useCMap = null; this.useCMap = null;
this.builtInCMap = builtInCMap; this.builtInCMap = builtInCMap;
@ -213,7 +213,7 @@ var CMap = (function CMapClosure() {
mapRange: function(low, high, dstLow) { mapRange: function(low, high, dstLow) {
var lastByte = dstLow.length - 1; var lastByte = dstLow.length - 1;
while (low <= high) { while (low <= high) {
this.map[low] = dstLow; this._map[low] = dstLow;
// Only the last byte has to be incremented. // Only the last byte has to be incremented.
dstLow = dstLow.substr(0, lastByte) + dstLow = dstLow.substr(0, lastByte) +
String.fromCharCode(dstLow.charCodeAt(lastByte) + 1); String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
@ -224,17 +224,51 @@ var CMap = (function CMapClosure() {
mapRangeToArray: function(low, high, array) { mapRangeToArray: function(low, high, array) {
var i = 0, ii = array.length; var i = 0, ii = array.length;
while (low <= high && i < ii) { while (low <= high && i < ii) {
this.map[low] = array[i++]; this._map[low] = array[i++];
++low; ++low;
} }
}, },
mapOne: function(src, dst) { mapOne: function(src, dst) {
this.map[src] = dst; this._map[src] = dst;
}, },
lookup: function(code) { lookup: function(code) {
return this.map[code]; return this._map[code];
},
contains: function(code) {
return this._map[code] !== undefined;
},
forEach: function(callback) {
// Most maps have fewer than 65536 entries, and for those we use normal
// array iteration. But really sparse tables are possible -- e.g. with
// indices in the *billions*. For such tables we use for..in, which isn't
// ideal because it stringifies the indices for all present elements, but
// it does avoid iterating over every undefined entry.
var map = this._map;
var length = map.length;
var i;
if (length <= 0x10000) {
for (i = 0; i < length; i++) {
if (map[i] !== undefined) {
callback(i, map[i]);
}
}
} else {
for (i in this._map) {
callback(i, map[i]);
}
}
},
charCodeOf: function(value) {
return this._map.indexOf(value);
},
// Returns the internal `_map` array itself (not a copy), so any change a
// caller makes to the returned array is visible to this CMap.
getMap: function() {
return this._map;
}, },
readCharCode: function(str, offset) { readCharCode: function(str, offset) {
@ -789,12 +823,11 @@ var CMapFactory = (function CMapFactoryClosure() {
} }
// Merge the map into the current one, making sure not to override // Merge the map into the current one, making sure not to override
// any previously defined entries. // any previously defined entries.
for (var key in cMap.useCMap.map) { cMap.useCMap.forEach(function(key, value) {
if (key in cMap.map) { if (!cMap.contains(key)) {
continue; cMap.mapOne(key, cMap.useCMap.lookup(key));
} }
cMap.map[key] = cMap.useCMap.map[key]; });
}
} }
function parseBinaryCMap(name, builtInCMapParams) { function parseBinaryCMap(name, builtInCMapParams) {

View File

@ -1309,10 +1309,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var cmapObj = toUnicode; var cmapObj = toUnicode;
if (isName(cmapObj)) { if (isName(cmapObj)) {
return CMapFactory.create(cmapObj, return CMapFactory.create(cmapObj,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).map; { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap();
} else if (isStream(cmapObj)) { } else if (isStream(cmapObj)) {
var cmap = CMapFactory.create(cmapObj, var cmap = CMapFactory.create(cmapObj,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).map; { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap();
// Convert UTF-16BE // Convert UTF-16BE
// NOTE: cmap can be a sparse array, so use forEach instead of for(;;) // NOTE: cmap can be a sparse array, so use forEach instead of for(;;)
// to iterate over all keys. // to iterate over all keys.

View File

@ -3898,10 +3898,7 @@ var Font = (function FontClosure() {
if (properties.type === 'CIDFontType2') { if (properties.type === 'CIDFontType2') {
var cidToGidMap = properties.cidToGidMap || []; var cidToGidMap = properties.cidToGidMap || [];
var cidToGidMapLength = cidToGidMap.length; var cidToGidMapLength = cidToGidMap.length;
var cMap = properties.cMap.map; properties.cMap.forEach(function(charCode, cid) {
for (charCode in cMap) {
charCode |= 0;
var cid = cMap[charCode];
assert(cid.length === 1, 'Max size of CID is 65,535'); assert(cid.length === 1, 'Max size of CID is 65,535');
cid = cid.charCodeAt(0); cid = cid.charCodeAt(0);
var glyphId = -1; var glyphId = -1;
@ -3913,7 +3910,7 @@ var Font = (function FontClosure() {
if (glyphId >= 0 && glyphId < numGlyphs) { if (glyphId >= 0 && glyphId < numGlyphs) {
charCodeToGlyphId[charCode] = glyphId; charCodeToGlyphId[charCode] = glyphId;
} }
} });
if (dupFirstEntry) { if (dupFirstEntry) {
charCodeToGlyphId[0] = numGlyphs - 1; charCodeToGlyphId[0] = numGlyphs - 1;
} }
@ -3971,7 +3968,7 @@ var Font = (function FontClosure() {
if (!found && properties.glyphNames) { if (!found && properties.glyphNames) {
// Try to map using the post table. There are currently no known // Try to map using the post table. There are currently no known
// pdfs that this fixes. // pdfs that this fixes.
glyphId = properties.glyphNames.indexOf(glyphName); var glyphId = properties.glyphNames.indexOf(glyphName);
if (glyphId > 0) { if (glyphId > 0) {
charCodeToGlyphId[charCode] = glyphId; charCodeToGlyphId[charCode] = glyphId;
} }
@ -4372,18 +4369,17 @@ var Font = (function FontClosure() {
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
var cMap = properties.cMap; var cMap = properties.cMap;
toUnicode = []; toUnicode = [];
for (charcode in cMap.map) { cMap.forEach(function(charcode, cid) {
var cid = cMap.map[charcode];
assert(cid.length === 1, 'Max size of CID is 65,535'); assert(cid.length === 1, 'Max size of CID is 65,535');
// e) Map the CID obtained in step (a) according to the CMap obtained // e) Map the CID obtained in step (a) according to the CMap obtained
// in step (d), producing a Unicode value. // in step (d), producing a Unicode value.
var ucs2 = ucs2CMap.map[cid.charCodeAt(0)]; var ucs2 = ucs2CMap.lookup(cid.charCodeAt(0));
if (!ucs2) { if (ucs2) {
continue; toUnicode[charcode] =
String.fromCharCode((ucs2.charCodeAt(0) << 8) +
ucs2.charCodeAt(1));
} }
toUnicode[charcode] = String.fromCharCode((ucs2.charCodeAt(0) << 8) + });
ucs2.charCodeAt(1));
}
map.toUnicode = toUnicode; map.toUnicode = toUnicode;
return map; return map;
} }
@ -4418,7 +4414,7 @@ var Font = (function FontClosure() {
// finding the charcode via unicodeToCID map // finding the charcode via unicodeToCID map
var charcode = 0; var charcode = 0;
if (this.composite) { if (this.composite) {
if (glyphUnicode in this.cMap.map) { if (this.cMap.contains(glyphUnicode)) {
charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0); charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0);
} }
} }
@ -4447,8 +4443,8 @@ var Font = (function FontClosure() {
var fontCharCode, width, operatorListId; var fontCharCode, width, operatorListId;
var widthCode = charcode; var widthCode = charcode;
if (this.cMap && charcode in this.cMap.map) { if (this.cMap && this.cMap.contains(charcode)) {
widthCode = this.cMap.map[charcode].charCodeAt(0); widthCode = this.cMap.lookup(charcode).charCodeAt(0);
} }
width = this.widths[widthCode]; width = this.widths[widthCode];
width = isNum(width) ? width : this.defaultWidth; width = isNum(width) ? width : this.defaultWidth;
@ -5631,7 +5627,7 @@ var CFFFont = (function CFFFontClosure() {
// to map CIDs to GIDs. // to map CIDs to GIDs.
for (glyphId = 0; glyphId < charsets.length; glyphId++) { for (glyphId = 0; glyphId < charsets.length; glyphId++) {
var cidString = String.fromCharCode(charsets[glyphId]); var cidString = String.fromCharCode(charsets[glyphId]);
var charCode = properties.cMap.map.indexOf(cidString); var charCode = properties.cMap.charCodeOf(cidString);
charCodeToGlyphId[charCode] = glyphId; charCodeToGlyphId[charCode] = glyphId;
} }
} else { } else {