diff --git a/README.md b/README.md index 97db68d36..f3500ae4d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pdf.js - + ## Overview diff --git a/extensions/firefox/install.rdf b/extensions/firefox/install.rdf index 952d55fbf..8b793c525 100644 --- a/extensions/firefox/install.rdf +++ b/extensions/firefox/install.rdf @@ -6,13 +6,13 @@ uriloader@pdf.js pdf.js - 0.1 + 0.1.0 chrome://pdf.js/skin/logo.png {ec8030f7-c20a-464f-9b0e-13a3a9e97384} 6.0 - 11.0.* + 11.0a1 true @@ -20,5 +20,6 @@ Vivien Nicolas pdf.js uri loader https://github.com/mozilla/pdf.js/ + 2 diff --git a/src/canvas.js b/src/canvas.js index c1d2785e8..0913b582a 100644 --- a/src/canvas.js +++ b/src/canvas.js @@ -174,7 +174,7 @@ var CanvasGraphics = (function canvasGraphics() { // before it stops and shedules a continue of execution. var kExecutionTime = 50; - function constructor(canvasCtx, objs) { + function constructor(canvasCtx, objs, textLayer) { this.ctx = canvasCtx; this.current = new CanvasExtraState(); this.stateStack = []; @@ -183,7 +183,7 @@ var CanvasGraphics = (function canvasGraphics() { this.xobjs = null; this.ScratchCanvas = ScratchCanvas; this.objs = objs; - + this.textLayer = textLayer; if (canvasCtx) { addContextCurrentTransform(canvasCtx); } @@ -212,7 +212,13 @@ var CanvasGraphics = (function canvasGraphics() { this.ctx.transform(0, -1, -1, 0, cw, ch); break; } + // Scale so that canvas units are the same as PDF user space units this.ctx.scale(cw / mediaBox.width, ch / mediaBox.height); + this.textDivs = []; + this.textLayerQueue = []; + // Prevent textLayerQueue from being rendered while rendering a new page + if (this.textLayerTimer) + clearTimeout(this.textLayerTimer); }, executeIRQueue: function canvasGraphicsExecuteIRQueue(codeIR, @@ -270,6 +276,37 @@ var CanvasGraphics = (function canvasGraphics() { endDrawing: function canvasGraphicsEndDrawing() { this.ctx.restore(); + + var textLayer = this.textLayer; + if (!textLayer) + return; + + var self = this; + var renderTextLayer = function canvasRenderTextLayer() { + var textDivs = self.textDivs; + for (var i = 0, length = textDivs.length; i < length; ++i) { + if (textDivs[i].dataset.textLength > 1) { // avoid div by zero + textLayer.appendChild(textDivs[i]); + // Adjust div width (via letterSpacing) to match canvas text + // Due to the .offsetWidth calls, this is slow + textDivs[i].style.letterSpacing = + ((textDivs[i].dataset.canvasWidth - textDivs[i].offsetWidth) / + (textDivs[i].dataset.textLength - 1)) + 'px'; + } + } + } + var textLayerQueue = this.textLayerQueue; + textLayerQueue.push(renderTextLayer); + + // Lazy textLayer rendering (to prevent UI hangs) + // Only render queue if activity has stopped, where "no activity" == + // "no beginDrawing() calls in the last N ms" + this.textLayerTimer = setTimeout(function renderTextLayerQueue() { + // Render most recent (==most relevant) layers first + for (var i = textLayerQueue.length - 1; i >= 0; i--) { + textLayerQueue.pop().call(); + } + }, 500); }, // Graphics state @@ -528,23 +565,93 @@ var CanvasGraphics = (function canvasGraphics() { nextLine: function canvasGraphicsNextLine() { this.moveText(0, this.current.leading); }, - showText: function canvasGraphicsShowText(text) { + applyTextTransforms: function canvasApplyTransforms() { + var ctx = this.ctx; + var current = this.current; + var textHScale = current.textHScale; + var fontMatrix = current.font.fontMatrix || IDENTITY_MATRIX; + + ctx.transform.apply(ctx, current.textMatrix); + ctx.scale(1, -1); + ctx.translate(current.x, -1 * current.y); + ctx.transform.apply(ctx, fontMatrix); + ctx.scale(textHScale, 1); + }, + getTextGeometry: function canvasGetTextGeometry() { + var geometry = {}; + var ctx = this.ctx; + var font = this.current.font; + var ctxMatrix = ctx.mozCurrentTransform; + if (ctxMatrix) { + var bl = Util.applyTransform([0, 0], ctxMatrix); + var tr = Util.applyTransform([1, 1], ctxMatrix); + geometry.x = bl[0]; + geometry.y = bl[1]; + geometry.hScale = tr[0] - bl[0]; + geometry.vScale = tr[1] - bl[1]; + } + var spaceGlyph = font.charsToGlyphs(' '); + + // Hack (sometimes space is not encoded) + if (spaceGlyph.length === 0 || spaceGlyph[0].width === 0) + spaceGlyph = font.charsToGlyphs('i'); + + // Fallback + if (spaceGlyph.length === 0 || spaceGlyph[0].width === 0) + spaceGlyph = [{width: 0}]; + + geometry.spaceWidth = spaceGlyph[0].width; + return geometry; + }, + + pushTextDivs: function canvasGraphicsPushTextDivs(text) { + var div = document.createElement('div'); + var fontSize = this.current.fontSize; + + // vScale and hScale already contain the scaling to pixel units + // as mozCurrentTransform reflects ctx.scale() changes + // (see beginDrawing()) + var fontHeight = fontSize * text.geom.vScale; + div.dataset.canvasWidth = text.canvasWidth * text.geom.hScale; + + div.style.fontSize = fontHeight + 'px'; + div.style.fontFamily = this.current.font.loadedName || 'sans-serif'; + div.style.left = text.geom.x + 'px'; + div.style.top = (text.geom.y - fontHeight) + 'px'; + div.innerHTML = text.str; + div.dataset.textLength = text.length; + this.textDivs.push(div); + }, + showText: function canvasGraphicsShowText(str, skipTextSelection) { var ctx = this.ctx; var current = this.current; var font = current.font; - var glyphs = font.charsToGlyphs(text); + var glyphs = font.charsToGlyphs(str); var fontSize = current.fontSize; var charSpacing = current.charSpacing; var wordSpacing = current.wordSpacing; var textHScale = current.textHScale; + var fontMatrix = font.fontMatrix || IDENTITY_MATRIX; + var textHScale2 = textHScale * fontMatrix[0]; var glyphsLength = glyphs.length; + var textLayer = this.textLayer; + var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var textSelection = textLayer && !skipTextSelection ? true : false; + + if (textSelection) { + ctx.save(); + this.applyTextTransforms(); + text.geom = this.getTextGeometry(); + ctx.restore(); + } + + // Type3 fonts - each glyph is a "mini-PDF" if (font.coded) { ctx.save(); ctx.transform.apply(ctx, current.textMatrix); ctx.translate(current.x, current.y); - var fontMatrix = font.fontMatrix || IDENTITY_MATRIX; - ctx.scale(1 / textHScale, 1); + ctx.scale(textHScale, 1); for (var i = 0; i < glyphsLength; ++i) { var glyph = glyphs[i]; @@ -564,18 +671,16 @@ var CanvasGraphics = (function canvasGraphics() { var width = transformed[0] * fontSize + charSpacing; ctx.translate(width, 0); - current.x += width; + current.x += width * textHScale2; + text.str += glyph.unicode; + text.length++; + text.canvasWidth += width; } ctx.restore(); } else { ctx.save(); - ctx.transform.apply(ctx, current.textMatrix); - ctx.scale(1, -1); - ctx.translate(current.x, -1 * current.y); - ctx.transform.apply(ctx, font.fontMatrix || IDENTITY_MATRIX); - - ctx.scale(1 / textHScale, 1); + this.applyTextTransforms(); var width = 0; for (var i = 0; i < glyphsLength; ++i) { @@ -586,36 +691,78 @@ var CanvasGraphics = (function canvasGraphics() { continue; } - var unicode = glyph.unicode; - var char = (unicode >= 0x10000) ? - String.fromCharCode(0xD800 | ((unicode - 0x10000) >> 10), - 0xDC00 | (unicode & 0x3FF)) : String.fromCharCode(unicode); - + var char = glyph.fontChar; + var charWidth = glyph.width * fontSize * 0.001 + charSpacing; ctx.fillText(char, width, 0); - width += glyph.width * fontSize * 0.001 + charSpacing; - } - current.x += width; + width += charWidth; + text.str += glyph.unicode === ' ' ? ' ' : glyph.unicode; + text.length++; + text.canvasWidth += charWidth; + } + current.x += width * textHScale2; ctx.restore(); } - }, + if (textSelection) + this.pushTextDivs(text); + + return text; + }, showSpacedText: function canvasGraphicsShowSpacedText(arr) { var ctx = this.ctx; var current = this.current; var fontSize = current.fontSize; - var textHScale = current.textHScale; + var textHScale2 = current.textHScale * + (current.font.fontMatrix || IDENTITY_MATRIX)[0]; var arrLength = arr.length; + var textLayer = this.textLayer; + var font = current.font; + var text = {str: '', length: 0, canvasWidth: 0, geom: {}}; + var textSelection = textLayer ? true : false; + + if (textSelection) { + ctx.save(); + this.applyTextTransforms(); + text.geom = this.getTextGeometry(); + ctx.restore(); + } + for (var i = 0; i < arrLength; ++i) { var e = arr[i]; if (isNum(e)) { - current.x -= e * 0.001 * fontSize * textHScale; + var spacingLength = -e * 0.001 * fontSize * textHScale2; + current.x += spacingLength; + + if (textSelection) { + // Emulate precise spacing via HTML spaces + text.canvasWidth += spacingLength; + if (e < 0 && text.geom.spaceWidth > 0) { // avoid div by zero + var numFakeSpaces = Math.round(-e / text.geom.spaceWidth); + for (var j = 0; j < numFakeSpaces; ++j) + text.str += ' '; + text.length += numFakeSpaces > 0 ? 1 : 0; + } + } } else if (isString(e)) { - this.showText(e); + var shownText = this.showText(e, true); + + if (textSelection) { + if (shownText.str === ' ') { + text.str += ' '; + } else { + text.str += shownText.str; + } + text.canvasWidth += shownText.canvasWidth; + text.length += e.length; + } } else { malformed('TJ array element ' + e + ' is not string or num'); } } + + if (textSelection) + this.pushTextDivs(text); }, nextLineShowText: function canvasGraphicsNextLineShowText(text) { this.nextLine(); diff --git a/src/core.js b/src/core.js index f2977ecd6..284b3cb40 100644 --- a/src/core.js +++ b/src/core.js @@ -70,7 +70,6 @@ var Page = (function pagePage() { this.ctx = null; this.callback = null; - this.errorback = null; } constructor.prototype = { @@ -164,7 +163,7 @@ var Page = (function pagePage() { IRQueue, fonts) { var self = this; this.IRQueue = IRQueue; - var gfx = new CanvasGraphics(this.ctx, this.objs); + var gfx = new CanvasGraphics(this.ctx, this.objs, this.textLayer); var displayContinuation = function pageDisplayContinuation() { // Always defer call to display() to work around bug in @@ -173,8 +172,8 @@ var Page = (function pagePage() { try { self.display(gfx, self.callback); } catch (e) { - if (self.errorback) - self.errorback(e); + if (self.callback) + self.callback(e); else throw e; } @@ -251,6 +250,7 @@ var Page = (function pagePage() { startIdx = gfx.executeIRQueue(IRQueue, startIdx, next); if (startIdx == length) { self.stats.render = Date.now(); + gfx.endDrawing(); if (callback) callback(); } } @@ -313,10 +313,10 @@ var Page = (function pagePage() { } return links; }, - startRendering: function pageStartRendering(ctx, callback, errorback) { + startRendering: function pageStartRendering(ctx, callback, textLayer) { this.ctx = ctx; this.callback = callback; - this.errorback = errorback; + this.textLayer = textLayer; this.startRenderingTime = Date.now(); this.pdf.startRendering(this); @@ -569,20 +569,9 @@ var PDFDoc = (function pdfDoc() { var properties = data[4]; if (file) { + // Rewrap the ArrayBuffer in a stream. var fontFileDict = new Dict(); - fontFileDict.map = file.dict.map; - - var fontFile = new Stream(file.bytes, file.start, - file.end - file.start, fontFileDict); - - // Check if this is a FlateStream. Otherwise just use the created - // Stream one. This makes complex_ttf_font.pdf work. - var cmf = file.bytes[0]; - if ((cmf & 0x0f) == 0x08) { - file = new FlateStream(fontFile); - } else { - file = fontFile; - } + file = new Stream(file, 0, file.length, fontFileDict); } // For now, resolve the font object here direclty. The real font @@ -612,8 +601,8 @@ var PDFDoc = (function pdfDoc() { messageHandler.on('page_error', function pdfDocError(data) { var page = this.pageCache[data.pageNum]; - if (page.errorback) - page.errorback(data.error); + if (page.callback) + page.callback(data.error); else throw data.error; }, this); diff --git a/src/evaluator.js b/src/evaluator.js index a863a531e..954c3bec3 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -155,6 +155,11 @@ var PartialEvaluator = (function partialEvaluator() { font.loadedName = loadedName; var translated = font.translated; + // Convert the file to an ArrayBuffer which will be turned back into + // a Stream in the main thread. + if (translated.file) + translated.file = translated.file.getBytes(); + handler.send('obj', [ loadedName, 'Font', @@ -493,6 +498,8 @@ var PartialEvaluator = (function partialEvaluator() { var baseName = encoding.get('BaseEncoding'); if (baseName) baseEncoding = Encodings[baseName.name]; + else + hasEncoding = false; // base encoding was not provided // Load the differences between the base and original if (encoding.has('Differences')) { @@ -512,6 +519,7 @@ var PartialEvaluator = (function partialEvaluator() { error('Encoding is not a Name nor a Dict'); } } + properties.differences = differences; properties.baseEncoding = baseEncoding; properties.hasEncoding = hasEncoding; @@ -554,9 +562,21 @@ var PartialEvaluator = (function partialEvaluator() { var startRange = tokens[j]; var endRange = tokens[j + 1]; var code = tokens[j + 2]; - while (startRange <= endRange) { - charToUnicode[startRange] = code++; - ++startRange; + if (code == 0xFFFF) { + // CMap is broken, assuming code == startRange + code = startRange; + } + if (isArray(code)) { + var codeindex = 0; + while (startRange <= endRange) { + charToUnicode[startRange] = code[codeindex++]; + ++startRange; + } + } else { + while (startRange <= endRange) { + charToUnicode[startRange] = code++; + ++startRange; + } } } break; @@ -595,9 +615,18 @@ var PartialEvaluator = (function partialEvaluator() { } } else if (byte == 0x3E) { if (token.length) { - // parsing hex number - tokens.push(parseInt(token, 16)); - token = ''; + if (token.length <= 4) { + // parsing hex number + tokens.push(parseInt(token, 16)); + token = ''; + } else { + // parsing hex UTF-16BE numbers + var str = []; + for (var i = 0, ii = token.length; i < ii; i += 4) + str.push(parseInt(token.substr(i, 4), 16)); + tokens.push(String.fromCharCode.apply(String, str)); + token = ''; + } } } else { token += String.fromCharCode(byte); diff --git a/src/fonts.js b/src/fonts.js index 116bb4dfc..672739ea4 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -719,20 +719,10 @@ function getUnicodeRangeFor(value) { return -1; } -function adaptUnicode(unicode) { - return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) ? - unicode + kCmapGlyphOffset : unicode; -} - -function isAdaptedUnicode(unicode) { - return unicode >= kCmapGlyphOffset && - unicode < kCmapGlyphOffset + kSizeOfGlyphArea; -} - function isSpecialUnicode(unicode) { return (unicode <= 0x1F || (unicode >= 127 && unicode < kSizeOfGlyphArea)) || - unicode >= kCmapGlyphOffset && - unicode < kCmapGlyphOffset + kSizeOfGlyphArea; + (unicode >= kCmapGlyphOffset && + unicode < kCmapGlyphOffset + kSizeOfGlyphArea); } /** @@ -771,16 +761,21 @@ var Font = (function Font() { this.widths = properties.widths; this.defaultWidth = properties.defaultWidth; this.composite = properties.composite; - this.toUnicode = properties.toUnicode; this.hasEncoding = properties.hasEncoding; this.fontMatrix = properties.fontMatrix; + this.widthMultiplier = 1.0; if (properties.type == 'Type3') return; // Trying to fix encoding using glyph CIDSystemInfo. this.loadCidToUnicode(properties); + if (properties.toUnicode) + this.toUnicode = properties.toUnicode; + else + this.rebuildToUnicode(properties); + if (!file) { // The file data is not specified. Trying to fix the font name // to be used with the canvas.font. @@ -832,6 +827,8 @@ var Font = (function Font() { this.data = data; this.fontMatrix = properties.fontMatrix; + this.widthMultiplier = !properties.fontMatrix ? 1.0 : + 1.0 / properties.fontMatrix[0]; this.encoding = properties.baseEncoding; this.hasShortCmap = properties.hasShortCmap; this.loadedName = getUniqueName(); @@ -961,15 +958,15 @@ var Font = (function Font() { var ranges = []; for (var n = 0; n < length; ) { var start = codes[n].unicode; - var startCode = codes[n].code; + var codeIndices = [codes[n].code]; ++n; var end = start; while (n < length && end + 1 == codes[n].unicode) { + codeIndices.push(codes[n].code); ++end; ++n; } - var endCode = codes[n - 1].code; - ranges.push([start, end, startCode, endCode]); + ranges.push([start, end, codeIndices]); } return ranges; @@ -1012,17 +1009,16 @@ var Font = (function Font() { idDeltas += string16(0); idRangeOffsets += string16(offset); - var startCode = range[2]; - var endCode = range[3]; - for (var j = startCode; j <= endCode; ++j) - glyphsIds += string16(deltas[j]); + var codes = range[2]; + for (var j = 0, jj = codes.length; j < jj; ++j) + glyphsIds += string16(deltas[codes[j]]); } } else { for (var i = 0; i < segCount - 1; i++) { var range = ranges[i]; var start = range[0]; var end = range[1]; - var startCode = range[2]; + var startCode = range[2][0]; startCount += string16(start); endCount += string16(end); @@ -1299,7 +1295,7 @@ var Font = (function Font() { properties.baseEncoding = encoding; } - function replaceCMapTable(cmap, font, properties) { + function readCMapTable(cmap, font) { var start = (font.start ? font.start : 0) + cmap.offset; font.pos = start; @@ -1316,7 +1312,7 @@ var Font = (function Font() { } // Check that table are sorted by platformID then encodingID, - records.sort(function fontReplaceCMapTableSort(a, b) { + records.sort(function fontReadCMapTableSort(a, b) { return ((a.platformID << 16) + a.encodingID) - ((b.platformID << 16) + b.encodingID); }); @@ -1371,16 +1367,15 @@ var Font = (function Font() { for (var j = 0; j < 256; j++) { var index = font.getByte(); if (index) { - var unicode = adaptUnicode(j); - glyphs.push({ unicode: unicode, code: j }); + glyphs.push({ unicode: j, code: j }); ids.push(index); } } - - properties.hasShortCmap = true; - - createGlyphNameMap(glyphs, ids, properties); - return cmap.data = createCMapTable(glyphs, ids); + return { + glyphs: glyphs, + ids: ids, + hasShortCmap: true + }; } else if (format == 4) { // re-creating the table in format 4 since the encoding // might be changed @@ -1432,17 +1427,18 @@ var Font = (function Font() { var glyphCode = offsetIndex < 0 ? j : offsets[offsetIndex + j - start]; glyphCode = (glyphCode + delta) & 0xFFFF; - if (glyphCode == 0 || isAdaptedUnicode(j)) + if (glyphCode == 0) continue; - var unicode = adaptUnicode(j); - glyphs.push({ unicode: unicode, code: j }); + glyphs.push({ unicode: j, code: j }); ids.push(glyphCode); } } - createGlyphNameMap(glyphs, ids, properties); - return cmap.data = createCMapTable(glyphs, ids); + return { + glyphs: glyphs, + ids: ids + }; } else if (format == 6) { // Format 6 is a 2-bytes dense mapping, which means the font data // lives glue together even if they are pretty far in the unicode @@ -1457,19 +1453,18 @@ var Font = (function Font() { for (var j = 0; j < entryCount; j++) { var glyphCode = int16(font.getBytes(2)); var code = firstCode + j; - if (isAdaptedUnicode(glyphCode)) - continue; - var unicode = adaptUnicode(code); - glyphs.push({ unicode: unicode, code: code }); + glyphs.push({ unicode: code, code: code }); ids.push(glyphCode); } - createGlyphNameMap(glyphs, ids, properties); - return cmap.data = createCMapTable(glyphs, ids); + return { + glyphs: glyphs, + ids: ids + }; } } - return cmap.data; + error('Unsupported cmap table format'); }; function sanitizeMetrics(font, header, metrics, numGlyphs) { @@ -1708,17 +1703,85 @@ var Font = (function Font() { tables.push(cmap); } - var glyphs = []; - for (i = 1; i < numGlyphs; i++) { - if (isAdaptedUnicode(i)) - continue; - - glyphs.push({ unicode: adaptUnicode(i) }); + var cidToGidMap = properties.cidToGidMap || []; + var gidToCidMap = [0]; + if (cidToGidMap.length > 0) { + for (var j = cidToGidMap.length - 1; j >= 0; j--) { + var gid = cidToGidMap[j]; + if (gid) + gidToCidMap[gid] = j; + } + // filling the gaps using CID above the CIDs currently used in font + var nextCid = cidToGidMap.length; + for (var i = 1; i < numGlyphs; i++) { + if (!gidToCidMap[i]) + gidToCidMap[i] = nextCid++; + } } - cmap.data = createCMapTable(glyphs); + + var glyphs = [], ids = []; + var usedUnicodes = []; + var unassignedUnicodeItems = []; + for (var i = 1; i < numGlyphs; i++) { + var cid = gidToCidMap[i] || i; + var unicode = this.toUnicode[cid]; + if (!unicode || isSpecialUnicode(unicode) || + unicode in usedUnicodes) { + unassignedUnicodeItems.push(i); + continue; + } + usedUnicodes[unicode] = true; + glyphs.push({ unicode: unicode, code: cid }); + ids.push(i); + } + // trying to fit as many unassigned symbols as we can + // in the range allocated for the user defined symbols + var unusedUnicode = kCmapGlyphOffset; + for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; j++) { + var i = unassignedUnicodeItems[j]; + var cid = gidToCidMap[i] || i; + while (unusedUnicode in usedUnicodes) + unusedUnicode++; + if (unusedUnicode >= kCmapGlyphOffset + kSizeOfGlyphArea) + break; + var unicode = unusedUnicode++; + this.toUnicode[cid] = unicode; + usedUnicodes[unicode] = true; + glyphs.push({ unicode: unicode, code: cid }); + ids.push(i); + } + cmap.data = createCMapTable(glyphs, ids); } else { - replaceCMapTable(cmap, font, properties); + var cmapTable = readCMapTable(cmap, font); + var glyphs = cmapTable.glyphs; + var ids = cmapTable.ids; + var hasShortCmap = !!cmapTable.hasShortCmap; + var toUnicode = this.toUnicode; + + if (hasShortCmap && toUnicode) { + // checking if cmap is just identity map + var isIdentity = true; + for (var i = 0, ii = glyphs.length; i < ii; i++) { + if (glyphs[i].unicode != i + 1) { + isIdentity = false; + break; + } + } + // if it is, replacing with meaningful toUnicode values + if (isIdentity) { + for (var i = 0, ii = glyphs.length; i < ii; i++) { + var unicode = toUnicode[i + 1] || i + 1; + glyphs[i].unicode = unicode; + } + this.useToUnicode = true; + } + } + properties.hasShortCmap = hasShortCmap; + + createGlyphNameMap(glyphs, ids, properties); this.glyphNameMap = properties.glyphNameMap; + + cmap.data = createCMapTable(glyphs, ids); } // Rewrite the 'post' table if needed @@ -1808,6 +1871,14 @@ var Font = (function Font() { } properties.baseEncoding = encoding; } + if (properties.subtype == 'CIDFontType0C') { + var toUnicode = []; + for (var i = 0; i < charstrings.length; ++i) { + var charstring = charstrings[i]; + toUnicode[charstring.code] = charstring.unicode; + } + this.toUnicode = toUnicode; + } var fields = { // PostScript Font Program @@ -1868,8 +1939,11 @@ var Font = (function Font() { // Horizontal metrics 'hmtx': (function fontFieldsHmtx() { var hmtx = '\x00\x00\x00\x00'; // Fake .notdef - for (var i = 0, ii = charstrings.length; i < ii; i++) - hmtx += string16(charstrings[i].width) + string16(0); + for (var i = 0, ii = charstrings.length; i < ii; i++) { + var charstring = charstrings[i]; + var width = 'width' in charstring ? charstring.width : 0; + hmtx += string16(width) + string16(0); + } return stringToArray(hmtx); })(), @@ -1898,17 +1972,35 @@ var Font = (function Font() { return stringToArray(otf.file); }, - loadCidToUnicode: function font_loadCidToUnicode(properties) { - if (properties.cidToGidMap) { - this.cidToUnicode = properties.cidToGidMap; - return; + rebuildToUnicode: function font_rebuildToUnicode(properties) { + var firstChar = properties.firstChar, lastChar = properties.lastChar; + var map = []; + if (properties.composite) { + var isIdentityMap = this.cidToUnicode.length == 0; + for (var i = firstChar, ii = lastChar; i <= ii; i++) { + // TODO missing map the character according font's CMap + var cid = i; + map[i] = isIdentityMap ? cid : this.cidToUnicode[cid]; + } + } else { + for (var i = firstChar, ii = lastChar; i <= ii; i++) { + var glyph = properties.differences[i]; + if (!glyph) + glyph = properties.baseEncoding[i]; + if (!!glyph && (glyph in GlyphsUnicode)) + map[i] = GlyphsUnicode[glyph]; + } } + this.toUnicode = map; + }, + loadCidToUnicode: function font_loadCidToUnicode(properties) { if (!properties.cidSystemInfo) return; - var cidToUnicodeMap = []; + var cidToUnicodeMap = [], unicodeToCIDMap = []; this.cidToUnicode = cidToUnicodeMap; + this.unicodeToCID = unicodeToCIDMap; var cidSystemInfo = properties.cidSystemInfo; var cidToUnicode; @@ -1920,28 +2012,34 @@ var Font = (function Font() { if (!cidToUnicode) return; // identity encoding - var glyph = 1, i, j, k, ii; + var cid = 1, i, j, k, ii; for (i = 0, ii = cidToUnicode.length; i < ii; ++i) { var unicode = cidToUnicode[i]; if (isArray(unicode)) { var length = unicode.length; - for (j = 0; j < length; j++) - cidToUnicodeMap[unicode[j]] = glyph; - glyph++; + for (j = 0; j < length; j++) { + cidToUnicodeMap[cid] = unicode[j]; + unicodeToCIDMap[unicode[j]] = cid; + } + cid++; } else if (typeof unicode === 'object') { var fillLength = unicode.f; if (fillLength) { k = unicode.c; for (j = 0; j < fillLength; ++j) { - cidToUnicodeMap[k] = glyph++; + cidToUnicodeMap[cid] = k; + unicodeToCIDMap[k] = cid; + cid++; k++; } } else - glyph += unicode.s; + cid += unicode.s; } else if (unicode) { - cidToUnicodeMap[unicode] = glyph++; + cidToUnicodeMap[cid] = unicode; + unicodeToCIDMap[unicode] = cid; + cid++; } else - glyph++; + cid++; } }, @@ -1981,19 +2079,19 @@ var Font = (function Font() { switch (this.type) { case 'CIDFontType0': if (this.noUnicodeAdaptation) { - width = this.widths[this.cidToUnicode[charcode]]; + width = this.widths[this.unicodeToCID[charcode] || charcode]; unicode = charcode; break; } - unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + unicode = this.toUnicode[charcode] || charcode; break; case 'CIDFontType2': if (this.noUnicodeAdaptation) { - width = this.widths[this.cidToUnicode[charcode]]; + width = this.widths[this.unicodeToCID[charcode] || charcode]; unicode = charcode; break; } - unicode = adaptUnicode(this.cidToUnicode[charcode] || charcode); + unicode = this.toUnicode[charcode] || charcode; break; case 'Type1': var glyphName = this.differences[charcode] || this.encoding[charcode]; @@ -2004,7 +2102,7 @@ var Font = (function Font() { break; } unicode = this.glyphNameMap[glyphName] || - adaptUnicode(GlyphsUnicode[glyphName] || charcode); + GlyphsUnicode[glyphName] || charcode; break; case 'Type3': var glyphName = this.differences[charcode] || this.encoding[charcode]; @@ -2022,16 +2120,16 @@ var Font = (function Font() { break; } if (!this.hasEncoding) { - unicode = adaptUnicode(charcode); + unicode = this.useToUnicode ? this.toUnicode[charcode] : charcode; break; } - if (this.hasShortCmap) { + if (this.hasShortCmap && false) { var j = Encodings.MacRomanEncoding.indexOf(glyphName); - unicode = j >= 0 && !isSpecialUnicode(j) ? j : + unicode = j >= 0 ? j : this.glyphNameMap[glyphName]; } else { unicode = glyphName in GlyphsUnicode ? - adaptUnicode(GlyphsUnicode[glyphName]) : + GlyphsUnicode[glyphName] : this.glyphNameMap[glyphName]; } break; @@ -2039,9 +2137,17 @@ var Font = (function Font() { warn('Unsupported font type: ' + this.type); break; } + + var unicodeChars = this.toUnicode ? this.toUnicode[charcode] : charcode; + if (typeof unicodeChars === 'number') + unicodeChars = String.fromCharCode(unicodeChars); + + width = (isNum(width) ? width : this.defaultWidth) * this.widthMultiplier; + return { - unicode: unicode, - width: isNum(width) ? width : this.defaultWidth, + fontChar: String.fromCharCode(unicode), + unicode: unicodeChars, + width: width, codeIRQueue: codeIRQueue }; }, @@ -2753,22 +2859,13 @@ CFF.prototype = { getOrderedCharStrings: function cff_getOrderedCharStrings(glyphs, properties) { var charstrings = []; - var reverseMapping = {}; - var encoding = properties.baseEncoding; var i, length, glyphName; - for (i = 0, length = encoding.length; i < length; ++i) { - glyphName = encoding[i]; - if (!glyphName || isSpecialUnicode(i)) - continue; - reverseMapping[glyphName] = i; - } - reverseMapping['.notdef'] = 0; var unusedUnicode = kCmapGlyphOffset; for (i = 0, length = glyphs.length; i < length; i++) { var item = glyphs[i]; var glyphName = item.glyph; - var unicode = glyphName in reverseMapping ? - reverseMapping[glyphName] : unusedUnicode++; + var unicode = glyphName in GlyphsUnicode ? + GlyphsUnicode[glyphName] : unusedUnicode++; charstrings.push({ glyph: glyphName, unicode: unicode, @@ -3055,16 +3152,14 @@ var Type2CFF = (function type2CFF() { } var charStrings = this.parseIndex(topDict.CharStrings); - var charset = this.parseCharsets(topDict.charset, - charStrings.length, strings); - var encoding = this.parseEncoding(topDict.Encoding, properties, - strings, charset); var charset, encoding; var isCIDFont = properties.subtype == 'CIDFontType0C'; if (isCIDFont) { - charset = []; - charset.length = charStrings.length; + charset = ['.notdef']; + for (var i = 1, ii = charStrings.length; i < ii; ++i) + charset.push('glyph' + i); + encoding = this.parseCidMap(topDict.charset, charStrings.length); } else { @@ -3133,38 +3228,44 @@ var Type2CFF = (function type2CFF() { var charstrings = []; var unicodeUsed = []; var unassignedUnicodeItems = []; + var inverseEncoding = []; + for (var charcode in encoding) + inverseEncoding[encoding[charcode]] = charcode | 0; for (var i = 0, ii = charsets.length; i < ii; i++) { var glyph = charsets[i]; - var encodingFound = false; - for (var charcode in encoding) { - if (encoding[charcode] == i) { - var code = charcode | 0; - charstrings.push({ - unicode: adaptUnicode(code), - code: code, - gid: i, - glyph: glyph - }); - unicodeUsed[code] = true; - encodingFound = true; - break; - } + if (glyph == '.notdef') { + charstrings.push({ + unicode: 0, + code: 0, + gid: i, + glyph: glyph + }); + continue; } - if (!encodingFound) { + var code = inverseEncoding[i]; + if (!code || isSpecialUnicode(code)) { unassignedUnicodeItems.push(i); + continue; } + charstrings.push({ + unicode: code, + code: code, + gid: i, + glyph: glyph + }); + unicodeUsed[code] = true; } - var nextUnusedUnicode = 0x21; + var nextUnusedUnicode = kCmapGlyphOffset; for (var j = 0, jj = unassignedUnicodeItems.length; j < jj; ++j) { var i = unassignedUnicodeItems[j]; // giving unicode value anyway - while (unicodeUsed[nextUnusedUnicode]) + while (nextUnusedUnicode in unicodeUsed) nextUnusedUnicode++; - var code = nextUnusedUnicode++; + var unicode = nextUnusedUnicode++; charstrings.push({ - unicode: adaptUnicode(code), - code: code, + unicode: unicode, + code: inverseEncoding[i] || 0, gid: i, glyph: charsets[i] }); diff --git a/src/glyphlist.js b/src/glyphlist.js index 5691f8546..01b94442a 100644 --- a/src/glyphlist.js +++ b/src/glyphlist.js @@ -4287,6 +4287,7 @@ var GlyphsUnicode = { zretroflexhook: 0x0290, zstroke: 0x01B6, zuhiragana: 0x305A, - zukatakana: 0x30BA + zukatakana: 0x30BA, + '.notdef': 0x0000 }; diff --git a/src/metrics.js b/src/metrics.js index c21b4aed1..e64961aa7 100644 --- a/src/metrics.js +++ b/src/metrics.js @@ -3,6 +3,9 @@ 'use strict'; +// The Metrics object contains glyph widths (in glyph space units). +// As per PDF spec, for most fonts (Type 3 being an exception) a glyph +// space unit corresponds to 1/1000th of text space unit. var Metrics = { 'Courier': 600, 'Courier-Bold': 600, diff --git a/test/driver.js b/test/driver.js index 48ac77e65..ffaf0b53a 100644 --- a/test/driver.js +++ b/test/driver.js @@ -162,11 +162,11 @@ function nextPage(task, loadError) { page.startRendering( ctx, - function nextPageStartRendering() { - snapshotCurrentPage(task, false); - }, - function errorNextPageStartRendering(e) { - snapshotCurrentPage(task, 'render : ' + e.message); + function nextPageStartRendering(error) { + var failureMessage = false; + if (error) + failureMessage = 'render : ' + error.message; + snapshotCurrentPage(task, failureMessage); } ); } catch (e) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index a757acf34..325987de6 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -16,3 +16,4 @@ !alphatrans.pdf !devicen.pdf !cmykjpeg.pdf +!issue840.pdf diff --git a/test/pdfs/issue840.pdf b/test/pdfs/issue840.pdf new file mode 100644 index 000000000..6501a8c95 Binary files /dev/null and b/test/pdfs/issue840.pdf differ diff --git a/test/pdfs/piperine.pdf.link b/test/pdfs/piperine.pdf.link new file mode 100644 index 000000000..0d38690ee --- /dev/null +++ b/test/pdfs/piperine.pdf.link @@ -0,0 +1 @@ +http://www.erowid.org/archive/rhodium/chemistry/3base/piperonal.pepper/piperine.pepper/465e03piperine.pdf diff --git a/test/pdfs/protectip.pdf.link b/test/pdfs/protectip.pdf.link new file mode 100644 index 000000000..1af1bd87b --- /dev/null +++ b/test/pdfs/protectip.pdf.link @@ -0,0 +1 @@ +http://leahy.senate.gov/imo/media/doc/BillText-PROTECTIPAct.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index 87af30659..bfa131d9a 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -276,5 +276,25 @@ "link": false, "rounds": 1, "type": "eq" + }, + { "id": "protectip", + "file": "pdfs/protectip.pdf", + "md5": "676e7a7b8f96d04825361832b1838a93", + "link": true, + "rounds": 1, + "type": "eq" + }, + { "id": "piperine", + "file": "pdfs/piperine.pdf", + "md5": "603ca43dc5732dbba1579f122958c0c2", + "link": true, + "rounds": 1, + "type": "eq" + }, + { "id": "issue840", + "file": "pdfs/issue840.pdf", + "md5": "20d88011dd7e3c4fb5274979094dab93", + "rounds": 1, + "type": "eq" } ] diff --git a/web/viewer.css b/web/viewer.css index ac758f48e..a1ef92810 100644 --- a/web/viewer.css +++ b/web/viewer.css @@ -232,6 +232,27 @@ canvas { -webkit-box-shadow: 0px 2px 10px #ff0; } +.textLayer { + position: absolute; + left: 0; + top: 0; + right: 0; + bottom: 0; + color: #000; +} + +.textLayer > div { + color: transparent; + position: absolute; + line-height:1.3; +} + +/* TODO: file FF bug to support ::-moz-selection:window-inactive + so we can override the opaque grey background when the window is inactive; + see https://bugzilla.mozilla.org/show_bug.cgi?id=706209 */ +::selection { background:rgba(0,0,255,0.3); } +::-moz-selection { background:rgba(0,0,255,0.3); } + #viewer { margin: 44px 0px 0px; padding: 8px 0px; diff --git a/web/viewer.js b/web/viewer.js index 2ccb155c2..d6f5dd952 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -263,7 +263,7 @@ var PDFView = { var container = document.getElementById('viewer'); while (container.hasChildNodes()) container.removeChild(container.lastChild); - + var pdf; try { pdf = new PDFJS.PDFDoc(data); @@ -291,10 +291,10 @@ var PDFView = { pagesRefMap[pageRef.num + ' ' + pageRef.gen + ' R'] = i; } - this.setScale(scale || kDefaultScale, true); - this.pagesRefMap = pagesRefMap; this.destinations = pdf.catalog.destinations; + this.setScale(scale || kDefaultScale, true); + if (pdf.catalog.documentOutline) { this.outline = new DocumentOutlineView(pdf.catalog.documentOutline); var outlineSwitchButton = document.getElementById('outlineSwitch'); @@ -542,6 +542,10 @@ var PageView = function pageView(container, content, id, pageWidth, pageHeight, div.appendChild(canvas); this.canvas = canvas; + var textLayer = document.createElement('div'); + textLayer.className = 'textLayer'; + div.appendChild(textLayer); + var scale = this.scale; canvas.width = pageWidth * scale; canvas.height = pageHeight * scale; @@ -555,14 +559,13 @@ var PageView = function pageView(container, content, id, pageWidth, pageHeight, stats.begin = Date.now(); this.content.startRendering(ctx, - (function pageViewDrawCallback() { + (function pageViewDrawCallback(error) { + if (error) + PDFView.error('An error occurred while rendering the page.', error); this.updateStats(); if (this.onAfterDraw) this.onAfterDraw(); - }).bind(this), - function pageViewErrorback(e) { - PDFView.error('An error occurred while rendering the page.', e); - } + }).bind(this), textLayer ); setupLinks(this.content, this.scale);