diff --git a/extensions/firefox/components/PdfStreamConverter.js b/extensions/firefox/components/PdfStreamConverter.js index 2e7ab56e8..62d22162b 100644 --- a/extensions/firefox/components/PdfStreamConverter.js +++ b/extensions/firefox/components/PdfStreamConverter.js @@ -169,6 +169,9 @@ ChromeActions.prototype = { pdfBugEnabled: function() { return getBoolPref(PREF_PREFIX + '.pdfBugEnabled', false); }, + searchEnabled: function() { + return getBoolPref(PREF_PREFIX + '.searchEnabled', false); + }, fallback: function(url) { var self = this; var domWindow = this.domWindow; diff --git a/l10n/en-US/viewer.properties b/l10n/en-US/viewer.properties index 0922062a6..4ac776da2 100644 --- a/l10n/en-US/viewer.properties +++ b/l10n/en-US/viewer.properties @@ -44,3 +44,7 @@ thumb_page_title=Page {{page}} thumb_page_canvas=Thumbnail of Page {{page}} request_password=PDF is protected by a password: open_file_label=Open +search.title=Search Document +search_label=Search +search_button=Find + diff --git a/src/api.js b/src/api.js index d9a9faedd..59f9661bf 100644 --- a/src/api.js +++ b/src/api.js @@ -361,12 +361,18 @@ var PDFPageProxy = (function PDFPageProxyClosure() { next(); }, /** - * Stub for future feature. + * @return {Promise} That is resolved with the a {string} that is the text + * content from the page. */ getTextContent: function PDFPageProxy_getTextContent() { var promise = new PDFJS.Promise(); - var textContent = 'page text'; // not implemented - promise.resolve(textContent); + this.transport.messageHandler.send('GetTextContent', { + pageIndex: this.pageNumber - 1 + }, + function textContentCallback(textContent) { + promise.resolve(textContent); + } + ); return promise; }, /** diff --git a/src/core.js b/src/core.js index bef92422f..8e07078ef 100644 --- a/src/core.js +++ b/src/core.js @@ -151,6 +151,7 @@ var Page = (function PageClosure() { // fetching items var streams = []; var i, n = content.length; + var streams = []; for (i = 0; i < n; ++i) streams.push(xref.fetchIfRef(content[i])); content = new StreamsSequenceStream(streams); @@ -166,7 +167,48 @@ var Page = (function PageClosure() { return pe.getOperatorList(content, resources, dependency); }, + extractTextContent: function Page_extractTextContent() { + var handler = { + on: function nullHandlerOn() {}, + send: function nullHandlerSend() {} + }; + var xref = this.xref; + var content = xref.fetchIfRef(this.content); + var resources = xref.fetchIfRef(this.resources); + if (isArray(content)) { + // fetching items + var i, n = content.length; + var streams = []; + for (i = 0; i < n; ++i) + streams.push(xref.fetchIfRef(content[i])); + content = new StreamsSequenceStream(streams); + } else if (isStream(content)) { + content.reset(); + } + + var pe = new PartialEvaluator( + xref, handler, 'p' + this.pageNumber + '_'); + return pe.getTextContent(content, resources); + }, + + ensureFonts: function Page_ensureFonts(fonts, callback) { + this.stats.time('Font Loading'); + // Convert the font names to the corresponding font obj. + for (var i = 0, ii = fonts.length; i < ii; i++) { + fonts[i] = this.objs.objs[fonts[i]].data; + } + + // Load all the fonts + FontLoader.bind( + fonts, + function pageEnsureFontsFontObjs(fontObjs) { + this.stats.timeEnd('Font Loading'); + + callback.call(this); + }.bind(this) + ); + }, getLinks: function Page_getLinks() { var links = []; var annotations = pageGetAnnotations(); @@ -480,3 +522,4 @@ var PDFDocument = (function PDFDocumentClosure() { return PDFDocument; })(); + diff --git a/src/evaluator.js b/src/evaluator.js index 55af857e5..8849a994a 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -154,8 +154,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { font = xref.fetchIfRef(font) || fontRes.get(fontName); assertWellFormed(isDict(font)); + ++self.objIdCounter; - if (!font.translated) { + if (!font.loadedName) { font.translated = self.translateFont(font, xref, resources, dependency); if (font.translated) { @@ -469,6 +470,81 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return queue; }, + getTextContent: function partialEvaluatorGetIRQueue(stream, resources) { + + var self = this; + var xref = this.xref; + + function handleSetFont(fontName, fontRef) { + var fontRes = resources.get('Font'); + + // TODO: TOASK: Is it possible to get here? If so, what does + // args[0].name should be like??? + assert(fontRes, 'fontRes not available'); + + fontRes = xref.fetchIfRef(fontRes); + fontRef = fontRef || fontRes.get(fontName); + var font = xref.fetchIfRef(fontRef), tra; + assertWellFormed(isDict(font)); + if (!font.translated) { + font.translated = self.translateFont(font, xref, resources); + } + return font; + } + + resources = xref.fetchIfRef(resources) || new Dict(); + + var parser = new Parser(new Lexer(stream), false); + var res = resources; + var args = [], obj; + + var text = ''; + var chunk = ''; + var font = null; + while (!isEOF(obj = parser.getObj())) { + if (isCmd(obj)) { + var cmd = obj.cmd; + switch (cmd) { + case 'Tf': + font = handleSetFont(args[0].name); + break; + case 'TJ': + var items = args[0]; + for (var j = 0, jj = items.length; j < jj; j++) { + if (typeof items[j] === 'string') { + chunk += items[j]; + } else if (items[j] < 0) { + // making all negative offsets a space - better to have + // a space in incorrect place than not have them at all + chunk += ' '; + } + } + break; + case 'Tj': + chunk += args[0]; + break; + case "'": + chunk += args[0] + ' '; + break; + case '"': + chunk += args[2] + ' '; + break; + } // switch + if (chunk !== '') { + text += fontCharsToUnicode(chunk, font.translated.properties); + chunk = ''; + } + + args = []; + } else if (obj != null) { + assertWellFormed(args.length <= 33, 'Too many arguments'); + args.push(obj); + } + } + + return text; + }, + extractDataStructures: function partialEvaluatorExtractDataStructures(dict, baseDict, xref, properties) { diff --git a/src/fonts.js b/src/fonts.js index 1d498746a..72bcecbce 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -753,6 +753,736 @@ function isSpecialUnicode(unicode) { unicode < kCmapGlyphOffset + kSizeOfGlyphArea); } +// The normalization table is obtained by filtering the Unicode characters +// database with entries. +var NormalizedUnicodes = { + '\u00A8': '\u0020\u0308', + '\u00AF': '\u0020\u0304', + '\u00B4': '\u0020\u0301', + '\u00B5': '\u03BC', + '\u00B8': '\u0020\u0327', + '\u0132': '\u0049\u004A', + '\u0133': '\u0069\u006A', + '\u013F': '\u004C\u00B7', + '\u0140': '\u006C\u00B7', + '\u0149': '\u02BC\u006E', + '\u017F': '\u0073', + '\u01C4': '\u0044\u017D', + '\u01C5': '\u0044\u017E', + '\u01C6': '\u0064\u017E', + '\u01C7': '\u004C\u004A', + '\u01C8': '\u004C\u006A', + '\u01C9': '\u006C\u006A', + '\u01CA': '\u004E\u004A', + '\u01CB': '\u004E\u006A', + '\u01CC': '\u006E\u006A', + '\u01F1': '\u0044\u005A', + '\u01F2': '\u0044\u007A', + '\u01F3': '\u0064\u007A', + '\u02D8': '\u0020\u0306', + '\u02D9': '\u0020\u0307', + '\u02DA': '\u0020\u030A', + '\u02DB': '\u0020\u0328', + '\u02DC': '\u0020\u0303', + '\u02DD': '\u0020\u030B', + '\u037A': '\u0020\u0345', + '\u0384': '\u0020\u0301', + '\u03D0': '\u03B2', + '\u03D1': '\u03B8', + '\u03D2': '\u03A5', + '\u03D5': '\u03C6', + '\u03D6': '\u03C0', + '\u03F0': '\u03BA', + '\u03F1': '\u03C1', + '\u03F2': '\u03C2', + '\u03F4': '\u0398', + '\u03F5': '\u03B5', + '\u03F9': '\u03A3', + '\u0587': '\u0565\u0582', + '\u0675': '\u0627\u0674', + '\u0676': '\u0648\u0674', + '\u0677': '\u06C7\u0674', + '\u0678': '\u064A\u0674', + '\u0E33': '\u0E4D\u0E32', + '\u0EB3': '\u0ECD\u0EB2', + '\u0EDC': '\u0EAB\u0E99', + '\u0EDD': '\u0EAB\u0EA1', + '\u0F77': '\u0FB2\u0F81', + '\u0F79': '\u0FB3\u0F81', + '\u1E9A': '\u0061\u02BE', + '\u1FBD': '\u0020\u0313', + '\u1FBF': '\u0020\u0313', + '\u1FC0': '\u0020\u0342', + '\u1FFE': '\u0020\u0314', + '\u2002': '\u0020', + '\u2003': '\u0020', + '\u2004': '\u0020', + '\u2005': '\u0020', + '\u2006': '\u0020', + '\u2008': '\u0020', + '\u2009': '\u0020', + '\u200A': '\u0020', + '\u2017': '\u0020\u0333', + '\u2024': '\u002E', + '\u2025': '\u002E\u002E', + '\u2026': '\u002E\u002E\u002E', + '\u2033': '\u2032\u2032', + '\u2034': '\u2032\u2032\u2032', + '\u2036': '\u2035\u2035', + '\u2037': '\u2035\u2035\u2035', + '\u203C': '\u0021\u0021', + '\u203E': '\u0020\u0305', + '\u2047': '\u003F\u003F', + '\u2048': '\u003F\u0021', + '\u2049': '\u0021\u003F', + '\u2057': '\u2032\u2032\u2032\u2032', + '\u205F': '\u0020', + '\u20A8': '\u0052\u0073', + '\u2100': '\u0061\u002F\u0063', + '\u2101': '\u0061\u002F\u0073', + '\u2103': '\u00B0\u0043', + '\u2105': '\u0063\u002F\u006F', + '\u2106': '\u0063\u002F\u0075', + '\u2107': '\u0190', + '\u2109': '\u00B0\u0046', + '\u2116': '\u004E\u006F', + '\u2121': '\u0054\u0045\u004C', + '\u2135': '\u05D0', + '\u2136': '\u05D1', + '\u2137': '\u05D2', + '\u2138': '\u05D3', + '\u213B': '\u0046\u0041\u0058', + '\u2160': '\u0049', + '\u2161': '\u0049\u0049', + '\u2162': '\u0049\u0049\u0049', + '\u2163': '\u0049\u0056', + '\u2164': '\u0056', + '\u2165': '\u0056\u0049', + '\u2166': '\u0056\u0049\u0049', + '\u2167': '\u0056\u0049\u0049\u0049', + '\u2168': '\u0049\u0058', + '\u2169': '\u0058', + '\u216A': '\u0058\u0049', + '\u216B': '\u0058\u0049\u0049', + '\u216C': '\u004C', + '\u216D': '\u0043', + '\u216E': '\u0044', + '\u216F': '\u004D', + '\u2170': '\u0069', + '\u2171': '\u0069\u0069', + '\u2172': '\u0069\u0069\u0069', + '\u2173': '\u0069\u0076', + '\u2174': '\u0076', + '\u2175': '\u0076\u0069', + '\u2176': '\u0076\u0069\u0069', + '\u2177': '\u0076\u0069\u0069\u0069', + '\u2178': '\u0069\u0078', + '\u2179': '\u0078', + '\u217A': '\u0078\u0069', + '\u217B': '\u0078\u0069\u0069', + '\u217C': '\u006C', + '\u217D': '\u0063', + '\u217E': '\u0064', + '\u217F': '\u006D', + '\u222C': '\u222B\u222B', + '\u222D': '\u222B\u222B\u222B', + '\u222F': '\u222E\u222E', + '\u2230': '\u222E\u222E\u222E', + '\u2474': '\u0028\u0031\u0029', + '\u2475': '\u0028\u0032\u0029', + '\u2476': '\u0028\u0033\u0029', + '\u2477': '\u0028\u0034\u0029', + '\u2478': '\u0028\u0035\u0029', + '\u2479': '\u0028\u0036\u0029', + '\u247A': '\u0028\u0037\u0029', + '\u247B': '\u0028\u0038\u0029', + '\u247C': '\u0028\u0039\u0029', + '\u247D': '\u0028\u0031\u0030\u0029', + '\u247E': '\u0028\u0031\u0031\u0029', + '\u247F': '\u0028\u0031\u0032\u0029', + '\u2480': '\u0028\u0031\u0033\u0029', + '\u2481': '\u0028\u0031\u0034\u0029', + '\u2482': '\u0028\u0031\u0035\u0029', + '\u2483': '\u0028\u0031\u0036\u0029', + '\u2484': '\u0028\u0031\u0037\u0029', + '\u2485': '\u0028\u0031\u0038\u0029', + '\u2486': '\u0028\u0031\u0039\u0029', + '\u2487': '\u0028\u0032\u0030\u0029', + '\u2488': '\u0031\u002E', + '\u2489': '\u0032\u002E', + '\u248A': '\u0033\u002E', + '\u248B': '\u0034\u002E', + '\u248C': '\u0035\u002E', + '\u248D': '\u0036\u002E', + '\u248E': '\u0037\u002E', + '\u248F': '\u0038\u002E', + '\u2490': '\u0039\u002E', + '\u2491': '\u0031\u0030\u002E', + '\u2492': '\u0031\u0031\u002E', + '\u2493': '\u0031\u0032\u002E', + '\u2494': '\u0031\u0033\u002E', + '\u2495': '\u0031\u0034\u002E', + '\u2496': '\u0031\u0035\u002E', + '\u2497': '\u0031\u0036\u002E', + '\u2498': '\u0031\u0037\u002E', + '\u2499': '\u0031\u0038\u002E', + '\u249A': '\u0031\u0039\u002E', + '\u249B': '\u0032\u0030\u002E', + '\u249C': '\u0028\u0061\u0029', + '\u249D': '\u0028\u0062\u0029', + '\u249E': '\u0028\u0063\u0029', + '\u249F': '\u0028\u0064\u0029', + '\u24A0': '\u0028\u0065\u0029', + '\u24A1': '\u0028\u0066\u0029', + '\u24A2': '\u0028\u0067\u0029', + '\u24A3': '\u0028\u0068\u0029', + '\u24A4': '\u0028\u0069\u0029', + '\u24A5': '\u0028\u006A\u0029', + '\u24A6': '\u0028\u006B\u0029', + '\u24A7': '\u0028\u006C\u0029', + '\u24A8': '\u0028\u006D\u0029', + '\u24A9': '\u0028\u006E\u0029', + '\u24AA': '\u0028\u006F\u0029', + '\u24AB': '\u0028\u0070\u0029', + '\u24AC': '\u0028\u0071\u0029', + '\u24AD': '\u0028\u0072\u0029', + '\u24AE': '\u0028\u0073\u0029', + '\u24AF': '\u0028\u0074\u0029', + '\u24B0': '\u0028\u0075\u0029', + '\u24B1': '\u0028\u0076\u0029', + '\u24B2': '\u0028\u0077\u0029', + '\u24B3': '\u0028\u0078\u0029', + '\u24B4': '\u0028\u0079\u0029', + '\u24B5': '\u0028\u007A\u0029', + '\u2A0C': '\u222B\u222B\u222B\u222B', + '\u2A74': '\u003A\u003A\u003D', + '\u2A75': '\u003D\u003D', + '\u2A76': '\u003D\u003D\u003D', + '\u2E9F': '\u6BCD', + '\u2EF3': '\u9F9F', + '\u2F00': '\u4E00', + '\u2F01': '\u4E28', + '\u2F02': '\u4E36', + '\u2F03': '\u4E3F', + '\u2F04': '\u4E59', + '\u2F05': '\u4E85', + '\u2F06': '\u4E8C', + '\u2F07': '\u4EA0', + '\u2F08': '\u4EBA', + '\u2F09': '\u513F', + '\u2F0A': '\u5165', + '\u2F0B': '\u516B', + '\u2F0C': '\u5182', + '\u2F0D': '\u5196', + '\u2F0E': '\u51AB', + '\u2F0F': '\u51E0', + '\u2F10': '\u51F5', + '\u2F11': '\u5200', + '\u2F12': '\u529B', + '\u2F13': '\u52F9', + '\u2F14': '\u5315', + '\u2F15': '\u531A', + '\u2F16': '\u5338', + '\u2F17': '\u5341', + '\u2F18': '\u535C', + '\u2F19': '\u5369', + '\u2F1A': '\u5382', + '\u2F1B': '\u53B6', + '\u2F1C': '\u53C8', + '\u2F1D': '\u53E3', + '\u2F1E': '\u56D7', + '\u2F1F': '\u571F', + '\u2F20': '\u58EB', + '\u2F21': '\u5902', + '\u2F22': '\u590A', + '\u2F23': '\u5915', + '\u2F24': '\u5927', + '\u2F25': '\u5973', + '\u2F26': '\u5B50', + '\u2F27': '\u5B80', + '\u2F28': '\u5BF8', + '\u2F29': '\u5C0F', + '\u2F2A': '\u5C22', + '\u2F2B': '\u5C38', + '\u2F2C': '\u5C6E', + '\u2F2D': '\u5C71', + '\u2F2E': '\u5DDB', + '\u2F2F': '\u5DE5', + '\u2F30': '\u5DF1', + '\u2F31': '\u5DFE', + '\u2F32': '\u5E72', + '\u2F33': '\u5E7A', + '\u2F34': '\u5E7F', + '\u2F35': '\u5EF4', + '\u2F36': '\u5EFE', + '\u2F37': '\u5F0B', + '\u2F38': '\u5F13', + '\u2F39': '\u5F50', + '\u2F3A': '\u5F61', + '\u2F3B': '\u5F73', + '\u2F3C': '\u5FC3', + '\u2F3D': '\u6208', + '\u2F3E': '\u6236', + '\u2F3F': '\u624B', + '\u2F40': '\u652F', + '\u2F41': '\u6534', + '\u2F42': '\u6587', + '\u2F43': '\u6597', + '\u2F44': '\u65A4', + '\u2F45': '\u65B9', + '\u2F46': '\u65E0', + '\u2F47': '\u65E5', + '\u2F48': '\u66F0', + '\u2F49': '\u6708', + '\u2F4A': '\u6728', + '\u2F4B': '\u6B20', + '\u2F4C': '\u6B62', + '\u2F4D': '\u6B79', + '\u2F4E': '\u6BB3', + '\u2F4F': '\u6BCB', + '\u2F50': '\u6BD4', + '\u2F51': '\u6BDB', + '\u2F52': '\u6C0F', + '\u2F53': '\u6C14', + '\u2F54': '\u6C34', + '\u2F55': '\u706B', + '\u2F56': '\u722A', + '\u2F57': '\u7236', + '\u2F58': '\u723B', + '\u2F59': '\u723F', + '\u2F5A': '\u7247', + '\u2F5B': '\u7259', + '\u2F5C': '\u725B', + '\u2F5D': '\u72AC', + '\u2F5E': '\u7384', + '\u2F5F': '\u7389', + '\u2F60': '\u74DC', + '\u2F61': '\u74E6', + '\u2F62': '\u7518', + '\u2F63': '\u751F', + '\u2F64': '\u7528', + '\u2F65': '\u7530', + '\u2F66': '\u758B', + '\u2F67': '\u7592', + '\u2F68': '\u7676', + '\u2F69': '\u767D', + '\u2F6A': '\u76AE', + '\u2F6B': '\u76BF', + '\u2F6C': '\u76EE', + '\u2F6D': '\u77DB', + '\u2F6E': '\u77E2', + '\u2F6F': '\u77F3', + '\u2F70': '\u793A', + '\u2F71': '\u79B8', + '\u2F72': '\u79BE', + '\u2F73': '\u7A74', + '\u2F74': '\u7ACB', + '\u2F75': '\u7AF9', + '\u2F76': '\u7C73', + '\u2F77': '\u7CF8', + '\u2F78': '\u7F36', + '\u2F79': '\u7F51', + '\u2F7A': '\u7F8A', + '\u2F7B': '\u7FBD', + '\u2F7C': '\u8001', + '\u2F7D': '\u800C', + '\u2F7E': '\u8012', + '\u2F7F': '\u8033', + '\u2F80': '\u807F', + '\u2F81': '\u8089', + '\u2F82': '\u81E3', + '\u2F83': '\u81EA', + '\u2F84': '\u81F3', + '\u2F85': '\u81FC', + '\u2F86': '\u820C', + '\u2F87': '\u821B', + '\u2F88': '\u821F', + '\u2F89': '\u826E', + '\u2F8A': '\u8272', + '\u2F8B': '\u8278', + '\u2F8C': '\u864D', + '\u2F8D': '\u866B', + '\u2F8E': '\u8840', + '\u2F8F': '\u884C', + '\u2F90': '\u8863', + '\u2F91': '\u897E', + '\u2F92': '\u898B', + '\u2F93': '\u89D2', + '\u2F94': '\u8A00', + '\u2F95': '\u8C37', + '\u2F96': '\u8C46', + '\u2F97': '\u8C55', + '\u2F98': '\u8C78', + '\u2F99': '\u8C9D', + '\u2F9A': '\u8D64', + '\u2F9B': '\u8D70', + '\u2F9C': '\u8DB3', + '\u2F9D': '\u8EAB', + '\u2F9E': '\u8ECA', + '\u2F9F': '\u8F9B', + '\u2FA0': '\u8FB0', + '\u2FA1': '\u8FB5', + '\u2FA2': '\u9091', + '\u2FA3': '\u9149', + '\u2FA4': '\u91C6', + '\u2FA5': '\u91CC', + '\u2FA6': '\u91D1', + '\u2FA7': '\u9577', + '\u2FA8': '\u9580', + '\u2FA9': '\u961C', + '\u2FAA': '\u96B6', + '\u2FAB': '\u96B9', + '\u2FAC': '\u96E8', + '\u2FAD': '\u9751', + '\u2FAE': '\u975E', + '\u2FAF': '\u9762', + '\u2FB0': '\u9769', + '\u2FB1': '\u97CB', + '\u2FB2': '\u97ED', + '\u2FB3': '\u97F3', + '\u2FB4': '\u9801', + '\u2FB5': '\u98A8', + '\u2FB6': '\u98DB', + '\u2FB7': '\u98DF', + '\u2FB8': '\u9996', + '\u2FB9': '\u9999', + '\u2FBA': '\u99AC', + '\u2FBB': '\u9AA8', + '\u2FBC': '\u9AD8', + '\u2FBD': '\u9ADF', + '\u2FBE': '\u9B25', + '\u2FBF': '\u9B2F', + '\u2FC0': '\u9B32', + '\u2FC1': '\u9B3C', + '\u2FC2': '\u9B5A', + '\u2FC3': '\u9CE5', + '\u2FC4': '\u9E75', + '\u2FC5': '\u9E7F', + '\u2FC6': '\u9EA5', + '\u2FC7': '\u9EBB', + '\u2FC8': '\u9EC3', + '\u2FC9': '\u9ECD', + '\u2FCA': '\u9ED1', + '\u2FCB': '\u9EF9', + '\u2FCC': '\u9EFD', + '\u2FCD': '\u9F0E', + '\u2FCE': '\u9F13', + '\u2FCF': '\u9F20', + '\u2FD0': '\u9F3B', + '\u2FD1': '\u9F4A', + '\u2FD2': '\u9F52', + '\u2FD3': '\u9F8D', + '\u2FD4': '\u9F9C', + '\u2FD5': '\u9FA0', + '\u3036': '\u3012', + '\u3038': '\u5341', + '\u3039': '\u5344', + '\u303A': '\u5345', + '\u309B': '\u0020\u3099', + '\u309C': '\u0020\u309A', + '\u3131': '\u1100', + '\u3132': '\u1101', + '\u3133': '\u11AA', + '\u3134': '\u1102', + '\u3135': '\u11AC', + '\u3136': '\u11AD', + '\u3137': '\u1103', + '\u3138': '\u1104', + '\u3139': '\u1105', + '\u313A': '\u11B0', + '\u313B': '\u11B1', + '\u313C': '\u11B2', + '\u313D': '\u11B3', + '\u313E': '\u11B4', + '\u313F': '\u11B5', + '\u3140': '\u111A', + '\u3141': '\u1106', + '\u3142': '\u1107', + '\u3143': '\u1108', + '\u3144': '\u1121', + '\u3145': '\u1109', + '\u3146': '\u110A', + '\u3147': '\u110B', + '\u3148': '\u110C', + '\u3149': '\u110D', + '\u314A': '\u110E', + '\u314B': '\u110F', + '\u314C': '\u1110', + '\u314D': '\u1111', + '\u314E': '\u1112', + '\u314F': '\u1161', + '\u3150': '\u1162', + '\u3151': '\u1163', + '\u3152': '\u1164', + '\u3153': '\u1165', + '\u3154': '\u1166', + '\u3155': '\u1167', + '\u3156': '\u1168', + '\u3157': '\u1169', + '\u3158': '\u116A', + '\u3159': '\u116B', + '\u315A': '\u116C', + '\u315B': '\u116D', + '\u315C': '\u116E', + '\u315D': '\u116F', + '\u315E': '\u1170', + '\u315F': '\u1171', + '\u3160': '\u1172', + '\u3161': '\u1173', + '\u3162': '\u1174', + '\u3163': '\u1175', + '\u3164': '\u1160', + '\u3165': '\u1114', + '\u3166': '\u1115', + '\u3167': '\u11C7', + '\u3168': '\u11C8', + '\u3169': '\u11CC', + '\u316A': '\u11CE', + '\u316B': '\u11D3', + '\u316C': '\u11D7', + '\u316D': '\u11D9', + '\u316E': '\u111C', + '\u316F': '\u11DD', + '\u3170': '\u11DF', + '\u3171': '\u111D', + '\u3172': '\u111E', + '\u3173': '\u1120', + '\u3174': '\u1122', + '\u3175': '\u1123', + '\u3176': '\u1127', + '\u3177': '\u1129', + '\u3178': '\u112B', + '\u3179': '\u112C', + '\u317A': '\u112D', + '\u317B': '\u112E', + '\u317C': '\u112F', + '\u317D': '\u1132', + '\u317E': '\u1136', + '\u317F': '\u1140', + '\u3180': '\u1147', + '\u3181': '\u114C', + '\u3182': '\u11F1', + '\u3183': '\u11F2', + '\u3184': '\u1157', + '\u3185': '\u1158', + '\u3186': '\u1159', + '\u3187': '\u1184', + '\u3188': '\u1185', + '\u3189': '\u1188', + '\u318A': '\u1191', + '\u318B': '\u1192', + '\u318C': '\u1194', + '\u318D': '\u119E', + '\u318E': '\u11A1', + '\u3200': '\u0028\u1100\u0029', + '\u3201': '\u0028\u1102\u0029', + '\u3202': '\u0028\u1103\u0029', + '\u3203': '\u0028\u1105\u0029', + '\u3204': '\u0028\u1106\u0029', + '\u3205': '\u0028\u1107\u0029', + '\u3206': '\u0028\u1109\u0029', + '\u3207': '\u0028\u110B\u0029', + '\u3208': '\u0028\u110C\u0029', + '\u3209': '\u0028\u110E\u0029', + '\u320A': '\u0028\u110F\u0029', + '\u320B': '\u0028\u1110\u0029', + '\u320C': '\u0028\u1111\u0029', + '\u320D': '\u0028\u1112\u0029', + '\u320E': '\u0028\u1100\u1161\u0029', + '\u320F': '\u0028\u1102\u1161\u0029', + '\u3210': '\u0028\u1103\u1161\u0029', + '\u3211': '\u0028\u1105\u1161\u0029', + '\u3212': '\u0028\u1106\u1161\u0029', + '\u3213': '\u0028\u1107\u1161\u0029', + '\u3214': '\u0028\u1109\u1161\u0029', + '\u3215': '\u0028\u110B\u1161\u0029', + '\u3216': '\u0028\u110C\u1161\u0029', + '\u3217': '\u0028\u110E\u1161\u0029', + '\u3218': '\u0028\u110F\u1161\u0029', + '\u3219': '\u0028\u1110\u1161\u0029', + '\u321A': '\u0028\u1111\u1161\u0029', + '\u321B': '\u0028\u1112\u1161\u0029', + '\u321C': '\u0028\u110C\u116E\u0029', + '\u321D': '\u0028\u110B\u1169\u110C\u1165\u11AB\u0029', + '\u321E': '\u0028\u110B\u1169\u1112\u116E\u0029', + '\u3220': '\u0028\u4E00\u0029', + '\u3221': '\u0028\u4E8C\u0029', + '\u3222': '\u0028\u4E09\u0029', + '\u3223': '\u0028\u56DB\u0029', + '\u3224': '\u0028\u4E94\u0029', + '\u3225': '\u0028\u516D\u0029', + '\u3226': '\u0028\u4E03\u0029', + '\u3227': '\u0028\u516B\u0029', + '\u3228': '\u0028\u4E5D\u0029', + '\u3229': '\u0028\u5341\u0029', + '\u322A': '\u0028\u6708\u0029', + '\u322B': '\u0028\u706B\u0029', + '\u322C': '\u0028\u6C34\u0029', + '\u322D': '\u0028\u6728\u0029', + '\u322E': '\u0028\u91D1\u0029', + '\u322F': '\u0028\u571F\u0029', + '\u3230': '\u0028\u65E5\u0029', + '\u3231': '\u0028\u682A\u0029', + '\u3232': '\u0028\u6709\u0029', + '\u3233': '\u0028\u793E\u0029', + '\u3234': '\u0028\u540D\u0029', + '\u3235': '\u0028\u7279\u0029', + '\u3236': '\u0028\u8CA1\u0029', + '\u3237': '\u0028\u795D\u0029', + '\u3238': '\u0028\u52B4\u0029', + '\u3239': '\u0028\u4EE3\u0029', + '\u323A': '\u0028\u547C\u0029', + '\u323B': '\u0028\u5B66\u0029', + '\u323C': '\u0028\u76E3\u0029', + '\u323D': '\u0028\u4F01\u0029', + '\u323E': '\u0028\u8CC7\u0029', + '\u323F': '\u0028\u5354\u0029', + '\u3240': '\u0028\u796D\u0029', + '\u3241': '\u0028\u4F11\u0029', + '\u3242': '\u0028\u81EA\u0029', + '\u3243': '\u0028\u81F3\u0029', + '\u32C0': '\u0031\u6708', + '\u32C1': '\u0032\u6708', + '\u32C2': '\u0033\u6708', + '\u32C3': '\u0034\u6708', + '\u32C4': '\u0035\u6708', + '\u32C5': '\u0036\u6708', + '\u32C6': '\u0037\u6708', + '\u32C7': '\u0038\u6708', + '\u32C8': '\u0039\u6708', + '\u32C9': '\u0031\u0030\u6708', + '\u32CA': '\u0031\u0031\u6708', + '\u32CB': '\u0031\u0032\u6708', + '\u3358': '\u0030\u70B9', + '\u3359': '\u0031\u70B9', + '\u335A': '\u0032\u70B9', + '\u335B': '\u0033\u70B9', + '\u335C': '\u0034\u70B9', + '\u335D': '\u0035\u70B9', + '\u335E': '\u0036\u70B9', + '\u335F': '\u0037\u70B9', + '\u3360': '\u0038\u70B9', + '\u3361': '\u0039\u70B9', + '\u3362': '\u0031\u0030\u70B9', + '\u3363': '\u0031\u0031\u70B9', + '\u3364': '\u0031\u0032\u70B9', + '\u3365': '\u0031\u0033\u70B9', + '\u3366': '\u0031\u0034\u70B9', + '\u3367': '\u0031\u0035\u70B9', + '\u3368': '\u0031\u0036\u70B9', + '\u3369': '\u0031\u0037\u70B9', + '\u336A': '\u0031\u0038\u70B9', + '\u336B': '\u0031\u0039\u70B9', + '\u336C': '\u0032\u0030\u70B9', + '\u336D': '\u0032\u0031\u70B9', + '\u336E': '\u0032\u0032\u70B9', + '\u336F': '\u0032\u0033\u70B9', + '\u3370': '\u0032\u0034\u70B9', + '\u33E0': '\u0031\u65E5', + '\u33E1': '\u0032\u65E5', + '\u33E2': '\u0033\u65E5', + '\u33E3': '\u0034\u65E5', + '\u33E4': '\u0035\u65E5', + '\u33E5': '\u0036\u65E5', + '\u33E6': '\u0037\u65E5', + '\u33E7': '\u0038\u65E5', + '\u33E8': '\u0039\u65E5', + '\u33E9': '\u0031\u0030\u65E5', + '\u33EA': '\u0031\u0031\u65E5', + '\u33EB': '\u0031\u0032\u65E5', + '\u33EC': '\u0031\u0033\u65E5', + '\u33ED': '\u0031\u0034\u65E5', + '\u33EE': '\u0031\u0035\u65E5', + '\u33EF': '\u0031\u0036\u65E5', + '\u33F0': '\u0031\u0037\u65E5', + '\u33F1': '\u0031\u0038\u65E5', + '\u33F2': '\u0031\u0039\u65E5', + '\u33F3': '\u0032\u0030\u65E5', + '\u33F4': '\u0032\u0031\u65E5', + '\u33F5': '\u0032\u0032\u65E5', + '\u33F6': '\u0032\u0033\u65E5', + '\u33F7': '\u0032\u0034\u65E5', + '\u33F8': '\u0032\u0035\u65E5', + '\u33F9': '\u0032\u0036\u65E5', + '\u33FA': '\u0032\u0037\u65E5', + '\u33FB': '\u0032\u0038\u65E5', + '\u33FC': '\u0032\u0039\u65E5', + '\u33FD': '\u0033\u0030\u65E5', + '\u33FE': '\u0033\u0031\u65E5', + '\uFB00': '\u0066\u0066', + '\uFB01': '\u0066\u0069', + '\uFB02': '\u0066\u006C', + '\uFB03': '\u0066\u0066\u0069', + '\uFB04': '\u0066\u0066\u006C', + '\uFB05': '\u017F\u0074', + '\uFB06': '\u0073\u0074', + '\uFB13': '\u0574\u0576', + '\uFB14': '\u0574\u0565', + '\uFB15': '\u0574\u056B', + '\uFB16': '\u057E\u0576', + '\uFB17': '\u0574\u056D', + '\uFB4F': '\u05D0\u05DC', + '\uFE49': '\u203E', + '\uFE4A': '\u203E', + '\uFE4B': '\u203E', + '\uFE4C': '\u203E', + '\uFE4D': '\u005F', + '\uFE4E': '\u005F', + '\uFE4F': '\u005F' +}; + +function fontCharsToUnicode(charCodes, fontProperties) { + var toUnicode = fontProperties.toUnicode; + var composite = fontProperties.composite; + var encoding, differences, cidToUnicode; + var result = ''; + if (composite) { + cidToUnicode = fontProperties.cidToUnicode; + for (var i = 0, ii = charCodes.length; i < ii; i += 2) { + var charCode = (charCodes.charCodeAt(i) << 8) | + charCodes.charCodeAt(i + 1); + if (toUnicode && charCode in toUnicode) { + var unicode = toUnicode[charCode]; + result += typeof unicode !== 'number' ? unicode : + String.fromCharCode(unicode); + continue; + } + result += String.fromCharCode(!cidToUnicode ? charCode : + cidToUnicode[charCode] || charCode); + } + } else { + differences = fontProperties.differences; + encoding = fontProperties.baseEncoding; + for (var i = 0, ii = charCodes.length; i < ii; i++) { + var charCode = charCodes.charCodeAt(i); + var unicode; + if (toUnicode && charCode in toUnicode) { + var unicode = toUnicode[charCode]; + result += typeof unicode !== 'number' ? unicode : + String.fromCharCode(unicode); + continue; + } + + var glyphName = charCode in differences ? differences[charCode] : + encoding[charCode]; + if (glyphName in GlyphsUnicode) { + result += String.fromCharCode(GlyphsUnicode[glyphName]); + continue; + } + result += String.fromCharCode(charCode); + } + } + // normalizing the unicode characters + for (var i = 0, ii = result.length; i < ii; i++) { + if (!(result[i] in NormalizedUnicodes)) + continue; + result = result.substring(0, i) + NormalizedUnicodes[result[i]] + + result.substring(i + 1); + ii = result.length; + } + return result; +} + /** * 'Font' is the class the outside world should use, it encapsulate all the font * decoding logics whatever type it is (assuming the font type is supported). @@ -2478,6 +3208,7 @@ var Font = (function FontClosure() { } // MacRoman encoding address by re-encoding the cmap table + fontCharCode = glyphName in this.glyphNameMap ? this.glyphNameMap[glyphName] : GlyphsUnicode[glyphName]; break; diff --git a/src/worker.js b/src/worker.js index e362b0a91..a93c36e95 100644 --- a/src/worker.js +++ b/src/worker.js @@ -147,7 +147,6 @@ var WorkerMessageHandler = { handler.on('RenderPageRequest', function wphSetupRenderPage(data) { var pageNum = data.pageIndex + 1; - // The following code does quite the same as // Page.prototype.startRendering, but stops at one point and sends the // result back to the main thread. @@ -207,6 +206,24 @@ var WorkerMessageHandler = { depFonts: Object.keys(fonts) }); }, this); + + handler.on('GetTextContent', function wphExtractText(data, promise) { + var pageNum = data.pageIndex + 1; + var start = Date.now(); + + var textContent = ''; + try { + var page = pdfModel.getPage(pageNum); + textContent = page.extractTextContent(); + promise.resolve(textContent); + } catch (e) { + // Skip errored pages + promise.reject(e); + } + + console.log('text indexing: page=%d - time=%dms', + pageNum, Date.now() - start); + }); } }; diff --git a/web/images/toolbarButton-search.png b/web/images/toolbarButton-search.png new file mode 100644 index 000000000..78878b417 Binary files /dev/null and b/web/images/toolbarButton-search.png differ diff --git a/web/viewer.css b/web/viewer.css index c7c8c7cf3..2d2864551 100644 --- a/web/viewer.css +++ b/web/viewer.css @@ -238,6 +238,10 @@ html[dir='rtl'] .splitToolbarButton > .toolbarButton { opacity: .5; } +.toolbarButton.group { + margin-right:0; +} + .splitToolbarButton:hover > .toolbarButton, .splitToolbarButton:focus > .toolbarButton, .splitToolbarButton.toggled > .toolbarButton { @@ -533,6 +537,12 @@ html[dir='rtl'] .toolbarButton.pageDown::before { content: url(images/toolbarButton-viewOutline.png); } +#viewSearch.toolbarButton::before { + display: inline-block; + content: url(images/toolbarButton-search.png); +} + + .toolbarField { min-width: 16px; width: 32px; @@ -697,6 +707,53 @@ a:focus > .thumbnail > .thumbnailSelectionRing, color: hsla(0,0%,100%,.9); } +#searchScrollView { + position: absolute; + top: 10px; + bottom: 10px; + left: 10px; + width: 280px; +} + +#searchToolbar { + padding-left: 0px; + right: 0px; + padding-top: 0px; + padding-bottom: 5px; +} + +#searchToolbar > input { + margin-left: 8px; + width: 130px; +} + +#searchResults { + overflow: auto; + background-color: #fff; + position: absolute; + top: 30px; + bottom: 0px; + left: 0px; + right: 0; + font-size: smaller; + opacity: 0.7; +} + +#searchResults a { + display: block; + white-space: pre; + text-decoration: none; + color: black; +} + +#sidebarControls { + position:absolute; + width: 180px; + height: 32px; + left: 15px; + bottom: 35px; +} + .outlineItem.selected { background-color: hsla(0,0%,100%,.08); background-image: -moz-linear-gradient(hsla(0,0%,100%,.05), hsla(0,0%,100%,0)); diff --git a/web/viewer.html b/web/viewer.html index 4a307c7ef..087fb7af3 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -45,20 +45,28 @@
-
- - -
+ + +
+
@@ -68,27 +76,27 @@
-
-
-
- +
- @@ -107,16 +115,16 @@
-
-
- diff --git a/web/viewer.js b/web/viewer.js index 2b9067719..e6d1b6766 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -219,6 +219,8 @@ var PDFView = { currentScale: kUnknownScale, currentScaleValue: null, initialBookmark: document.location.hash.substring(1), + startedTextExtraction: false, + pageText: [], container: null, initialized: false, fellback: false, @@ -579,6 +581,8 @@ var PDFView = { } var pages = this.pages = []; + this.pageText = []; + this.startedTextExtraction = false; var pagesRefMap = {}; var thumbnails = this.thumbnails = []; var pagePromises = []; @@ -659,6 +663,67 @@ var PDFView = { } }, + search: function pdfViewStartSearch() { + // Limit this function to run every ms. + var SEARCH_TIMEOUT = 250; + var lastSeach = this.lastSearch; + var now = Date.now(); + if (lastSeach && (now - lastSeach) < SEARCH_TIMEOUT) { + if (!this.searchTimer) { + this.searchTimer = setTimeout(function resumeSearch() { + PDFView.search(); + }, + SEARCH_TIMEOUT - (now - lastSeach) + ); + } + return; + } + this.searchTimer = null; + this.lastSearch = now; + + function bindLink(link, pageNumber) { + link.href = '#' + pageNumber; + link.onclick = function searchBindLink() { + PDFView.page = pageNumber; + return false; + }; + } + + var searchResults = document.getElementById('searchResults'); + + var searchTermsInput = document.getElementById('searchTermsInput'); + searchResults.removeAttribute('hidden'); + searchResults.textContent = ''; + + var terms = searchTermsInput.value; + + if (!terms) + return; + + // simple search: removing spaces and hyphens, then scanning every + terms = terms.replace(/\s-/g, '').toLowerCase(); + var index = PDFView.pageText; + var pageFound = false; + for (var i = 0, ii = index.length; i < ii; i++) { + var pageText = index[i].replace(/\s-/g, '').toLowerCase(); + var j = pageText.indexOf(terms); + if (j < 0) + continue; + + var pageNumber = i + 1; + var textSample = index[i].substr(j, 50); + var link = document.createElement('a'); + bindLink(link, pageNumber); + link.textContent = 'Page ' + pageNumber + ': ' + textSample; + searchResults.appendChild(link); + + pageFound = true; + } + if (!pageFound) { + searchResults.textContent = '(Not found)'; + } + }, + setHash: function pdfViewSetHash(hash) { if (!hash) return; @@ -701,26 +766,70 @@ var PDFView = { switchSidebarView: function pdfViewSwitchSidebarView(view) { var thumbsView = document.getElementById('thumbnailView'); var outlineView = document.getElementById('outlineView'); - var thumbsSwitchButton = document.getElementById('viewThumbnail'); - var outlineSwitchButton = document.getElementById('viewOutline'); + var searchView = document.getElementById('searchView'); - if (outlineSwitchButton.getAttribute('disabled')) - return; - - thumbsView.classList.toggle('hidden'); - outlineView.classList.toggle('hidden'); - document.getElementById('viewThumbnail').classList.toggle('toggled'); - document.getElementById('viewOutline').classList.toggle('toggled'); + var thumbsButton = document.getElementById('viewThumbnail'); + var outlineButton = document.getElementById('viewOutline'); + var searchButton = document.getElementById('viewSearch'); switch (view) { case 'thumbs': + thumbsButton.classList.add('toggled'); + outlineButton.classList.remove('toggled'); + searchButton.classList.remove('toggled'); + thumbsView.classList.remove('hidden'); + outlineView.classList.add('hidden'); + searchView.classList.add('hidden'); + updateThumbViewArea(); break; + case 'outline': + thumbsButton.classList.remove('toggled'); + outlineButton.classList.add('toggled'); + searchButton.classList.remove('toggled'); + thumbsView.classList.add('hidden'); + outlineView.classList.remove('hidden'); + searchView.classList.add('hidden'); + + if (outlineButton.getAttribute('disabled')) + return; + break; + + case 'search': + thumbsButton.classList.remove('toggled'); + outlineButton.classList.remove('toggled'); + searchButton.classList.add('toggled'); + thumbsView.classList.add('hidden'); + outlineView.classList.add('hidden'); + searchView.classList.remove('hidden'); + + var searchTermsInput = document.getElementById('searchTermsInput'); + searchTermsInput.focus(); + // Start text extraction as soon as the search gets displayed. + this.extractText(); break; } }, + extractText: function() { + if (this.startedTextExtraction) + return; + this.startedTextExtraction = true; + var self = this; + function extractPageText(pageIndex) { + self.pages[pageIndex].pdfPage.getTextContent().then( + function textContentResolved(textContent) { + self.pageText[pageIndex] = textContent; + self.search(); + if ((pageIndex + 1) < self.pages.length) + extractPageText(pageIndex + 1); + } + ); + }; + extractPageText(0); + }, + getVisiblePages: function pdfViewGetVisiblePages() { var pages = this.pages; var kBottomMargin = 10; @@ -1418,6 +1527,11 @@ window.addEventListener('load', function webViewerLoad(evt) { PDFBug.init(); } + if (!PDFJS.isFirefoxExtension || + (PDFJS.isFirefoxExtension && FirefoxCom.request('searchEnabled'))) { + document.querySelector('#viewSearch').classList.remove('hidden'); + } + // Listen for warnings to trigger the fallback UI. Errors should be caught // and call PDFView.error() so we don't need to listen for those. PDFJS.LogManager.addLogger({