Merge pull request #1674 from mozilla/textsearch

Text search with new API, new UI
This commit is contained in:
Brendan Dahl 2012-05-29 10:16:18 -07:00
commit d9764ab468
11 changed files with 1089 additions and 30 deletions

View File

@ -169,6 +169,9 @@ ChromeActions.prototype = {
pdfBugEnabled: function() { pdfBugEnabled: function() {
return getBoolPref(PREF_PREFIX + '.pdfBugEnabled', false); return getBoolPref(PREF_PREFIX + '.pdfBugEnabled', false);
}, },
searchEnabled: function() {
return getBoolPref(PREF_PREFIX + '.searchEnabled', false);
},
fallback: function(url) { fallback: function(url) {
var self = this; var self = this;
var domWindow = this.domWindow; var domWindow = this.domWindow;

View File

@ -44,3 +44,7 @@ thumb_page_title=Page {{page}}
thumb_page_canvas=Thumbnail of Page {{page}} thumb_page_canvas=Thumbnail of Page {{page}}
request_password=PDF is protected by a password: request_password=PDF is protected by a password:
open_file_label=Open open_file_label=Open
search.title=Search Document
search_label=Search
search_button=Find

View File

@ -361,12 +361,18 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
next(); next();
}, },
/** /**
* Stub for future feature. * @return {Promise} That is resolved with a {string} that is the text
* content from the page.
*/ */
getTextContent: function PDFPageProxy_getTextContent() { getTextContent: function PDFPageProxy_getTextContent() {
var promise = new PDFJS.Promise(); var promise = new PDFJS.Promise();
var textContent = 'page text'; // not implemented this.transport.messageHandler.send('GetTextContent', {
pageIndex: this.pageNumber - 1
},
function textContentCallback(textContent) {
promise.resolve(textContent); promise.resolve(textContent);
}
);
return promise; return promise;
}, },
/** /**

View File

@ -151,6 +151,7 @@ var Page = (function PageClosure() {
// fetching items // fetching items
var streams = []; var streams = [];
var i, n = content.length; var i, n = content.length;
var streams = [];
for (i = 0; i < n; ++i) for (i = 0; i < n; ++i)
streams.push(xref.fetchIfRef(content[i])); streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams); content = new StreamsSequenceStream(streams);
@ -166,7 +167,48 @@ var Page = (function PageClosure() {
return pe.getOperatorList(content, resources, dependency); return pe.getOperatorList(content, resources, dependency);
}, },
extractTextContent: function Page_extractTextContent() {
var handler = {
on: function nullHandlerOn() {},
send: function nullHandlerSend() {}
};
var xref = this.xref;
var content = xref.fetchIfRef(this.content);
var resources = xref.fetchIfRef(this.resources);
if (isArray(content)) {
// fetching items
var i, n = content.length;
var streams = [];
for (i = 0; i < n; ++i)
streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams);
} else if (isStream(content)) {
content.reset();
}
var pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_');
return pe.getTextContent(content, resources);
},
ensureFonts: function Page_ensureFonts(fonts, callback) {
this.stats.time('Font Loading');
// Convert the font names to the corresponding font obj.
for (var i = 0, ii = fonts.length; i < ii; i++) {
fonts[i] = this.objs.objs[fonts[i]].data;
}
// Load all the fonts
FontLoader.bind(
fonts,
function pageEnsureFontsFontObjs(fontObjs) {
this.stats.timeEnd('Font Loading');
callback.call(this);
}.bind(this)
);
},
getLinks: function Page_getLinks() { getLinks: function Page_getLinks() {
var links = []; var links = [];
var annotations = pageGetAnnotations(); var annotations = pageGetAnnotations();
@ -480,3 +522,4 @@ var PDFDocument = (function PDFDocumentClosure() {
return PDFDocument; return PDFDocument;
})(); })();

View File

@ -154,8 +154,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
font = xref.fetchIfRef(font) || fontRes.get(fontName); font = xref.fetchIfRef(font) || fontRes.get(fontName);
assertWellFormed(isDict(font)); assertWellFormed(isDict(font));
++self.objIdCounter; ++self.objIdCounter;
if (!font.translated) { if (!font.loadedName) {
font.translated = self.translateFont(font, xref, resources, font.translated = self.translateFont(font, xref, resources,
dependency); dependency);
if (font.translated) { if (font.translated) {
@ -469,6 +470,81 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return queue; return queue;
}, },
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
var self = this;
var xref = this.xref;
function handleSetFont(fontName, fontRef) {
var fontRes = resources.get('Font');
// TODO: TOASK: Is it possible to get here? If so, what does
// args[0].name should be like???
assert(fontRes, 'fontRes not available');
fontRes = xref.fetchIfRef(fontRes);
fontRef = fontRef || fontRes.get(fontName);
var font = xref.fetchIfRef(fontRef), tra;
assertWellFormed(isDict(font));
if (!font.translated) {
font.translated = self.translateFont(font, xref, resources);
}
return font;
}
resources = xref.fetchIfRef(resources) || new Dict();
var parser = new Parser(new Lexer(stream), false);
var res = resources;
var args = [], obj;
var text = '';
var chunk = '';
var font = null;
while (!isEOF(obj = parser.getObj())) {
if (isCmd(obj)) {
var cmd = obj.cmd;
switch (cmd) {
case 'Tf':
font = handleSetFont(args[0].name);
break;
case 'TJ':
var items = args[0];
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') {
chunk += items[j];
} else if (items[j] < 0) {
// making all negative offsets a space - better to have
// a space in incorrect place than not have them at all
chunk += ' ';
}
}
break;
case 'Tj':
chunk += args[0];
break;
case "'":
chunk += args[0] + ' ';
break;
case '"':
chunk += args[2] + ' ';
break;
} // switch
if (chunk !== '') {
text += fontCharsToUnicode(chunk, font.translated.properties);
chunk = '';
}
args = [];
} else if (obj != null) {
assertWellFormed(args.length <= 33, 'Too many arguments');
args.push(obj);
}
}
return text;
},
extractDataStructures: function extractDataStructures: function
partialEvaluatorExtractDataStructures(dict, baseDict, partialEvaluatorExtractDataStructures(dict, baseDict,
xref, properties) { xref, properties) {

View File

@ -753,6 +753,736 @@ function isSpecialUnicode(unicode) {
unicode < kCmapGlyphOffset + kSizeOfGlyphArea); unicode < kCmapGlyphOffset + kSizeOfGlyphArea);
} }
// The normalization table is obtained by filtering the Unicode characters
// database with <compat> entries.
var NormalizedUnicodes = {
'\u00A8': '\u0020\u0308',
'\u00AF': '\u0020\u0304',
'\u00B4': '\u0020\u0301',
'\u00B5': '\u03BC',
'\u00B8': '\u0020\u0327',
'\u0132': '\u0049\u004A',
'\u0133': '\u0069\u006A',
'\u013F': '\u004C\u00B7',
'\u0140': '\u006C\u00B7',
'\u0149': '\u02BC\u006E',
'\u017F': '\u0073',
'\u01C4': '\u0044\u017D',
'\u01C5': '\u0044\u017E',
'\u01C6': '\u0064\u017E',
'\u01C7': '\u004C\u004A',
'\u01C8': '\u004C\u006A',
'\u01C9': '\u006C\u006A',
'\u01CA': '\u004E\u004A',
'\u01CB': '\u004E\u006A',
'\u01CC': '\u006E\u006A',
'\u01F1': '\u0044\u005A',
'\u01F2': '\u0044\u007A',
'\u01F3': '\u0064\u007A',
'\u02D8': '\u0020\u0306',
'\u02D9': '\u0020\u0307',
'\u02DA': '\u0020\u030A',
'\u02DB': '\u0020\u0328',
'\u02DC': '\u0020\u0303',
'\u02DD': '\u0020\u030B',
'\u037A': '\u0020\u0345',
'\u0384': '\u0020\u0301',
'\u03D0': '\u03B2',
'\u03D1': '\u03B8',
'\u03D2': '\u03A5',
'\u03D5': '\u03C6',
'\u03D6': '\u03C0',
'\u03F0': '\u03BA',
'\u03F1': '\u03C1',
'\u03F2': '\u03C2',
'\u03F4': '\u0398',
'\u03F5': '\u03B5',
'\u03F9': '\u03A3',
'\u0587': '\u0565\u0582',
'\u0675': '\u0627\u0674',
'\u0676': '\u0648\u0674',
'\u0677': '\u06C7\u0674',
'\u0678': '\u064A\u0674',
'\u0E33': '\u0E4D\u0E32',
'\u0EB3': '\u0ECD\u0EB2',
'\u0EDC': '\u0EAB\u0E99',
'\u0EDD': '\u0EAB\u0EA1',
'\u0F77': '\u0FB2\u0F81',
'\u0F79': '\u0FB3\u0F81',
'\u1E9A': '\u0061\u02BE',
'\u1FBD': '\u0020\u0313',
'\u1FBF': '\u0020\u0313',
'\u1FC0': '\u0020\u0342',
'\u1FFE': '\u0020\u0314',
'\u2002': '\u0020',
'\u2003': '\u0020',
'\u2004': '\u0020',
'\u2005': '\u0020',
'\u2006': '\u0020',
'\u2008': '\u0020',
'\u2009': '\u0020',
'\u200A': '\u0020',
'\u2017': '\u0020\u0333',
'\u2024': '\u002E',
'\u2025': '\u002E\u002E',
'\u2026': '\u002E\u002E\u002E',
'\u2033': '\u2032\u2032',
'\u2034': '\u2032\u2032\u2032',
'\u2036': '\u2035\u2035',
'\u2037': '\u2035\u2035\u2035',
'\u203C': '\u0021\u0021',
'\u203E': '\u0020\u0305',
'\u2047': '\u003F\u003F',
'\u2048': '\u003F\u0021',
'\u2049': '\u0021\u003F',
'\u2057': '\u2032\u2032\u2032\u2032',
'\u205F': '\u0020',
'\u20A8': '\u0052\u0073',
'\u2100': '\u0061\u002F\u0063',
'\u2101': '\u0061\u002F\u0073',
'\u2103': '\u00B0\u0043',
'\u2105': '\u0063\u002F\u006F',
'\u2106': '\u0063\u002F\u0075',
'\u2107': '\u0190',
'\u2109': '\u00B0\u0046',
'\u2116': '\u004E\u006F',
'\u2121': '\u0054\u0045\u004C',
'\u2135': '\u05D0',
'\u2136': '\u05D1',
'\u2137': '\u05D2',
'\u2138': '\u05D3',
'\u213B': '\u0046\u0041\u0058',
'\u2160': '\u0049',
'\u2161': '\u0049\u0049',
'\u2162': '\u0049\u0049\u0049',
'\u2163': '\u0049\u0056',
'\u2164': '\u0056',
'\u2165': '\u0056\u0049',
'\u2166': '\u0056\u0049\u0049',
'\u2167': '\u0056\u0049\u0049\u0049',
'\u2168': '\u0049\u0058',
'\u2169': '\u0058',
'\u216A': '\u0058\u0049',
'\u216B': '\u0058\u0049\u0049',
'\u216C': '\u004C',
'\u216D': '\u0043',
'\u216E': '\u0044',
'\u216F': '\u004D',
'\u2170': '\u0069',
'\u2171': '\u0069\u0069',
'\u2172': '\u0069\u0069\u0069',
'\u2173': '\u0069\u0076',
'\u2174': '\u0076',
'\u2175': '\u0076\u0069',
'\u2176': '\u0076\u0069\u0069',
'\u2177': '\u0076\u0069\u0069\u0069',
'\u2178': '\u0069\u0078',
'\u2179': '\u0078',
'\u217A': '\u0078\u0069',
'\u217B': '\u0078\u0069\u0069',
'\u217C': '\u006C',
'\u217D': '\u0063',
'\u217E': '\u0064',
'\u217F': '\u006D',
'\u222C': '\u222B\u222B',
'\u222D': '\u222B\u222B\u222B',
'\u222F': '\u222E\u222E',
'\u2230': '\u222E\u222E\u222E',
'\u2474': '\u0028\u0031\u0029',
'\u2475': '\u0028\u0032\u0029',
'\u2476': '\u0028\u0033\u0029',
'\u2477': '\u0028\u0034\u0029',
'\u2478': '\u0028\u0035\u0029',
'\u2479': '\u0028\u0036\u0029',
'\u247A': '\u0028\u0037\u0029',
'\u247B': '\u0028\u0038\u0029',
'\u247C': '\u0028\u0039\u0029',
'\u247D': '\u0028\u0031\u0030\u0029',
'\u247E': '\u0028\u0031\u0031\u0029',
'\u247F': '\u0028\u0031\u0032\u0029',
'\u2480': '\u0028\u0031\u0033\u0029',
'\u2481': '\u0028\u0031\u0034\u0029',
'\u2482': '\u0028\u0031\u0035\u0029',
'\u2483': '\u0028\u0031\u0036\u0029',
'\u2484': '\u0028\u0031\u0037\u0029',
'\u2485': '\u0028\u0031\u0038\u0029',
'\u2486': '\u0028\u0031\u0039\u0029',
'\u2487': '\u0028\u0032\u0030\u0029',
'\u2488': '\u0031\u002E',
'\u2489': '\u0032\u002E',
'\u248A': '\u0033\u002E',
'\u248B': '\u0034\u002E',
'\u248C': '\u0035\u002E',
'\u248D': '\u0036\u002E',
'\u248E': '\u0037\u002E',
'\u248F': '\u0038\u002E',
'\u2490': '\u0039\u002E',
'\u2491': '\u0031\u0030\u002E',
'\u2492': '\u0031\u0031\u002E',
'\u2493': '\u0031\u0032\u002E',
'\u2494': '\u0031\u0033\u002E',
'\u2495': '\u0031\u0034\u002E',
'\u2496': '\u0031\u0035\u002E',
'\u2497': '\u0031\u0036\u002E',
'\u2498': '\u0031\u0037\u002E',
'\u2499': '\u0031\u0038\u002E',
'\u249A': '\u0031\u0039\u002E',
'\u249B': '\u0032\u0030\u002E',
'\u249C': '\u0028\u0061\u0029',
'\u249D': '\u0028\u0062\u0029',
'\u249E': '\u0028\u0063\u0029',
'\u249F': '\u0028\u0064\u0029',
'\u24A0': '\u0028\u0065\u0029',
'\u24A1': '\u0028\u0066\u0029',
'\u24A2': '\u0028\u0067\u0029',
'\u24A3': '\u0028\u0068\u0029',
'\u24A4': '\u0028\u0069\u0029',
'\u24A5': '\u0028\u006A\u0029',
'\u24A6': '\u0028\u006B\u0029',
'\u24A7': '\u0028\u006C\u0029',
'\u24A8': '\u0028\u006D\u0029',
'\u24A9': '\u0028\u006E\u0029',
'\u24AA': '\u0028\u006F\u0029',
'\u24AB': '\u0028\u0070\u0029',
'\u24AC': '\u0028\u0071\u0029',
'\u24AD': '\u0028\u0072\u0029',
'\u24AE': '\u0028\u0073\u0029',
'\u24AF': '\u0028\u0074\u0029',
'\u24B0': '\u0028\u0075\u0029',
'\u24B1': '\u0028\u0076\u0029',
'\u24B2': '\u0028\u0077\u0029',
'\u24B3': '\u0028\u0078\u0029',
'\u24B4': '\u0028\u0079\u0029',
'\u24B5': '\u0028\u007A\u0029',
'\u2A0C': '\u222B\u222B\u222B\u222B',
'\u2A74': '\u003A\u003A\u003D',
'\u2A75': '\u003D\u003D',
'\u2A76': '\u003D\u003D\u003D',
'\u2E9F': '\u6BCD',
'\u2EF3': '\u9F9F',
'\u2F00': '\u4E00',
'\u2F01': '\u4E28',
'\u2F02': '\u4E36',
'\u2F03': '\u4E3F',
'\u2F04': '\u4E59',
'\u2F05': '\u4E85',
'\u2F06': '\u4E8C',
'\u2F07': '\u4EA0',
'\u2F08': '\u4EBA',
'\u2F09': '\u513F',
'\u2F0A': '\u5165',
'\u2F0B': '\u516B',
'\u2F0C': '\u5182',
'\u2F0D': '\u5196',
'\u2F0E': '\u51AB',
'\u2F0F': '\u51E0',
'\u2F10': '\u51F5',
'\u2F11': '\u5200',
'\u2F12': '\u529B',
'\u2F13': '\u52F9',
'\u2F14': '\u5315',
'\u2F15': '\u531A',
'\u2F16': '\u5338',
'\u2F17': '\u5341',
'\u2F18': '\u535C',
'\u2F19': '\u5369',
'\u2F1A': '\u5382',
'\u2F1B': '\u53B6',
'\u2F1C': '\u53C8',
'\u2F1D': '\u53E3',
'\u2F1E': '\u56D7',
'\u2F1F': '\u571F',
'\u2F20': '\u58EB',
'\u2F21': '\u5902',
'\u2F22': '\u590A',
'\u2F23': '\u5915',
'\u2F24': '\u5927',
'\u2F25': '\u5973',
'\u2F26': '\u5B50',
'\u2F27': '\u5B80',
'\u2F28': '\u5BF8',
'\u2F29': '\u5C0F',
'\u2F2A': '\u5C22',
'\u2F2B': '\u5C38',
'\u2F2C': '\u5C6E',
'\u2F2D': '\u5C71',
'\u2F2E': '\u5DDB',
'\u2F2F': '\u5DE5',
'\u2F30': '\u5DF1',
'\u2F31': '\u5DFE',
'\u2F32': '\u5E72',
'\u2F33': '\u5E7A',
'\u2F34': '\u5E7F',
'\u2F35': '\u5EF4',
'\u2F36': '\u5EFE',
'\u2F37': '\u5F0B',
'\u2F38': '\u5F13',
'\u2F39': '\u5F50',
'\u2F3A': '\u5F61',
'\u2F3B': '\u5F73',
'\u2F3C': '\u5FC3',
'\u2F3D': '\u6208',
'\u2F3E': '\u6236',
'\u2F3F': '\u624B',
'\u2F40': '\u652F',
'\u2F41': '\u6534',
'\u2F42': '\u6587',
'\u2F43': '\u6597',
'\u2F44': '\u65A4',
'\u2F45': '\u65B9',
'\u2F46': '\u65E0',
'\u2F47': '\u65E5',
'\u2F48': '\u66F0',
'\u2F49': '\u6708',
'\u2F4A': '\u6728',
'\u2F4B': '\u6B20',
'\u2F4C': '\u6B62',
'\u2F4D': '\u6B79',
'\u2F4E': '\u6BB3',
'\u2F4F': '\u6BCB',
'\u2F50': '\u6BD4',
'\u2F51': '\u6BDB',
'\u2F52': '\u6C0F',
'\u2F53': '\u6C14',
'\u2F54': '\u6C34',
'\u2F55': '\u706B',
'\u2F56': '\u722A',
'\u2F57': '\u7236',
'\u2F58': '\u723B',
'\u2F59': '\u723F',
'\u2F5A': '\u7247',
'\u2F5B': '\u7259',
'\u2F5C': '\u725B',
'\u2F5D': '\u72AC',
'\u2F5E': '\u7384',
'\u2F5F': '\u7389',
'\u2F60': '\u74DC',
'\u2F61': '\u74E6',
'\u2F62': '\u7518',
'\u2F63': '\u751F',
'\u2F64': '\u7528',
'\u2F65': '\u7530',
'\u2F66': '\u758B',
'\u2F67': '\u7592',
'\u2F68': '\u7676',
'\u2F69': '\u767D',
'\u2F6A': '\u76AE',
'\u2F6B': '\u76BF',
'\u2F6C': '\u76EE',
'\u2F6D': '\u77DB',
'\u2F6E': '\u77E2',
'\u2F6F': '\u77F3',
'\u2F70': '\u793A',
'\u2F71': '\u79B8',
'\u2F72': '\u79BE',
'\u2F73': '\u7A74',
'\u2F74': '\u7ACB',
'\u2F75': '\u7AF9',
'\u2F76': '\u7C73',
'\u2F77': '\u7CF8',
'\u2F78': '\u7F36',