Implement text normalization for page content and queries
This commit is contained in:
parent
ec1b58a30a
commit
fbfb9458d6
@ -43,6 +43,19 @@ var PDFFindController = {
|
|||||||
findTimeout: null,
|
findTimeout: null,
|
||||||
pdfPageSource: null,
|
pdfPageSource: null,
|
||||||
integratedFind: false,
|
integratedFind: false,
|
||||||
|
// Lookup table used for text normalization: each key is a single Unicode
// character and each value is the plain-ASCII text that replaces it.
// `initialize` joins the keys into one RegExp character class, and
// `normalize` uses the matched character as the key into this table.
// The keys are concatenated into the character class without escaping, so
// a key that is special inside `[...]` (`]`, `\`, `^`, `-`) would need to
// be escaped where the regex is built.
// The fraction entries expand one character into three, so normalized text
// can be longer than the original.
// NOTE(review): confirm that callers which compute match offsets on
// normalized text account for that length change.
charactersToNormalize: {
|
||||||
|
'\u2018': '\'', // Left single quotation mark
|
||||||
|
'\u2019': '\'', // Right single quotation mark
|
||||||
|
'\u201A': '\'', // Single low-9 quotation mark
|
||||||
|
'\u201B': '\'', // Single high-reversed-9 quotation mark
|
||||||
|
'\u201C': '"', // Left double quotation mark
|
||||||
|
'\u201D': '"', // Right double quotation mark
|
||||||
|
'\u201E': '"', // Double low-9 quotation mark
|
||||||
|
'\u201F': '"', // Double high-reversed-9 quotation mark
|
||||||
|
'\u00BC': '1/4', // Vulgar fraction one quarter
|
||||||
|
'\u00BD': '1/2', // Vulgar fraction one half
|
||||||
|
'\u00BE': '3/4' // Vulgar fraction three quarters
|
||||||
|
},
|
||||||
|
|
||||||
initialize: function(options) {
|
initialize: function(options) {
|
||||||
if (typeof PDFFindBar === 'undefined' || PDFFindBar === null) {
|
if (typeof PDFFindBar === 'undefined' || PDFFindBar === null) {
|
||||||
@ -53,6 +66,10 @@ var PDFFindController = {
|
|||||||
this.pdfPageSource = options.pdfPageSource;
|
this.pdfPageSource = options.pdfPageSource;
|
||||||
this.integratedFind = options.integratedFind;
|
this.integratedFind = options.integratedFind;
|
||||||
|
|
||||||
|
// Compile the regular expression for text normalization once
|
||||||
|
var replace = Object.keys(this.charactersToNormalize).join('');
|
||||||
|
this.normalizationRegex = new RegExp('[' + replace + ']', 'g');
|
||||||
|
|
||||||
var events = [
|
var events = [
|
||||||
'find',
|
'find',
|
||||||
'findagain',
|
'findagain',
|
||||||
@ -76,9 +93,15 @@ var PDFFindController = {
|
|||||||
this.active = false;
|
this.active = false;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Return a copy of `text` in which every character matched by
// `this.normalizationRegex` is replaced by its entry in
// `charactersToNormalize`.
// `text` is expected to be a string, since `.replace` is called on it; in
// this file it is called with page content and with the search query.
// `this.normalizationRegex` is created in `initialize`, so this method
// relies on `initialize` having run first.
normalize: function pdfFindControllerNormalize(text) {
|
||||||
|
return text.replace(this.normalizationRegex, function (ch) {
|
||||||
|
// Inside this plain function callback `this` is not the controller, so
// the table is reached through the global `PDFFindController` object.
return PDFFindController.charactersToNormalize[ch];
|
||||||
|
});
|
||||||
|
},
|
||||||
|
|
||||||
calcFindMatch: function(pageIndex) {
|
calcFindMatch: function(pageIndex) {
|
||||||
var pageContent = this.pageContents[pageIndex];
|
var pageContent = this.normalize(this.pageContents[pageIndex]);
|
||||||
var query = this.state.query;
|
var query = this.normalize(this.state.query);
|
||||||
var caseSensitive = this.state.caseSensitive;
|
var caseSensitive = this.state.caseSensitive;
|
||||||
var queryLen = query.length;
|
var queryLen = query.length;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user