Merge pull request #10185 from Snuffleupagus/find-less-normalization
Reduce the number of redundant text normalization operations in `PDFFindController`
This commit is contained in:
commit
a77ac5fa23
@ -40,6 +40,18 @@ const CHARACTERS_TO_NORMALIZE = {
|
|||||||
'\u00BE': '3/4', // Vulgar fraction three quarters
|
'\u00BE': '3/4', // Vulgar fraction three quarters
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Cached regular expression used by `normalize`; built on first use.
let normalizationRegex = null;

/**
 * Replace every character that is a key of `CHARACTERS_TO_NORMALIZE` with the
 * string that the table maps it to.
 *
 * @param {string} text - The text to normalize.
 * @returns {string} `text` with each listed character substituted.
 */
function normalize(text) {
  // The character class is assembled from the table keys only once; every
  // later call reuses the cached, global (`g`) regular expression.
  normalizationRegex =
    normalizationRegex ||
    new RegExp(`[${Object.keys(CHARACTERS_TO_NORMALIZE).join('')}]`, 'g');

  return text.replace(
    normalizationRegex,
    (matchedChar) => CHARACTERS_TO_NORMALIZE[matchedChar]
  );
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} PDFFindControllerOptions
|
* @typedef {Object} PDFFindControllerOptions
|
||||||
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
* @property {IPDFLinkService} linkService - The navigation/linking service.
|
||||||
@ -59,10 +71,6 @@ class PDFFindController {
|
|||||||
|
|
||||||
this._reset();
|
this._reset();
|
||||||
eventBus.on('findbarclose', this._onFindBarClose.bind(this));
|
eventBus.on('findbarclose', this._onFindBarClose.bind(this));
|
||||||
|
|
||||||
// Compile the regular expression for text normalization once.
|
|
||||||
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
|
|
||||||
this._normalizationRegex = new RegExp(`[${replace}]`, 'g');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
get highlightMatches() {
|
get highlightMatches() {
|
||||||
@ -152,7 +160,7 @@ class PDFFindController {
|
|||||||
matchIdx: null,
|
matchIdx: null,
|
||||||
};
|
};
|
||||||
this._extractTextPromises = [];
|
this._extractTextPromises = [];
|
||||||
this._pageContents = []; // Stores the text for each page.
|
this._pageContents = []; // Stores the normalized text for each page.
|
||||||
this._matchesCountTotal = 0;
|
this._matchesCountTotal = 0;
|
||||||
this._pagesToSearch = null;
|
this._pagesToSearch = null;
|
||||||
this._pendingFindMatches = Object.create(null);
|
this._pendingFindMatches = Object.create(null);
|
||||||
@ -164,10 +172,15 @@ class PDFFindController {
|
|||||||
this._firstPageCapability = createPromiseCapability();
|
this._firstPageCapability = createPromiseCapability();
|
||||||
}
|
}
|
||||||
|
|
||||||
_normalize(text) {
|
/**
|
||||||
return text.replace(this._normalizationRegex, function(ch) {
|
* @return {string} The (current) normalized search query.
|
||||||
return CHARACTERS_TO_NORMALIZE[ch];
|
*/
|
||||||
});
|
get _query() {
|
||||||
|
if (this._state.query !== this._rawQuery) {
|
||||||
|
this._rawQuery = this._state.query;
|
||||||
|
this._normalizedQuery = normalize(this._state.query);
|
||||||
|
}
|
||||||
|
return this._normalizedQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -304,8 +317,8 @@ class PDFFindController {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_calculateMatch(pageIndex) {
|
_calculateMatch(pageIndex) {
|
||||||
let pageContent = this._normalize(this._pageContents[pageIndex]);
|
let pageContent = this._pageContents[pageIndex];
|
||||||
let query = this._normalize(this._state.query);
|
let query = this._query;
|
||||||
const { caseSensitive, entireWord, phraseSearch, } = this._state;
|
const { caseSensitive, entireWord, phraseSearch, } = this._state;
|
||||||
|
|
||||||
if (query.length === 0) {
|
if (query.length === 0) {
|
||||||
@ -362,8 +375,8 @@ class PDFFindController {
|
|||||||
strBuf.push(textItems[j].str);
|
strBuf.push(textItems[j].str);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the page content (text items) as one string.
|
// Store the normalized page content (text items) as one string.
|
||||||
this._pageContents[i] = strBuf.join('');
|
this._pageContents[i] = normalize(strBuf.join(''));
|
||||||
extractTextCapability.resolve(i);
|
extractTextCapability.resolve(i);
|
||||||
}, (reason) => {
|
}, (reason) => {
|
||||||
console.error(`Unable to get text content for page ${i + 1}`, reason);
|
console.error(`Unable to get text content for page ${i + 1}`, reason);
|
||||||
@ -423,7 +436,7 @@ class PDFFindController {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If there's no query there's no point in searching.
|
// If there's no query there's no point in searching.
|
||||||
if (this._state.query === '') {
|
if (this._query === '') {
|
||||||
this._updateUIState(FindState.FOUND);
|
this._updateUIState(FindState.FOUND);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user