Only normalize the text-content once, in PDFFindController, and not on every new search operation

Currently the text-content is normalized every time a new search operation is started, which is wasteful considering that the "raw" (i.e. un-normalized) text-content is never used for anything.
For a short document, e.g. the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it is unnecessary (and wasteful) to repeat the normalization every time a new search operation starts.
This commit is contained in:
Jonas Jenwald 2018-10-26 20:23:32 +02:00
parent 12d8b52c49
commit 84ae4f9a5e

View File

@ -160,7 +160,7 @@ class PDFFindController {
matchIdx: null,
};
this._extractTextPromises = [];
this._pageContents = []; // Stores the text for each page.
this._pageContents = []; // Stores the normalized text for each page.
this._matchesCountTotal = 0;
this._pagesToSearch = null;
this._pendingFindMatches = Object.create(null);
@ -306,7 +306,7 @@ class PDFFindController {
}
_calculateMatch(pageIndex) {
let pageContent = normalize(this._pageContents[pageIndex]);
let pageContent = this._pageContents[pageIndex];
let query = normalize(this._state.query);
const { caseSensitive, entireWord, phraseSearch, } = this._state;
@ -364,8 +364,8 @@ class PDFFindController {
strBuf.push(textItems[j].str);
}
// Store the page content (text items) as one string.
this._pageContents[i] = strBuf.join('');
// Store the normalized page content (text items) as one string.
this._pageContents[i] = normalize(strBuf.join(''));
extractTextCapability.resolve(i);
}, (reason) => {
console.error(`Unable to get text content for page ${i + 1}`, reason);