Only normalize the text-content once, in `PDFFindController`, and not on every new search operation
Currently the text-content is normalized every time a new search operation is started, which is wasteful considering that the "raw" text-content is never used for anything. For a short document, such as the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it is completely unnecessary to keep repeating the normalization whenever a new search operation starts. Instead, the text-content of each page is now normalized once, when it is extracted, and the normalized string is what gets stored in `_pageContents`.
This commit is contained in:
parent
12d8b52c49
commit
84ae4f9a5e
@ -160,7 +160,7 @@ class PDFFindController {
|
|||||||
matchIdx: null,
|
matchIdx: null,
|
||||||
};
|
};
|
||||||
this._extractTextPromises = [];
|
this._extractTextPromises = [];
|
||||||
this._pageContents = []; // Stores the text for each page.
|
this._pageContents = []; // Stores the normalized text for each page.
|
||||||
this._matchesCountTotal = 0;
|
this._matchesCountTotal = 0;
|
||||||
this._pagesToSearch = null;
|
this._pagesToSearch = null;
|
||||||
this._pendingFindMatches = Object.create(null);
|
this._pendingFindMatches = Object.create(null);
|
||||||
@ -306,7 +306,7 @@ class PDFFindController {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_calculateMatch(pageIndex) {
|
_calculateMatch(pageIndex) {
|
||||||
let pageContent = normalize(this._pageContents[pageIndex]);
|
let pageContent = this._pageContents[pageIndex];
|
||||||
let query = normalize(this._state.query);
|
let query = normalize(this._state.query);
|
||||||
const { caseSensitive, entireWord, phraseSearch, } = this._state;
|
const { caseSensitive, entireWord, phraseSearch, } = this._state;
|
||||||
|
|
||||||
@ -364,8 +364,8 @@ class PDFFindController {
|
|||||||
strBuf.push(textItems[j].str);
|
strBuf.push(textItems[j].str);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the page content (text items) as one string.
|
// Store the normalized page content (text items) as one string.
|
||||||
this._pageContents[i] = strBuf.join('');
|
this._pageContents[i] = normalize(strBuf.join(''));
|
||||||
extractTextCapability.resolve(i);
|
extractTextCapability.resolve(i);
|
||||||
}, (reason) => {
|
}, (reason) => {
|
||||||
console.error(`Unable to get text content for page ${i + 1}`, reason);
|
console.error(`Unable to get text content for page ${i + 1}`, reason);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user