Only normalize the text-content once, in PDFFindController, and not on every new search operation

Currently the text-content is normalized every time a new search operation is started, which is wasted work considering that the "raw" (i.e. non-normalized) text-content is never used for anything. For a short document, e.g. the `tracemonkey` file, this repeated normalization won't matter much, but for documents with a couple of thousand pages it is unnecessary (and wasteful) to repeat the normalization whenever a new search operation starts. Instead, the text-content of each page is now normalized once, when it is extracted, and the normalized string is what gets stored in `_pageContents`.
2018-10-26 20:23:32 +02:00 · 2018-10-26 20:23:32 +02:00 · 84ae4f9a5e
commit 84ae4f9a5e
parent 12d8b52c49
1 changed file with 4 additions and 4 deletions
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -160,7 +160,7 @@ class PDFFindController {
      matchIdx: null,
    };
    this._extractTextPromises = [];
-    this._pageContents = []; // Stores the text for each page.
+    this._pageContents = []; // Stores the normalized text for each page.
    this._matchesCountTotal = 0;
    this._pagesToSearch = null;
    this._pendingFindMatches = Object.create(null);
@@ -306,7 +306,7 @@ class PDFFindController {
  }

  _calculateMatch(pageIndex) {
-    let pageContent = normalize(this._pageContents[pageIndex]);
+    let pageContent = this._pageContents[pageIndex];
    let query = normalize(this._state.query);
    const { caseSensitive, entireWord, phraseSearch, } = this._state;

@@ -364,8 +364,8 @@ class PDFFindController {
            strBuf.push(textItems[j].str);
          }

-          // Store the page content (text items) as one string.
-          this._pageContents[i] = strBuf.join('');
+          // Store the normalized page content (text items) as one string.
+          this._pageContents[i] = normalize(strBuf.join(''));
          extractTextCapability.resolve(i);
        }, (reason) => {
          console.error(`Unable to get text content for page ${i + 1}`, reason);