Merge pull request #16247 from Snuffleupagus/issue-7442

[api-minor] Add support, in `PDFFindController`, for mixing phrase/word searches (issue 7442)
This commit is contained in:
Tim van der Meij 2023-04-16 14:23:41 +02:00 committed by GitHub
commit f46ed43b81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 95 additions and 48 deletions

View File

@ -95,7 +95,6 @@ function testSearch({
query: null, query: null,
caseSensitive: false, caseSensitive: false,
entireWord: false, entireWord: false,
phraseSearch: true,
findPrevious: false, findPrevious: false,
matchDiacritics: false, matchDiacritics: false,
}, },
@ -182,7 +181,6 @@ function testEmptySearch({ eventBus, pdfFindController, state }) {
query: null, query: null,
caseSensitive: false, caseSensitive: false,
entireWord: false, entireWord: false,
phraseSearch: true,
findPrevious: false, findPrevious: false,
matchDiacritics: false, matchDiacritics: false,
}, },
@ -321,8 +319,7 @@ describe("pdf_find_controller", function () {
eventBus, eventBus,
pdfFindController, pdfFindController,
state: { state: {
query: "alternate solution", query: ["alternate", "solution"],
phraseSearch: false,
}, },
matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0], matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0],
selectedMatch: { selectedMatch: {
@ -332,6 +329,25 @@ describe("pdf_find_controller", function () {
}); });
}); });
it("performs a multiple term (phrase) search", async function () {
// Page 9 contains 'alternate solution' and pages 6 and 9 contain
// 'solution'. Both should be found for multiple term (phrase) search.
const { eventBus, pdfFindController } = await initPdfFindController();
await testSearch({
eventBus,
pdfFindController,
state: {
query: ["alternate solution", "solution"],
},
matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0],
selectedMatch: {
pageIndex: 5,
matchIndex: 0,
},
});
});
it("performs a normal search, where the text is normalized", async function () { it("performs a normal search, where the text is normalized", async function () {
const { eventBus, pdfFindController } = await initPdfFindController( const { eventBus, pdfFindController } = await initPdfFindController(
"fraction-highlight.pdf" "fraction-highlight.pdf"

View File

@ -2575,7 +2575,6 @@ function webViewerFindFromUrlHash(evt) {
source: evt.source, source: evt.source,
type: "", type: "",
query: evt.query, query: evt.query,
phraseSearch: evt.phraseSearch,
caseSensitive: false, caseSensitive: false,
entireWord: false, entireWord: false,
highlightAll: true, highlightAll: true,

View File

@ -222,7 +222,6 @@ class MozL10n {
source: window, source: window,
type: type.substring(findLen), type: type.substring(findLen),
query: detail.query, query: detail.query,
phraseSearch: true,
caseSensitive: !!detail.caseSensitive, caseSensitive: !!detail.caseSensitive,
entireWord: !!detail.entireWord, entireWord: !!detail.entireWord,
highlightAll: !!detail.highlightAll, highlightAll: !!detail.highlightAll,

View File

@ -99,7 +99,6 @@ class PDFFindBar {
source: this, source: this,
type, type,
query: this.findField.value, query: this.findField.value,
phraseSearch: true,
caseSensitive: this.caseSensitive.checked, caseSensitive: this.caseSensitive.checked,
entireWord: this.entireWord.checked, entireWord: this.entireWord.checked,
highlightAll: this.highlightAll.checked, highlightAll: this.highlightAll.checked,

View File

@ -387,6 +387,8 @@ function getOriginalIndex(diffs, pos, len) {
* Provides search functionality to find a given string in a PDF document. * Provides search functionality to find a given string in a PDF document.
*/ */
class PDFFindController { class PDFFindController {
#state = null;
#updateMatchesCountOnProgress = true; #updateMatchesCountOnProgress = true;
#visitedPagesCount = 0; #visitedPagesCount = 0;
@ -421,7 +423,7 @@ class PDFFindController {
} }
get state() { get state() {
return this._state; return this.#state;
} }
/** /**
@ -445,13 +447,25 @@ class PDFFindController {
if (!state) { if (!state) {
return; return;
} }
if (
(typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) &&
state.phraseSearch === false
) {
console.error(
"The `phraseSearch`-parameter was removed, please provide " +
"an Array of strings in the `query`-parameter instead."
);
if (typeof state.query === "string") {
state.query = state.query.match(/\S+/g);
}
}
const pdfDocument = this._pdfDocument; const pdfDocument = this._pdfDocument;
const { type } = state; const { type } = state;
if (this._state === null || this.#shouldDirtyMatch(state)) { if (this.#state === null || this.#shouldDirtyMatch(state)) {
this._dirtyMatch = true; this._dirtyMatch = true;
} }
this._state = state; this.#state = state;
if (type !== "highlightallchange") { if (type !== "highlightallchange") {
this.#updateUIState(FindState.PENDING); this.#updateUIState(FindState.PENDING);
} }
@ -490,7 +504,7 @@ class PDFFindController {
// When the findbar was previously closed, and `highlightAll` is set, // When the findbar was previously closed, and `highlightAll` is set,
// ensure that the matches on all active pages are highlighted again. // ensure that the matches on all active pages are highlighted again.
if (findbarClosed && this._state.highlightAll) { if (findbarClosed && this.#state.highlightAll) {
this.#updateAllPages(); this.#updateAllPages();
} }
} else if (type === "highlightallchange") { } else if (type === "highlightallchange") {
@ -537,7 +551,7 @@ class PDFFindController {
this._pageMatches = []; this._pageMatches = [];
this._pageMatchesLength = []; this._pageMatchesLength = [];
this.#visitedPagesCount = 0; this.#visitedPagesCount = 0;
this._state = null; this.#state = null;
// Currently selected match. // Currently selected match.
this._selected = { this._selected = {
pageIdx: -1, pageIdx: -1,
@ -565,22 +579,44 @@ class PDFFindController {
} }
/** /**
* @type {string} The (current) normalized search query. * @type {string|Array} The (current) normalized search query.
*/ */
get #query() { get #query() {
if (this._state.query !== this._rawQuery) { const { query } = this.#state;
this._rawQuery = this._state.query; if (typeof query === "string") {
[this._normalizedQuery] = normalize(this._state.query); if (query !== this._rawQuery) {
this._rawQuery = query;
[this._normalizedQuery] = normalize(query);
}
return this._normalizedQuery;
} }
return this._normalizedQuery; // We don't bother caching the normalized search query in the Array-case,
// since this code-path is *essentially* unused in the default viewer.
return (query || []).filter(q => !!q).map(q => normalize(q)[0]);
} }
#shouldDirtyMatch(state) { #shouldDirtyMatch(state) {
// When the search query changes, regardless of the actual search command // When the search query changes, regardless of the actual search command
// used, always re-calculate matches to avoid errors (fixes bug 1030622). // used, always re-calculate matches to avoid errors (fixes bug 1030622).
if (state.query !== this._state.query) { const newQuery = state.query,
prevQuery = this.#state.query;
const newType = typeof newQuery,
prevType = typeof prevQuery;
if (newType !== prevType) {
return true; return true;
} }
if (newType === "string") {
if (newQuery !== prevQuery) {
return true;
}
} else {
// Array
if (JSON.stringify(newQuery) !== JSON.stringify(prevQuery)) {
return true;
}
}
switch (state.type) { switch (state.type) {
case "again": case "again":
const pageNumber = this._selected.pageIdx + 1; const pageNumber = this._selected.pageIdx + 1;
@ -670,7 +706,7 @@ class PDFFindController {
} }
#convertToRegExpString(query, hasDiacritics) { #convertToRegExpString(query, hasDiacritics) {
const { matchDiacritics } = this._state; const { matchDiacritics } = this.#state;
let isUnicode = false; let isUnicode = false;
query = query.replaceAll( query = query.replaceAll(
SPECIAL_CHARS_REG_EXP, SPECIAL_CHARS_REG_EXP,
@ -741,36 +777,31 @@ class PDFFindController {
#calculateMatch(pageIndex) { #calculateMatch(pageIndex) {
let query = this.#query; let query = this.#query;
if (!query) { if (query.length === 0) {
// Do nothing: the matches should be wiped out already. return; // Do nothing: the matches should be wiped out already.
return;
} }
const { caseSensitive, entireWord } = this.#state;
const { caseSensitive, entireWord, phraseSearch } = this._state;
const pageContent = this._pageContents[pageIndex]; const pageContent = this._pageContents[pageIndex];
const hasDiacritics = this._hasDiacritics[pageIndex]; const hasDiacritics = this._hasDiacritics[pageIndex];
let isUnicode = false; let isUnicode = false;
if (phraseSearch) { if (typeof query === "string") {
[isUnicode, query] = this.#convertToRegExpString(query, hasDiacritics); [isUnicode, query] = this.#convertToRegExpString(query, hasDiacritics);
} else { } else {
// Words are sorted in reverse order to be sure that "foobar" is matched // Words are sorted in reverse order to be sure that "foobar" is matched
// before "foo" in case the query is "foobar foo". // before "foo" in case the query is "foobar foo".
const match = query.match(/\S+/g); query = query
if (match) { .sort()
query = match .reverse()
.sort() .map(q => {
.reverse() const [isUnicodePart, queryPart] = this.#convertToRegExpString(
.map(q => { q,
const [isUnicodePart, queryPart] = this.#convertToRegExpString( hasDiacritics
q, );
hasDiacritics isUnicode ||= isUnicodePart;
); return `(${queryPart})`;
isUnicode ||= isUnicodePart; })
return `(${queryPart})`; .join("|");
})
.join("|");
}
} }
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`; const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
@ -780,7 +811,7 @@ class PDFFindController {
// When `highlightAll` is set, ensure that the matches on previously // When `highlightAll` is set, ensure that the matches on previously
// rendered (and still active) pages are correctly highlighted. // rendered (and still active) pages are correctly highlighted.
if (this._state.highlightAll) { if (this.#state.highlightAll) {
this.#updatePage(pageIndex); this.#updatePage(pageIndex);
} }
if (this._resumePageIdx === pageIndex) { if (this._resumePageIdx === pageIndex) {
@ -876,7 +907,7 @@ class PDFFindController {
} }
#nextMatch() { #nextMatch() {
const previous = this._state.findPrevious; const previous = this.#state.findPrevious;
const currentPageIndex = this._linkService.page - 1; const currentPageIndex = this._linkService.page - 1;
const numPages = this._linkService.pagesCount; const numPages = this._linkService.pagesCount;
@ -911,7 +942,8 @@ class PDFFindController {
} }
// If there's no query there's no point in searching. // If there's no query there's no point in searching.
if (!this.#query) { const query = this.#query;
if (query.length === 0) {
this.#updateUIState(FindState.FOUND); this.#updateUIState(FindState.FOUND);
return; return;
} }
@ -948,7 +980,7 @@ class PDFFindController {
#matchesReady(matches) { #matchesReady(matches) {
const offset = this._offset; const offset = this._offset;
const numMatches = matches.length; const numMatches = matches.length;
const previous = this._state.findPrevious; const previous = this.#state.findPrevious;
if (numMatches) { if (numMatches) {
// There were matches for the page, so initialize `matchIdx`. // There were matches for the page, so initialize `matchIdx`.
@ -1021,7 +1053,7 @@ class PDFFindController {
} }
} }
this.#updateUIState(state, this._state.findPrevious); this.#updateUIState(state, this.#state.findPrevious);
if (this._selected.pageIdx !== -1) { if (this._selected.pageIdx !== -1) {
// Ensure that the match will be scrolled into view. // Ensure that the match will be scrolled into view.
this._scrollMatches = true; this._scrollMatches = true;
@ -1106,7 +1138,7 @@ class PDFFindController {
state, state,
previous, previous,
matchesCount: this.#requestMatchesCount(), matchesCount: this.#requestMatchesCount(),
rawQuery: this._state?.query ?? null, rawQuery: this.#state?.query ?? null,
}); });
} }
} }

View File

@ -350,10 +350,12 @@ class PDFLinkService {
if (hash.includes("=")) { if (hash.includes("=")) {
const params = parseQueryString(hash); const params = parseQueryString(hash);
if (params.has("search")) { if (params.has("search")) {
const query = params.get("search").replaceAll('"', ""),
phrase = params.get("phrase") === "true";
this.eventBus.dispatch("findfromurlhash", { this.eventBus.dispatch("findfromurlhash", {
source: this, source: this,
query: params.get("search").replaceAll('"', ""), query: phrase ? query : query.match(/\S+/g),
phraseSearch: params.get("phrase") === "true",
}); });
} }
// borrowing syntax from "Parameters for Opening PDF Files" // borrowing syntax from "Parameters for Opening PDF Files"