Merge pull request #10028 from Snuffleupagus/entireWord

Add initial support for "Whole words" searching in the viewer
This commit is contained in:
Tim van der Meij 2018-09-10 13:01:24 +02:00 committed by GitHub
commit bc5111d152
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 226 additions and 12 deletions

View File

@ -165,6 +165,7 @@ find_next.title=Find the next occurrence of the phrase
find_next_label=Next find_next_label=Next
find_highlight=Highlight all find_highlight=Highlight all
find_match_case_label=Match case find_match_case_label=Match case
find_entire_word_label=Whole words
find_reached_top=Reached top of document, continued from bottom find_reached_top=Reached top of document, continued from bottom
find_reached_bottom=Reached end of document, continued from top find_reached_bottom=Reached end of document, continued from top
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be # LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be

View File

@ -165,6 +165,7 @@ find_next.title=De volgende overeenkomst van de tekst zoeken
find_next_label=Volgende find_next_label=Volgende
find_highlight=Alles markeren find_highlight=Alles markeren
find_match_case_label=Hoofdlettergevoelig find_match_case_label=Hoofdlettergevoelig
find_entire_word_label=Hele woorden
find_reached_top=Bovenkant van document bereikt, doorgegaan vanaf onderkant find_reached_top=Bovenkant van document bereikt, doorgegaan vanaf onderkant
find_reached_bottom=Onderkant van document bereikt, doorgegaan vanaf bovenkant find_reached_bottom=Onderkant van document bereikt, doorgegaan vanaf bovenkant
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be # LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be

View File

@ -165,6 +165,7 @@ find_next.title=Hitta nästa förekomst av frasen
find_next_label=Nästa find_next_label=Nästa
find_highlight=Markera alla find_highlight=Markera alla
find_match_case_label=Matcha versal/gemen find_match_case_label=Matcha versal/gemen
find_entire_word_label=Hela ord
find_reached_top=Nådde början av dokumentet, började från slutet find_reached_top=Nådde början av dokumentet, började från slutet
find_reached_bottom=Nådde slutet på dokumentet, började från början find_reached_bottom=Nådde slutet på dokumentet, började från början
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be # LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be

View File

@ -25,6 +25,7 @@
"network_utils_spec.js", "network_utils_spec.js",
"node_stream_spec.js", "node_stream_spec.js",
"parser_spec.js", "parser_spec.js",
"pdf_find_utils_spec.js",
"pdf_history.js", "pdf_history.js",
"primitives_spec.js", "primitives_spec.js",
"stream_spec.js", "stream_spec.js",

View File

@ -67,6 +67,7 @@ function initializePDFJS(callback) {
'pdfjs-test/unit/network_spec', 'pdfjs-test/unit/network_spec',
'pdfjs-test/unit/network_utils_spec', 'pdfjs-test/unit/network_utils_spec',
'pdfjs-test/unit/parser_spec', 'pdfjs-test/unit/parser_spec',
'pdfjs-test/unit/pdf_find_utils_spec',
'pdfjs-test/unit/pdf_history_spec', 'pdfjs-test/unit/pdf_history_spec',
'pdfjs-test/unit/primitives_spec', 'pdfjs-test/unit/primitives_spec',
'pdfjs-test/unit/stream_spec', 'pdfjs-test/unit/stream_spec',

View File

@ -0,0 +1,56 @@
/* Copyright 2018 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { CharacterType, getCharacterType } from '../../web/pdf_find_utils';
describe('pdf_find_utils', function() {
  describe('getCharacterType', function() {
    it('gets expected character types', function() {
      // Maps a one-character string to the class that `getCharacterType` is
      // expected to report for that character's UTF-16 code unit.
      const expectedTypes = {
        // ASCII letters, digits and the underscore.
        'A': CharacterType.ALPHA_LETTER,
        'a': CharacterType.ALPHA_LETTER,
        '0': CharacterType.ALPHA_LETTER,
        '5': CharacterType.ALPHA_LETTER,
        '\xC4': CharacterType.ALPHA_LETTER, // 'Ä'
        '\xE4': CharacterType.ALPHA_LETTER, // 'ä'
        '_': CharacterType.ALPHA_LETTER,

        // Whitespace, including the non-breaking space.
        ' ': CharacterType.SPACE,
        '\t': CharacterType.SPACE,
        '\r': CharacterType.SPACE,
        '\n': CharacterType.SPACE,
        '\xA0': CharacterType.SPACE,

        // ASCII punctuation.
        '-': CharacterType.PUNCT,
        ',': CharacterType.PUNCT,
        '.': CharacterType.PUNCT,
        ';': CharacterType.PUNCT,
        ':': CharacterType.PUNCT,

        // Characters outside of the ASCII/Latin-1 range.
        '\u2122': CharacterType.ALPHA_LETTER, // trademark
        '\u0E25': CharacterType.THAI_LETTER,
        '\u4000': CharacterType.HAN_LETTER,
        '\uF950': CharacterType.HAN_LETTER,
        '\u30C0': CharacterType.KATAKANA_LETTER,
        '\u3050': CharacterType.HIRAGANA_LETTER,
        '\uFF80': CharacterType.HALFWIDTH_KATAKANA_LETTER,
      };

      // `getCharacterType` takes a char code rather than a string, hence the
      // conversion of every key before the lookup.
      for (const sample of Object.keys(expectedTypes)) {
        const actualType = getCharacterType(sample.charCodeAt(0));
        expect(actualType).toEqual(expectedTypes[sample]);
      }
    });
  });
});

View File

@ -1959,6 +1959,7 @@ function webViewerFind(evt) {
query: evt.query, query: evt.query,
phraseSearch: evt.phraseSearch, phraseSearch: evt.phraseSearch,
caseSensitive: evt.caseSensitive, caseSensitive: evt.caseSensitive,
entireWord: evt.entireWord,
highlightAll: evt.highlightAll, highlightAll: evt.highlightAll,
findPrevious: evt.findPrevious, findPrevious: evt.findPrevious,
}); });
@ -1969,6 +1970,7 @@ function webViewerFindFromUrlHash(evt) {
query: evt.query, query: evt.query,
phraseSearch: evt.phraseSearch, phraseSearch: evt.phraseSearch,
caseSensitive: false, caseSensitive: false,
entireWord: false,
highlightAll: true, highlightAll: true,
findPrevious: false, findPrevious: false,
}); });
@ -2105,6 +2107,7 @@ function webViewerKeyDown(evt) {
query: findState.query, query: findState.query,
phraseSearch: findState.phraseSearch, phraseSearch: findState.phraseSearch,
caseSensitive: findState.caseSensitive, caseSensitive: findState.caseSensitive,
entireWord: findState.entireWord,
highlightAll: findState.highlightAll, highlightAll: findState.highlightAll,
findPrevious: cmd === 5 || cmd === 12, findPrevious: cmd === 5 || cmd === 12,
}); });

View File

@ -167,7 +167,8 @@ class MozL10n {
'find', 'find',
'findagain', 'findagain',
'findhighlightallchange', 'findhighlightallchange',
'findcasesensitivitychange' 'findcasesensitivitychange',
'findentirewordchange',
]; ];
let handleEvent = function(evt) { let handleEvent = function(evt) {
if (!PDFViewerApplication.initialized) { if (!PDFViewerApplication.initialized) {
@ -179,13 +180,14 @@ class MozL10n {
query: evt.detail.query, query: evt.detail.query,
phraseSearch: true, phraseSearch: true,
caseSensitive: !!evt.detail.caseSensitive, caseSensitive: !!evt.detail.caseSensitive,
entireWord: !!evt.detail.entireWord,
highlightAll: !!evt.detail.highlightAll, highlightAll: !!evt.detail.highlightAll,
findPrevious: !!evt.detail.findPrevious, findPrevious: !!evt.detail.findPrevious,
}); });
}; };
for (let i = 0, len = events.length; i < len; i++) { for (let event of events) {
window.addEventListener(events[i], handleEvent); window.addEventListener(event, handleEvent);
} }
})(); })();

View File

@ -33,9 +33,9 @@ class PDFFindBar {
this.findField = options.findField || null; this.findField = options.findField || null;
this.highlightAll = options.highlightAllCheckbox || null; this.highlightAll = options.highlightAllCheckbox || null;
this.caseSensitive = options.caseSensitiveCheckbox || null; this.caseSensitive = options.caseSensitiveCheckbox || null;
this.entireWord = options.entireWordCheckbox || null;
this.findMsg = options.findMsg || null; this.findMsg = options.findMsg || null;
this.findResultsCount = options.findResultsCount || null; this.findResultsCount = options.findResultsCount || null;
this.findStatusIcon = options.findStatusIcon || null;
this.findPreviousButton = options.findPreviousButton || null; this.findPreviousButton = options.findPreviousButton || null;
this.findNextButton = options.findNextButton || null; this.findNextButton = options.findNextButton || null;
this.findController = options.findController || null; this.findController = options.findController || null;
@ -85,6 +85,10 @@ class PDFFindBar {
this.dispatchEvent('casesensitivitychange'); this.dispatchEvent('casesensitivitychange');
}); });
this.entireWord.addEventListener('click', () => {
this.dispatchEvent('entirewordchange');
});
this.eventBus.on('resize', this._adjustWidth.bind(this)); this.eventBus.on('resize', this._adjustWidth.bind(this));
} }
@ -97,8 +101,9 @@ class PDFFindBar {
source: this, source: this,
type, type,
query: this.findField.value, query: this.findField.value,
caseSensitive: this.caseSensitive.checked,
phraseSearch: true, phraseSearch: true,
caseSensitive: this.caseSensitive.checked,
entireWord: this.entireWord.checked,
highlightAll: this.highlightAll.checked, highlightAll: this.highlightAll.checked,
findPrevious: findPrev, findPrevious: findPrev,
}); });

View File

@ -14,6 +14,7 @@
*/ */
import { createPromiseCapability } from 'pdfjs-lib'; import { createPromiseCapability } from 'pdfjs-lib';
import { getCharacterType } from './pdf_find_utils';
import { getGlobalEventBus } from './dom_events'; import { getGlobalEventBus } from './dom_events';
import { scrollIntoView } from './ui_utils'; import { scrollIntoView } from './ui_utils';
@ -190,7 +191,30 @@ class PDFFindController {
} }
} }
_calculatePhraseMatch(query, pageIndex, pageContent) { /**
* Determine if the search query constitutes a "whole word", by comparing the
* first/last character type with the preceding/following character type.
*/
_isEntireWord(content, startIdx, length) {
if (startIdx > 0) {
const first = content.charCodeAt(startIdx);
const limit = content.charCodeAt(startIdx - 1);
if (getCharacterType(first) === getCharacterType(limit)) {
return false;
}
}
const endIdx = (startIdx + length - 1);
if (endIdx < (content.length - 1)) {
const last = content.charCodeAt(endIdx);
const limit = content.charCodeAt(endIdx + 1);
if (getCharacterType(last) === getCharacterType(limit)) {
return false;
}
}
return true;
}
_calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
let matches = []; let matches = [];
let queryLen = query.length; let queryLen = query.length;
let matchIdx = -queryLen; let matchIdx = -queryLen;
@ -199,12 +223,15 @@ class PDFFindController {
if (matchIdx === -1) { if (matchIdx === -1) {
break; break;
} }
if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
continue;
}
matches.push(matchIdx); matches.push(matchIdx);
} }
this.pageMatches[pageIndex] = matches; this.pageMatches[pageIndex] = matches;
} }
_calculateWordMatch(query, pageIndex, pageContent) { _calculateWordMatch(query, pageIndex, pageContent, entireWord) {
let matchesWithLength = []; let matchesWithLength = [];
// Divide the query into pieces and search for text in each piece. // Divide the query into pieces and search for text in each piece.
let queryArray = query.match(/\S+/g); let queryArray = query.match(/\S+/g);
@ -217,6 +244,10 @@ class PDFFindController {
if (matchIdx === -1) { if (matchIdx === -1) {
break; break;
} }
if (entireWord &&
!this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
continue;
}
// Other searches do not, so we store the length. // Other searches do not, so we store the length.
matchesWithLength.push({ matchesWithLength.push({
match: matchIdx, match: matchIdx,
@ -244,6 +275,7 @@ class PDFFindController {
let query = this._normalize(this.state.query); let query = this._normalize(this.state.query);
let caseSensitive = this.state.caseSensitive; let caseSensitive = this.state.caseSensitive;
let phraseSearch = this.state.phraseSearch; let phraseSearch = this.state.phraseSearch;
const entireWord = this.state.entireWord;
let queryLen = query.length; let queryLen = query.length;
if (queryLen === 0) { if (queryLen === 0) {
@ -257,9 +289,9 @@ class PDFFindController {
} }
if (phraseSearch) { if (phraseSearch) {
this._calculatePhraseMatch(query, pageIndex, pageContent); this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
} else { } else {
this._calculateWordMatch(query, pageIndex, pageContent); this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
} }
this._updatePage(pageIndex); this._updatePage(pageIndex);

107
web/pdf_find_utils.js Normal file
View File

@ -0,0 +1,107 @@
/* Copyright 2018 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Character classes reported by `getCharacterType`. Callers compare the
 * values for equality, so only their distinctness matters.
 *
 * The object is frozen: it is exported and therefore shared by every importer,
 * and an accidental write to one of its members would otherwise silently
 * change how characters are classified everywhere.
 */
const CharacterType = Object.freeze({
  SPACE: 0,
  ALPHA_LETTER: 1,
  PUNCT: 2,
  HAN_LETTER: 3,
  KATAKANA_LETTER: 4,
  HIRAGANA_LETTER: 5,
  HALFWIDTH_KATAKANA_LETTER: 6,
  THAI_LETTER: 7,
});
/**
 * Reports whether `charCode` lies below 0x2E80, the threshold under which
 * `getCharacterType` applies its ASCII/Thai/NBSP rules.
 * @param {number} charCode
 * @returns {boolean}
 */
function isAlphabeticalScript(charCode) {
  const FIRST_EXCLUDED_CODE = 0x2E80;
  return FIRST_EXCLUDED_CODE > charCode;
}
/**
 * Reports whether `charCode` has none of the bits selected by 0xFF80 set,
 * i.e. whether a 16-bit code unit is in the range 0x00-0x7F.
 * @param {number} charCode
 * @returns {boolean}
 */
function isAscii(charCode) {
  const NON_ASCII_BITS = 0xFF80;
  const highBits = charCode & NON_ASCII_BITS;
  return highBits === 0;
}
/**
 * Reports whether `charCode` is an ASCII letter, lower case (a-z) or
 * upper case (A-Z).
 * @param {number} charCode
 * @returns {boolean}
 */
function isAsciiAlpha(charCode) {
  const isLowerCase = /* a = */ 0x61 <= charCode && charCode <= /* z = */ 0x7A;
  const isUpperCase = /* A = */ 0x41 <= charCode && charCode <= /* Z = */ 0x5A;
  return isLowerCase || isUpperCase;
}
/**
 * Reports whether `charCode` is one of the ASCII digits 0-9.
 * @param {number} charCode
 * @returns {boolean}
 */
function isAsciiDigit(charCode) {
  const ZERO = 0x30;
  const NINE = 0x39;
  return ZERO <= charCode && charCode <= NINE;
}
/**
 * Reports whether `charCode` is one of the four ASCII whitespace characters
 * recognised here: space, tab, carriage return or line feed.
 * @param {number} charCode
 * @returns {boolean}
 */
function isAsciiSpace(charCode) {
  switch (charCode) {
    case 0x20: // SPACE
    case 0x09: // TAB
    case 0x0D: // CR
    case 0x0A: // LF
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether `charCode` falls in one of the two ranges classified as
 * Han here: 0x3400-0x9FFF or 0xF900-0xFAFF (both inclusive).
 * @param {number} charCode
 * @returns {boolean}
 */
function isHan(charCode) {
  const inMainRange = 0x3400 <= charCode && charCode <= 0x9FFF;
  const inUpperRange = 0xF900 <= charCode && charCode <= 0xFAFF;
  return inMainRange || inUpperRange;
}
/**
 * Reports whether `charCode` falls in the range 0x30A0-0x30FF (inclusive),
 * which is classified as Katakana here.
 * @param {number} charCode
 * @returns {boolean}
 */
function isKatakana(charCode) {
  const RANGE_START = 0x30A0;
  const RANGE_END = 0x30FF;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}
/**
 * Reports whether `charCode` falls in the range 0x3040-0x309F (inclusive),
 * which is classified as Hiragana here.
 * @param {number} charCode
 * @returns {boolean}
 */
function isHiragana(charCode) {
  const RANGE_START = 0x3040;
  const RANGE_END = 0x309F;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}
/**
 * Reports whether `charCode` falls in the range 0xFF60-0xFF9F (inclusive),
 * which is classified as halfwidth Katakana here.
 * @param {number} charCode
 * @returns {boolean}
 */
function isHalfwidthKatakana(charCode) {
  const RANGE_START = 0xFF60;
  const RANGE_END = 0xFF9F;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}
/**
 * Reports whether the bits of `charCode` selected by 0xFF80 equal 0x0E00,
 * i.e. whether a 16-bit code unit is in the range 0x0E00-0x0E7F.
 * @param {number} charCode
 * @returns {boolean}
 */
function isThai(charCode) {
  const BLOCK_MASK = 0xFF80;
  const THAI_BLOCK_START = 0x0E00;
  const blockStart = charCode & BLOCK_MASK;
  return blockStart === THAI_BLOCK_START;
}
/**
 * Classify a single UTF-16 code unit into one of the `CharacterType` classes.
 *
 * This function is based on the word-break detection implemented in:
 * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp
 *
 * @param {number} charCode - The code unit, e.g. from `String#charCodeAt`.
 * @returns {number} One of the `CharacterType` values.
 */
function getCharacterType(charCode) {
  // Codes at or above the alphabetical-script limit: only the Han and kana
  // ranges get a class of their own, anything else counts as a letter.
  if (!isAlphabeticalScript(charCode)) {
    if (isHan(charCode)) {
      return CharacterType.HAN_LETTER;
    }
    if (isKatakana(charCode)) {
      return CharacterType.KATAKANA_LETTER;
    }
    if (isHiragana(charCode)) {
      return CharacterType.HIRAGANA_LETTER;
    }
    if (isHalfwidthKatakana(charCode)) {
      return CharacterType.HALFWIDTH_KATAKANA_LETTER;
    }
    return CharacterType.ALPHA_LETTER;
  }

  // Non-ASCII codes below the limit: Thai and the non-breaking space are
  // special-cased, everything else counts as a letter.
  if (!isAscii(charCode)) {
    if (isThai(charCode)) {
      return CharacterType.THAI_LETTER;
    }
    if (charCode === /* NBSP = */ 0xA0) {
      return CharacterType.SPACE;
    }
    return CharacterType.ALPHA_LETTER;
  }

  // ASCII: whitespace first, then word characters (letters, digits and the
  // underscore); whatever remains is reported as punctuation.
  if (isAsciiSpace(charCode)) {
    return CharacterType.SPACE;
  }
  const isWordCharacter = isAsciiAlpha(charCode) || isAsciiDigit(charCode) ||
                          charCode === /* UNDERSCORE = */ 0x5F;
  return isWordCharacter ? CharacterType.ALPHA_LETTER : CharacterType.PUNCT;
}
export {
CharacterType,
getCharacterType,
};

View File

@ -104,15 +104,19 @@ See https://github.com/adobe-type-tools/cmap-resources
</div> </div>
</div> </div>
<div id="findbarOptionsContainer"> <div id="findbarOptionsOneContainer">
<input type="checkbox" id="findHighlightAll" class="toolbarField" tabindex="94"> <input type="checkbox" id="findHighlightAll" class="toolbarField" tabindex="94">
<label for="findHighlightAll" class="toolbarLabel" data-l10n-id="find_highlight">Highlight all</label> <label for="findHighlightAll" class="toolbarLabel" data-l10n-id="find_highlight">Highlight all</label>
<input type="checkbox" id="findMatchCase" class="toolbarField" tabindex="95"> <input type="checkbox" id="findMatchCase" class="toolbarField" tabindex="95">
<label for="findMatchCase" class="toolbarLabel" data-l10n-id="find_match_case_label">Match case</label> <label for="findMatchCase" class="toolbarLabel" data-l10n-id="find_match_case_label">Match case</label>
</div> </div>
<div id="findbarOptionsTwoContainer">
<input type="checkbox" id="findEntireWord" class="toolbarField" tabindex="96">
<label for="findEntireWord" class="toolbarLabel" data-l10n-id="find_entire_word_label">Whole words</label>
<span id="findResultsCount" class="toolbarLabel hidden"></span>
</div>
<div id="findbarMessageContainer"> <div id="findbarMessageContainer">
<span id="findResultsCount" class="toolbarLabel hidden"></span>
<span id="findMsg" class="toolbarLabel"></span> <span id="findMsg" class="toolbarLabel"></span>
</div> </div>
</div> <!-- findbar --> </div> <!-- findbar -->

View File

@ -134,9 +134,9 @@ function getViewerConfiguration() {
findField: document.getElementById('findInput'), findField: document.getElementById('findInput'),
highlightAllCheckbox: document.getElementById('findHighlightAll'), highlightAllCheckbox: document.getElementById('findHighlightAll'),
caseSensitiveCheckbox: document.getElementById('findMatchCase'), caseSensitiveCheckbox: document.getElementById('findMatchCase'),
entireWordCheckbox: document.getElementById('findEntireWord'),
findMsg: document.getElementById('findMsg'), findMsg: document.getElementById('findMsg'),
findResultsCount: document.getElementById('findResultsCount'), findResultsCount: document.getElementById('findResultsCount'),
findStatusIcon: document.getElementById('findStatusIcon'),
findPreviousButton: document.getElementById('findPrevious'), findPreviousButton: document.getElementById('findPrevious'),
findNextButton: document.getElementById('findNext'), findNextButton: document.getElementById('findNext'),
}, },