Merge pull request #10028 from Snuffleupagus/entireWord
Add initial support for "Whole words" searching in the viewer
This commit is contained in:
commit
bc5111d152
@ -165,6 +165,7 @@ find_next.title=Find the next occurrence of the phrase
|
||||
find_next_label=Next
|
||||
find_highlight=Highlight all
|
||||
find_match_case_label=Match case
|
||||
find_entire_word_label=Whole words
|
||||
find_reached_top=Reached top of document, continued from bottom
|
||||
find_reached_bottom=Reached end of document, continued from top
|
||||
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
|
||||
|
@ -165,6 +165,7 @@ find_next.title=De volgende overeenkomst van de tekst zoeken
|
||||
find_next_label=Volgende
|
||||
find_highlight=Alles markeren
|
||||
find_match_case_label=Hoofdlettergevoelig
|
||||
find_entire_word_label=Hele woorden
|
||||
find_reached_top=Bovenkant van document bereikt, doorgegaan vanaf onderkant
|
||||
find_reached_bottom=Onderkant van document bereikt, doorgegaan vanaf bovenkant
|
||||
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
|
||||
|
@ -165,6 +165,7 @@ find_next.title=Hitta nästa förekomst av frasen
|
||||
find_next_label=Nästa
|
||||
find_highlight=Markera alla
|
||||
find_match_case_label=Matcha versal/gemen
|
||||
find_entire_word_label=Hela ord
|
||||
find_reached_top=Nådde början av dokumentet, började från slutet
|
||||
find_reached_bottom=Nådde slutet på dokumentet, började från början
|
||||
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
|
||||
|
@ -25,6 +25,7 @@
|
||||
"network_utils_spec.js",
|
||||
"node_stream_spec.js",
|
||||
"parser_spec.js",
|
||||
"pdf_find_utils.js",
|
||||
"pdf_history.js",
|
||||
"primitives_spec.js",
|
||||
"stream_spec.js",
|
||||
|
@ -67,6 +67,7 @@ function initializePDFJS(callback) {
|
||||
'pdfjs-test/unit/network_spec',
|
||||
'pdfjs-test/unit/network_utils_spec',
|
||||
'pdfjs-test/unit/parser_spec',
|
||||
'pdfjs-test/unit/pdf_find_utils_spec',
|
||||
'pdfjs-test/unit/pdf_history_spec',
|
||||
'pdfjs-test/unit/primitives_spec',
|
||||
'pdfjs-test/unit/stream_spec',
|
||||
|
56
test/unit/pdf_find_utils_spec.js
Normal file
56
test/unit/pdf_find_utils_spec.js
Normal file
@ -0,0 +1,56 @@
|
||||
/* Copyright 2018 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { CharacterType, getCharacterType } from '../../web/pdf_find_utils';
|
||||
|
||||
// Unit tests for the character classification helper in `web/pdf_find_utils`.
describe('pdf_find_utils', function() {
  describe('getCharacterType', function() {
    it('gets expected character types', function() {
      // Maps a single-character string to the type that `getCharacterType`
      // is expected to report for its first UTF-16 code unit.
      const expectedTypes = {
        'A': CharacterType.ALPHA_LETTER,
        'a': CharacterType.ALPHA_LETTER,
        '0': CharacterType.ALPHA_LETTER,
        '5': CharacterType.ALPHA_LETTER,
        '\xC4': CharacterType.ALPHA_LETTER, // 'Ä'
        '\xE4': CharacterType.ALPHA_LETTER, // 'ä'
        '_': CharacterType.ALPHA_LETTER,
        ' ': CharacterType.SPACE,
        '\t': CharacterType.SPACE,
        '\r': CharacterType.SPACE,
        '\n': CharacterType.SPACE,
        '\xA0': CharacterType.SPACE,
        '-': CharacterType.PUNCT,
        ',': CharacterType.PUNCT,
        '.': CharacterType.PUNCT,
        ';': CharacterType.PUNCT,
        ':': CharacterType.PUNCT,
        '\u2122': CharacterType.ALPHA_LETTER, // trademark
        '\u0E25': CharacterType.THAI_LETTER,
        '\u4000': CharacterType.HAN_LETTER,
        '\uF950': CharacterType.HAN_LETTER,
        '\u30C0': CharacterType.KATAKANA_LETTER,
        '\u3050': CharacterType.HIRAGANA_LETTER,
        '\uFF80': CharacterType.HALFWIDTH_KATAKANA_LETTER,
      };

      Object.keys(expectedTypes).forEach(function(symbol) {
        const codeUnit = symbol.charCodeAt(0);
        const expectedType = expectedTypes[symbol];

        expect(getCharacterType(codeUnit)).toEqual(expectedType);
      });
    });
  });
});
|
@ -1959,6 +1959,7 @@ function webViewerFind(evt) {
|
||||
query: evt.query,
|
||||
phraseSearch: evt.phraseSearch,
|
||||
caseSensitive: evt.caseSensitive,
|
||||
entireWord: evt.entireWord,
|
||||
highlightAll: evt.highlightAll,
|
||||
findPrevious: evt.findPrevious,
|
||||
});
|
||||
@ -1969,6 +1970,7 @@ function webViewerFindFromUrlHash(evt) {
|
||||
query: evt.query,
|
||||
phraseSearch: evt.phraseSearch,
|
||||
caseSensitive: false,
|
||||
entireWord: false,
|
||||
highlightAll: true,
|
||||
findPrevious: false,
|
||||
});
|
||||
@ -2105,6 +2107,7 @@ function webViewerKeyDown(evt) {
|
||||
query: findState.query,
|
||||
phraseSearch: findState.phraseSearch,
|
||||
caseSensitive: findState.caseSensitive,
|
||||
entireWord: findState.entireWord,
|
||||
highlightAll: findState.highlightAll,
|
||||
findPrevious: cmd === 5 || cmd === 12,
|
||||
});
|
||||
|
@ -167,7 +167,8 @@ class MozL10n {
|
||||
'find',
|
||||
'findagain',
|
||||
'findhighlightallchange',
|
||||
'findcasesensitivitychange'
|
||||
'findcasesensitivitychange',
|
||||
'findentirewordchange',
|
||||
];
|
||||
let handleEvent = function(evt) {
|
||||
if (!PDFViewerApplication.initialized) {
|
||||
@ -179,13 +180,14 @@ class MozL10n {
|
||||
query: evt.detail.query,
|
||||
phraseSearch: true,
|
||||
caseSensitive: !!evt.detail.caseSensitive,
|
||||
entireWord: !!evt.detail.entireWord,
|
||||
highlightAll: !!evt.detail.highlightAll,
|
||||
findPrevious: !!evt.detail.findPrevious,
|
||||
});
|
||||
};
|
||||
|
||||
for (let i = 0, len = events.length; i < len; i++) {
|
||||
window.addEventListener(events[i], handleEvent);
|
||||
for (let event of events) {
|
||||
window.addEventListener(event, handleEvent);
|
||||
}
|
||||
})();
|
||||
|
||||
|
@ -33,9 +33,9 @@ class PDFFindBar {
|
||||
this.findField = options.findField || null;
|
||||
this.highlightAll = options.highlightAllCheckbox || null;
|
||||
this.caseSensitive = options.caseSensitiveCheckbox || null;
|
||||
this.entireWord = options.entireWordCheckbox || null;
|
||||
this.findMsg = options.findMsg || null;
|
||||
this.findResultsCount = options.findResultsCount || null;
|
||||
this.findStatusIcon = options.findStatusIcon || null;
|
||||
this.findPreviousButton = options.findPreviousButton || null;
|
||||
this.findNextButton = options.findNextButton || null;
|
||||
this.findController = options.findController || null;
|
||||
@ -85,6 +85,10 @@ class PDFFindBar {
|
||||
this.dispatchEvent('casesensitivitychange');
|
||||
});
|
||||
|
||||
this.entireWord.addEventListener('click', () => {
|
||||
this.dispatchEvent('entirewordchange');
|
||||
});
|
||||
|
||||
this.eventBus.on('resize', this._adjustWidth.bind(this));
|
||||
}
|
||||
|
||||
@ -97,8 +101,9 @@ class PDFFindBar {
|
||||
source: this,
|
||||
type,
|
||||
query: this.findField.value,
|
||||
caseSensitive: this.caseSensitive.checked,
|
||||
phraseSearch: true,
|
||||
caseSensitive: this.caseSensitive.checked,
|
||||
entireWord: this.entireWord.checked,
|
||||
highlightAll: this.highlightAll.checked,
|
||||
findPrevious: findPrev,
|
||||
});
|
||||
|
@ -14,6 +14,7 @@
|
||||
*/
|
||||
|
||||
import { createPromiseCapability } from 'pdfjs-lib';
|
||||
import { getCharacterType } from './pdf_find_utils';
|
||||
import { getGlobalEventBus } from './dom_events';
|
||||
import { scrollIntoView } from './ui_utils';
|
||||
|
||||
@ -190,7 +191,30 @@ class PDFFindController {
|
||||
}
|
||||
}
|
||||
|
||||
_calculatePhraseMatch(query, pageIndex, pageContent) {
|
||||
/**
|
||||
* Determine if the search query constitutes a "whole word", by comparing the
|
||||
* first/last character type with the preceding/following character type.
|
||||
*/
|
||||
_isEntireWord(content, startIdx, length) {
|
||||
if (startIdx > 0) {
|
||||
const first = content.charCodeAt(startIdx);
|
||||
const limit = content.charCodeAt(startIdx - 1);
|
||||
if (getCharacterType(first) === getCharacterType(limit)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const endIdx = (startIdx + length - 1);
|
||||
if (endIdx < (content.length - 1)) {
|
||||
const last = content.charCodeAt(endIdx);
|
||||
const limit = content.charCodeAt(endIdx + 1);
|
||||
if (getCharacterType(last) === getCharacterType(limit)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
_calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
|
||||
let matches = [];
|
||||
let queryLen = query.length;
|
||||
let matchIdx = -queryLen;
|
||||
@ -199,12 +223,15 @@ class PDFFindController {
|
||||
if (matchIdx === -1) {
|
||||
break;
|
||||
}
|
||||
if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
|
||||
continue;
|
||||
}
|
||||
matches.push(matchIdx);
|
||||
}
|
||||
this.pageMatches[pageIndex] = matches;
|
||||
}
|
||||
|
||||
_calculateWordMatch(query, pageIndex, pageContent) {
|
||||
_calculateWordMatch(query, pageIndex, pageContent, entireWord) {
|
||||
let matchesWithLength = [];
|
||||
// Divide the query into pieces and search for text in each piece.
|
||||
let queryArray = query.match(/\S+/g);
|
||||
@ -217,6 +244,10 @@ class PDFFindController {
|
||||
if (matchIdx === -1) {
|
||||
break;
|
||||
}
|
||||
if (entireWord &&
|
||||
!this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
|
||||
continue;
|
||||
}
|
||||
// Other searches do not, so we store the length.
|
||||
matchesWithLength.push({
|
||||
match: matchIdx,
|
||||
@ -244,6 +275,7 @@ class PDFFindController {
|
||||
let query = this._normalize(this.state.query);
|
||||
let caseSensitive = this.state.caseSensitive;
|
||||
let phraseSearch = this.state.phraseSearch;
|
||||
const entireWord = this.state.entireWord;
|
||||
let queryLen = query.length;
|
||||
|
||||
if (queryLen === 0) {
|
||||
@ -257,9 +289,9 @@ class PDFFindController {
|
||||
}
|
||||
|
||||
if (phraseSearch) {
|
||||
this._calculatePhraseMatch(query, pageIndex, pageContent);
|
||||
this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
|
||||
} else {
|
||||
this._calculateWordMatch(query, pageIndex, pageContent);
|
||||
this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
|
||||
}
|
||||
|
||||
this._updatePage(pageIndex);
|
||||
|
107
web/pdf_find_utils.js
Normal file
107
web/pdf_find_utils.js
Normal file
@ -0,0 +1,107 @@
|
||||
/* Copyright 2018 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Character classes reported by `getCharacterType`.
 *
 * The object is frozen, since it is exported and shared between modules;
 * an accidental write to one of the members would otherwise silently change
 * the classification for every consumer.
 */
const CharacterType = Object.freeze({
  SPACE: 0,
  ALPHA_LETTER: 1,
  PUNCT: 2,
  HAN_LETTER: 3,
  KATAKANA_LETTER: 4,
  HIRAGANA_LETTER: 5,
  HALFWIDTH_KATAKANA_LETTER: 6,
  THAI_LETTER: 7,
});
|
||||
|
||||
/**
 * Tell whether `charCode` lies below 0x2E80, i.e. below the range that
 * `getCharacterType` examines with the Han/Katakana/Hiragana checks.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isAlphabeticalScript(charCode) {
  const upperBound = 0x2E80; // Exclusive.
  return charCode < upperBound;
}
|
||||
|
||||
/**
 * Tell whether `charCode` is in the 7-bit ASCII range (0x00-0x7F).
 *
 * A plain range comparison is used rather than masking with 0xFF80: the mask
 * only inspects the low 16 bits, so values above 0xFFFF (e.g. the code point
 * 0x10041) and NaN would be reported as ASCII. For every UTF-16 code unit
 * (0x0000-0xFFFF) the result is the same as with the mask.
 *
 * @param {number} charCode - UTF-16 code unit (or code point) to test.
 * @returns {boolean}
 */
function isAscii(charCode) {
  return charCode >= 0 && charCode <= 0x7F;
}
|
||||
|
||||
/**
 * Tell whether `charCode` is an ASCII letter, `a`-`z` or `A`-`Z`.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isAsciiAlpha(charCode) {
  const isLowerCase = charCode >= 0x61 /* a */ && charCode <= 0x7A /* z */;
  const isUpperCase = charCode >= 0x41 /* A */ && charCode <= 0x5A /* Z */;
  return isLowerCase || isUpperCase;
}
|
||||
|
||||
/**
 * Tell whether `charCode` is an ASCII decimal digit, `0`-`9`.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isAsciiDigit(charCode) {
  const zero = 0x30;
  const nine = 0x39;
  return charCode >= zero && charCode <= nine;
}
|
||||
|
||||
/**
 * Tell whether `charCode` is one of the ASCII whitespace characters
 * recognised here: space, tab, carriage return or line feed.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isAsciiSpace(charCode) {
  switch (charCode) {
    case 0x20: // SPACE
    case 0x09: // TAB
    case 0x0D: // CR
    case 0x0A: // LF
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Tell whether `charCode` falls inside one of the two ranges treated as Han:
 * 0x3400-0x9FFF or 0xF900-0xFAFF (both inclusive).
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isHan(charCode) {
  const inMainRange = charCode >= 0x3400 && charCode <= 0x9FFF;
  if (inMainRange) {
    return true;
  }
  const inCompatibilityRange = charCode >= 0xF900 && charCode <= 0xFAFF;
  return inCompatibilityRange;
}
|
||||
|
||||
/**
 * Tell whether `charCode` lies in the range 0x30A0-0x30FF (inclusive), which
 * is treated as Katakana.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isKatakana(charCode) {
  const rangeStart = 0x30A0;
  const rangeEnd = 0x30FF;
  return charCode >= rangeStart && charCode <= rangeEnd;
}
|
||||
|
||||
/**
 * Tell whether `charCode` lies in the range 0x3040-0x309F (inclusive), which
 * is treated as Hiragana.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isHiragana(charCode) {
  const rangeStart = 0x3040;
  const rangeEnd = 0x309F;
  return charCode >= rangeStart && charCode <= rangeEnd;
}
|
||||
|
||||
/**
 * Tell whether `charCode` lies in the range 0xFF60-0xFF9F (inclusive), which
 * is treated as halfwidth Katakana.
 *
 * @param {number} charCode - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isHalfwidthKatakana(charCode) {
  const rangeStart = 0xFF60;
  const rangeEnd = 0xFF9F;
  return charCode >= rangeStart && charCode <= rangeEnd;
}
|
||||
|
||||
/**
 * Tell whether `charCode` lies in the range 0x0E00-0x0E7F (inclusive), which
 * is treated as Thai.
 *
 * A plain range comparison is used rather than masking with 0xFF80: the mask
 * only inspects the low 16 bits, so values above 0xFFFF (e.g. 0x10E25) would
 * be reported as Thai. For every UTF-16 code unit (0x0000-0xFFFF) the result
 * is the same as with the mask.
 *
 * @param {number} charCode - UTF-16 code unit (or code point) to test.
 * @returns {boolean}
 */
function isThai(charCode) {
  return charCode >= 0x0E00 && charCode <= 0x0E7F;
}
|
||||
|
||||
/**
 * Classify a UTF-16 code unit as one of the `CharacterType` values.
 *
 * This function is based on the word-break detection implemented in:
 * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp
 *
 * @param {number} charCode - UTF-16 code unit, e.g. from `String.charCodeAt`.
 * @returns {number} One of the `CharacterType` values.
 */
function getCharacterType(charCode) {
  // Code units at or above 0x2E80: check the CJK ranges; everything else is
  // treated as a letter.
  if (!isAlphabeticalScript(charCode)) {
    if (isHan(charCode)) {
      return CharacterType.HAN_LETTER;
    }
    if (isKatakana(charCode)) {
      return CharacterType.KATAKANA_LETTER;
    }
    if (isHiragana(charCode)) {
      return CharacterType.HIRAGANA_LETTER;
    }
    if (isHalfwidthKatakana(charCode)) {
      return CharacterType.HALFWIDTH_KATAKANA_LETTER;
    }
    return CharacterType.ALPHA_LETTER;
  }

  // Non-ASCII code units below 0x2E80: Thai and the no-break space get their
  // own classification; everything else is treated as a letter.
  if (!isAscii(charCode)) {
    if (isThai(charCode)) {
      return CharacterType.THAI_LETTER;
    }
    if (charCode === /* NBSP = */ 0xA0) {
      return CharacterType.SPACE;
    }
    return CharacterType.ALPHA_LETTER;
  }

  // ASCII: whitespace, then word characters (letters, digits, underscore);
  // anything left over counts as punctuation.
  if (isAsciiSpace(charCode)) {
    return CharacterType.SPACE;
  }
  const isWordCharacter = isAsciiAlpha(charCode) || isAsciiDigit(charCode) ||
                          charCode === /* UNDERSCORE = */ 0x5F;
  return isWordCharacter ? CharacterType.ALPHA_LETTER : CharacterType.PUNCT;
}
|
||||
|
||||
export {
|
||||
CharacterType,
|
||||
getCharacterType,
|
||||
};
|
@ -104,15 +104,19 @@ See https://github.com/adobe-type-tools/cmap-resources
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="findbarOptionsContainer">
|
||||
<div id="findbarOptionsOneContainer">
|
||||
<input type="checkbox" id="findHighlightAll" class="toolbarField" tabindex="94">
|
||||
<label for="findHighlightAll" class="toolbarLabel" data-l10n-id="find_highlight">Highlight all</label>
|
||||
<input type="checkbox" id="findMatchCase" class="toolbarField" tabindex="95">
|
||||
<label for="findMatchCase" class="toolbarLabel" data-l10n-id="find_match_case_label">Match case</label>
|
||||
</div>
|
||||
<div id="findbarOptionsTwoContainer">
|
||||
<input type="checkbox" id="findEntireWord" class="toolbarField" tabindex="96">
|
||||
<label for="findEntireWord" class="toolbarLabel" data-l10n-id="find_entire_word_label">Whole words</label>
|
||||
<span id="findResultsCount" class="toolbarLabel hidden"></span>
|
||||
</div>
|
||||
|
||||
<div id="findbarMessageContainer">
|
||||
<span id="findResultsCount" class="toolbarLabel hidden"></span>
|
||||
<span id="findMsg" class="toolbarLabel"></span>
|
||||
</div>
|
||||
</div> <!-- findbar -->
|
||||
|
@ -134,9 +134,9 @@ function getViewerConfiguration() {
|
||||
findField: document.getElementById('findInput'),
|
||||
highlightAllCheckbox: document.getElementById('findHighlightAll'),
|
||||
caseSensitiveCheckbox: document.getElementById('findMatchCase'),
|
||||
entireWordCheckbox: document.getElementById('findEntireWord'),
|
||||
findMsg: document.getElementById('findMsg'),
|
||||
findResultsCount: document.getElementById('findResultsCount'),
|
||||
findStatusIcon: document.getElementById('findStatusIcon'),
|
||||
findPreviousButton: document.getElementById('findPrevious'),
|
||||
findNextButton: document.getElementById('findNext'),
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user