117bbf7cd9
Some Arabic chars like \ufe94 could be searched for in a PDF, hence the query must be normalized when it is created. To avoid duplicating the normalization code, everything has been moved into the find controller. The previous code to normalize text was using NFKC but with a hardcoded map, hence it has been replaced by the use of normalize("NFKC") (it helps to reduce the bundle size by 30kb). While playing with this \ufe94 char, I noticed that the bidi algorithm wasn't taking into account some RTL unicode ranges, the generated font wasn't embedding the mapping for this char and the unicode ranges in the OS/2 table weren't up-to-date. When normalized, some chars can be replaced by several ones, which led to some extra chars in the text layer. To avoid any regression, when copying some text from the text layer, the copied string is normalized (NFKC) before being put in the clipboard (it works like this in either Acrobat or Chrome).
1037 lines
26 KiB
JavaScript
1037 lines
26 KiB
JavaScript
/* Copyright 2018 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
import { FindState, PDFFindController } from "../../web/pdf_find_controller.js";
import { buildGetDocumentParams } from "./test_utils.js";
import { EventBus } from "../../web/event_utils.js";
import { getDocument } from "../../src/display/api.js";
import { isNodeJS } from "../../src/shared/is_node.js";
import { SimpleLinkService } from "../../web/pdf_link_service.js";

// Document that is loaded when a test does not name a file explicitly.
const tracemonkeyFileName = "tracemonkey.pdf";

// Passed as `cMapUrl` when loading documents; the relative path differs
// depending on whether the tests run in Node.js or not.
const CMAP_URL = isNodeJS ? "./external/bcmaps/" : "../../../external/bcmaps/";
|
|
|
|
/**
 * Minimal link service used by the find controller in these tests.
 *
 * It only tracks the current page number and exposes the page count of the
 * document that was handed to `setDocument`.
 */
class MockLinkService extends SimpleLinkService {
  constructor() {
    super();

    this._page = 1;
    this._pdfDocument = null;
  }

  /**
   * @param {Object} pdfDocument - The loaded document; only its `numPages`
   *   property is read by this mock.
   */
  setDocument(pdfDocument) {
    this._pdfDocument = pdfDocument;
  }

  /**
   * @returns {number} The number of pages of the current document, or 0 when
   *   no document has been set yet.
   */
  get pagesCount() {
    // `_pdfDocument` starts out as `null`; report zero pages instead of
    // throwing a TypeError when the getter is read before `setDocument`.
    return this._pdfDocument ? this._pdfDocument.numPages : 0;
  }

  /** @returns {number} The current page number (initially 1). */
  get page() {
    return this._page;
  }

  /** @param {number} value - The new current page number. */
  set page(value) {
    this._page = value;
  }
}
|
|
|
|
/**
 * Loads a test document and wires up a `PDFFindController` for it.
 *
 * @param {string} [filename] - Name of the document to load; any falsy value
 *   falls back to `tracemonkeyFileName`.
 * @param {boolean} [updateMatchesCountOnProgress] - Forwarded unchanged to
 *   the `PDFFindController` constructor. The default value is `true`.
 * @returns {Promise<Object>} Resolves to `{ eventBus, pdfFindController }`,
 *   where `eventBus` is the bus that the controller was constructed with.
 */
async function initPdfFindController(
  filename,
  updateMatchesCountOnProgress = true
) {
  const loadingTask = getDocument(
    buildGetDocumentParams(filename || tracemonkeyFileName, {
      cMapUrl: CMAP_URL,
    })
  );
  const pdfDocument = await loadingTask.promise;

  const eventBus = new EventBus();

  const linkService = new MockLinkService();
  linkService.setDocument(pdfDocument);

  const pdfFindController = new PDFFindController({
    linkService,
    eventBus,
    updateMatchesCountOnProgress,
  });
  pdfFindController.setDocument(pdfDocument); // Enable searching.

  return { eventBus, pdfFindController };
}
|
|
|
|
/**
 * Dispatches a "find" event and checks the results reported by the find
 * controller once the last page with matches has been processed.
 *
 * @param {Object} params
 * @param {Object} params.eventBus - Bus used to dispatch "find" and to listen
 *   for "updatefindmatchescount" / "updatefindcontrolstate".
 * @param {Object} params.pdfFindController - Controller whose `pageMatches`,
 *   `pageMatchesLength` and `selected` properties are inspected.
 * @param {Object} params.state - Overrides for the default "find" event state.
 * @param {Array<number>} params.matchesPerPage - Expected number of matches
 *   for each page, indexed by page index.
 * @param {Object} params.selectedMatch - Expected `{ pageIndex, matchIndex }`
 *   of the selected match.
 * @param {Array<Array<number>>} [params.pageMatches] - When provided, the
 *   exact expected value of `pdfFindController.pageMatches`.
 * @param {Array<Array<number>>} [params.pageMatchesLength] - Expected value of
 *   `pdfFindController.pageMatchesLength`; only checked together with
 *   `pageMatches`.
 * @param {Array<number>} [params.updateFindMatchesCount] - One-element
 *   counter, incremented for every "updatefindmatchescount" event received.
 * @param {Array<number>} [params.updateFindControlState] - One-element
 *   counter, incremented for every "updatefindcontrolstate" event received
 *   until the search has been verified.
 * @returns {Promise<undefined>} Resolves after all expectations have run.
 */
function testSearch({
  eventBus,
  pdfFindController,
  state,
  matchesPerPage,
  selectedMatch,
  pageMatches = null,
  pageMatchesLength = null,
  updateFindMatchesCount = null,
  updateFindControlState = null,
}) {
  return new Promise(function (resolve) {
    const eventState = Object.assign(
      Object.create(null),
      {
        // NOTE(review): `this` inside a plain function executor is presumably
        // `undefined` in module code; confirm whether `source` matters here.
        source: this,
        type: "",
        query: null,
        caseSensitive: false,
        entireWord: false,
        findPrevious: false,
        matchDiacritics: false,
      },
      state
    );
    eventBus.dispatch("find", eventState);

    // The `updatefindmatchescount` event is only emitted if the page contains
    // at least one match for the query, so the last non-zero item in the
    // matches per page array corresponds to the page for which the final
    // `updatefindmatchescount` event is emitted. If this happens, we know
    // that any subsequent pages won't trigger the event anymore and we
    // can start comparing the matches per page. This logic is necessary
    // because we call the `pdfFindController.pageMatches` getter directly
    // after receiving the event and the underlying `_pageMatches` array
    // is only extended when a page is processed, so it will only contain
    // entries for the pages processed until the time when the final event
    // was emitted.
    let totalPages = matchesPerPage.length;
    for (let i = totalPages - 1; i >= 0; i--) {
      if (matchesPerPage[i] > 0) {
        totalPages = i + 1;
        break;
      }
    }

    // The explicit initial value keeps `reduce` from throwing when
    // `matchesPerPage` is empty.
    const totalMatches = matchesPerPage.reduce((a, b) => a + b, 0);

    // Keep a reference to the listener so that it can be unregistered once
    // the search has been verified; otherwise it would stay attached to the
    // bus and keep incrementing the caller's counter during later searches.
    let onUpdateFindControlState = null;
    if (updateFindControlState) {
      onUpdateFindControlState = function () {
        updateFindControlState[0] += 1;
      };
      eventBus.on("updatefindcontrolstate", onUpdateFindControlState);
    }

    eventBus.on(
      "updatefindmatchescount",
      function onUpdateFindMatchesCount(evt) {
        if (updateFindMatchesCount) {
          updateFindMatchesCount[0] += 1;
        }
        // Wait until every page up to the last one with matches is processed.
        if (pdfFindController.pageMatches.length !== totalPages) {
          return;
        }
        eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount);
        if (onUpdateFindControlState) {
          eventBus.off("updatefindcontrolstate", onUpdateFindControlState);
        }

        expect(evt.matchesCount.total).toBe(totalMatches);
        for (let i = 0; i < totalPages; i++) {
          expect(pdfFindController.pageMatches[i].length).toEqual(
            matchesPerPage[i]
          );
        }
        expect(pdfFindController.selected.pageIdx).toEqual(
          selectedMatch.pageIndex
        );
        expect(pdfFindController.selected.matchIdx).toEqual(
          selectedMatch.matchIndex
        );

        if (pageMatches) {
          expect(pdfFindController.pageMatches).toEqual(pageMatches);
          expect(pdfFindController.pageMatchesLength).toEqual(
            pageMatchesLength
          );
        }

        resolve();
      }
    );
  });
}
|
|
|
|
/**
 * Dispatches a "find" event and waits for the controller to report
 * `FindState.NOT_FOUND`, checking that the reported total is zero.
 *
 * @param {Object} params
 * @param {Object} params.eventBus - Bus used to dispatch "find" and to listen
 *   for "updatefindcontrolstate".
 * @param {Object} params.pdfFindController - Accepted for symmetry with
 *   `testSearch`; it is not read by this helper.
 * @param {Object} params.state - Overrides for the default "find" event state.
 * @returns {Promise<undefined>} Resolves once a NOT_FOUND state was received.
 */
function testEmptySearch({ eventBus, pdfFindController, state }) {
  return new Promise(function (resolve) {
    const eventState = Object.assign(
      Object.create(null),
      {
        source: this,
        type: "",
        query: null,
        caseSensitive: false,
        entireWord: false,
        findPrevious: false,
        matchDiacritics: false,
      },
      state
    );

    // Subscribe *before* dispatching, so that a NOT_FOUND state emitted
    // synchronously while "find" is being dispatched cannot be missed (which
    // would leave the returned promise pending forever).
    eventBus.on(
      "updatefindcontrolstate",
      function onUpdatefindcontrolstate(evt) {
        // Ignore any other state updates; only NOT_FOUND ends the wait.
        if (evt.state !== FindState.NOT_FOUND) {
          return;
        }
        eventBus.off("updatefindcontrolstate", onUpdatefindcontrolstate);
        expect(evt.matchesCount.total).toBe(0);
        resolve();
      }
    );

    eventBus.dispatch("find", eventState);
  });
}
|
|
|
|
// Test suite for `PDFFindController`. Unless a file name is given, each test
// searches the default document loaded by `initPdfFindController`.
describe("pdf_find_controller", function () {
  it("performs a normal search", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();
    const updateFindMatchesCount = [0];

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "Dynamic",
      },
      matchesPerPage: [11, 5, 0, 3, 0, 0, 0, 1, 1, 1, 0, 3, 4, 4],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      updateFindMatchesCount,
    });

    // Nine pages have at least one match.
    expect(updateFindMatchesCount[0]).toBe(9);
  });

  it("performs a normal search but the total counts is only updated one time", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      null,
      false
    );
    const updateFindMatchesCount = [0];
    const updateFindControlState = [0];

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "Dynamic",
      },
      matchesPerPage: [11, 5, 0, 3, 0, 0, 0, 1, 1, 1, 0, 3, 4, 4],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      updateFindMatchesCount,
      updateFindControlState,
    });

    expect(updateFindMatchesCount[0]).toBe(1);
    expect(updateFindControlState[0]).toBe(0);
  });

  it("performs a normal search and finds the previous result", async function () {
    // Page 14 (with page index 13) contains five results. By default, the
    // first result (match index 0) is selected, so the previous result
    // should be the fifth result (match index 4).
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "conference",
        findPrevious: true,
      },
      matchesPerPage: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
      selectedMatch: {
        pageIndex: 13,
        matchIndex: 4,
      },
    });
  });

  it("performs a case sensitive search", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "Dynamic",
        caseSensitive: true,
      },
      matchesPerPage: [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
    });
  });

  it("performs an entire word search", async function () {
    // Page 13 contains both 'Government' and 'Governmental', so the latter
    // should not be found with entire word search.
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "Government",
        entireWord: true,
      },
      matchesPerPage: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
      selectedMatch: {
        pageIndex: 12,
        matchIndex: 0,
      },
    });
  });

  it("performs a multiple term (no phrase) search", async function () {
    // Page 9 contains 'alternate' and pages 6 and 9 contain 'solution'.
    // Both should be found for multiple term (no phrase) search.
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: ["alternate", "solution"],
      },
      matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 5,
        matchIndex: 0,
      },
    });
  });

  it("performs a multiple term (phrase) search", async function () {
    // Page 9 contains 'alternate solution' and pages 6 and 9 contain
    // 'solution'. Both should be found for multiple term (phrase) search.
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: ["alternate solution", "solution"],
      },
      matchesPerPage: [0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 5,
        matchIndex: 0,
      },
    });
  });

  it("performs a normal search, where the text is normalized", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "fraction-highlight.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "fraction",
      },
      matchesPerPage: [3],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[19, 46, 62]],
      pageMatchesLength: [[8, 8, 8]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "1/2",
      },
      matchesPerPage: [2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[27, 54]],
      pageMatchesLength: [[1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "½",
      },
      matchesPerPage: [2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[27, 54]],
      pageMatchesLength: [[1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "1",
      },
      matchesPerPage: [3],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[27, 54, 55]],
      pageMatchesLength: [[1, 1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "2",
      },
      matchesPerPage: [2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[27, 54]],
      pageMatchesLength: [[1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "1/",
      },
      matchesPerPage: [3],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[27, 54, 55]],
      pageMatchesLength: [[1, 1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "1/21",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[54]],
      pageMatchesLength: [[2]],
    });
  });

  it("performs a normal search, where the text with diacritics is normalized", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "french_diacritics.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "a",
      },
      matchesPerPage: [6],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[0, 2, 4, 6, 8, 10]],
      pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "u",
      },
      matchesPerPage: [6],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[44, 46, 48, 50, 52, 54]],
      pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "ë",
        matchDiacritics: true,
      },
      matchesPerPage: [2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[28, 30]],
      pageMatchesLength: [[1, 1]],
    });
  });

  it("performs a search where one of the results contains an hyphen", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "optimiz",
      },
      matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
    });
  });

  it("performs a search where the result is on two lines", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "user experience",
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[2734]],
      pageMatchesLength: [[14]],
    });
  });

  it("performs a search where the result is on two lines with a punctuation at eol", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "version.the",
      },
      matchesPerPage: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 1,
        matchIndex: 0,
      },
      pageMatches: [[], [1486]],
      pageMatchesLength: [[], [11]],
    });
  });

  it("performs a search with a minus sign in the query", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "trace-based just-in-time",
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [
        [0],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [2081],
      ],
      pageMatchesLength: [
        [24],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [24],
      ],
    });
  });

  it("performs a search with square brackets in the query", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "[Programming Languages]",
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[1497]],
      pageMatchesLength: [[25]],
    });
  });

  it("performs a search with parenthesis in the query", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "\t (checks)",
      },
      matchesPerPage: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 1,
        matchIndex: 0,
      },
      pageMatches: [[], [201]],
      pageMatchesLength: [[], [9]],
    });
  });

  it("performs a search with a final dot in the query", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    // The whitespace after the dot mustn't be matched.
    const query = "complex applications.";

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query,
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[1941]],
      pageMatchesLength: [[21]],
    });
  });

  it("performs a search with a dot in the query and a missing whitespace", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    // The whitespace after the dot must be matched.
    const query = "complex applications.J";

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query,
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[1941]],
      pageMatchesLength: [[23]],
    });
  });

  it("performs a search with a dot followed by a whitespace in the query", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();
    const query = "complex applications. j";

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query,
      },
      matchesPerPage: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[1941]],
      pageMatchesLength: [[23]],
    });
  });

  it("performs a search in a text containing diacritics before -\\n", async function () {
    if (isNodeJS) {
      pending("Linked test-cases are not supported in Node.js.");
    }

    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue14562.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "ä",
        matchDiacritics: true,
      },
      matchesPerPage: [80],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [
        [
          302, 340, 418, 481, 628, 802, 983, 989, 1015, 1063, 1084, 1149, 1157,
          1278, 1346, 1394, 1402, 1424, 1500, 1524, 1530, 1686, 1776, 1788,
          1859, 1881, 1911, 1948, 2066, 2076, 2163, 2180, 2215, 2229, 2274,
          2324, 2360, 2402, 2413, 2424, 2463, 2532, 2538, 2553, 2562, 2576,
          2602, 2613, 2638, 2668, 2792, 2805, 2836, 2847, 2858, 2895, 2901,
          2915, 2939, 2959, 3089, 3236, 3246, 3336, 3384, 3391, 3465, 3474,
          3482, 3499, 3687, 3693, 3708, 3755, 3786, 3862, 3974, 4049, 4055,
          4068,
        ],
      ],
      pageMatchesLength: [
        [
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        ],
      ],
    });
  });

  it("performs a search in a text containing some Hangul syllables", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "bug1771477.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "안녕하세요 세계",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[139]],
      pageMatchesLength: [[8]],
    });
  });

  it("performs a search in a text containing an ideographic at the end of a line", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue15340.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "検知機構",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[29]],
      pageMatchesLength: [[4]],
    });
  });

  it("performs a search in a text containing fullwidth chars", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue15690.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "o",
      },
      matchesPerPage: [13],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[0, 10, 13, 30, 39, 41, 55, 60, 66, 84, 102, 117, 134]],
      pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
    });
  });

  it("performs a search in a text with some Katakana at the end of a line", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue15759.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "ソレノイド",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[6]],
      pageMatchesLength: [[5]],
    });
  });

  it("performs a search with a single diacritic", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController();

    await testEmptySearch({
      eventBus,
      pdfFindController,
      state: {
        query: "\u064E",
      },
    });
  });

  it("performs a search in a text containing combining diacritics", async function () {
    if (isNodeJS) {
      pending("Linked test-cases are not supported in Node.js.");
    }

    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue12909.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "הספר",
        matchDiacritics: true,
      },
      matchesPerPage: [0, 0, 0, 0, 0, 0, 0, 0, 1],
      selectedMatch: {
        pageIndex: 8,
        matchIndex: 0,
      },
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "הספר",
        matchDiacritics: false,
      },
      matchesPerPage: [0, 1, 0, 0, 0, 0, 0, 0, 1],
      selectedMatch: {
        pageIndex: 8,
        matchIndex: 0,
      },
    });
  });

  it("performs a search in a text with some Hiragana diacritics at the end of a line", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "issue16063.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "行うことができる速結端子",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[63]],
      pageMatchesLength: [[12]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "デュプレックス",
      },
      matchesPerPage: [1],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[205]],
      pageMatchesLength: [[7]],
    });
  });

  it("performs a search in a text with some UTF-32 chars", async function () {
    if (isNodeJS) {
      pending("Linked test-cases are not supported in Node.js.");
    }

    const { eventBus, pdfFindController } = await initPdfFindController(
      "bug1820909.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "31350",
      },
      matchesPerPage: [1, 2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[41], [131, 1359]],
      pageMatchesLength: [[5], [5, 5]],
    });
  });

  it("performs a search in a text with some UTF-32 chars followed by a dash at the end of a line", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "bug1820909.1.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "abcde",
      },
      matchesPerPage: [2],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[42, 95]],
      pageMatchesLength: [[5, 5]],
    });
  });

  it("performs a search in a text with some arabic chars in different unicode ranges but with same normalized form", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "ArabicCIDTrueType.pdf"
    );

    // Both queries below are expected to yield exactly the same matches.
    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "\u0629",
      },
      matchesPerPage: [4],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[6, 25, 44, 63]],
      pageMatchesLength: [[1, 1, 1, 1]],
    });

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "\ufe94",
      },
      matchesPerPage: [4],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[6, 25, 44, 63]],
      pageMatchesLength: [[1, 1, 1, 1]],
    });
  });

  it("performs a search in a text with some f ligatures", async function () {
    const { eventBus, pdfFindController } = await initPdfFindController(
      "copy_paste_ligatures.pdf"
    );

    await testSearch({
      eventBus,
      pdfFindController,
      state: {
        query: "f",
      },
      matchesPerPage: [9],
      selectedMatch: {
        pageIndex: 0,
        matchIndex: 0,
      },
      pageMatches: [[5, 6, 6, 7, 8, 9, 9, 10, 10]],
      pageMatchesLength: [[1, 1, 1, 1, 1, 1, 1, 1, 1]],
    });
  });
});
|