Fix search in a pdf containing some UTF-32 characters (bug 1820909)
Some chars were assumed to have a length equal to 1, but UTF-32 chars can be longer.
This commit is contained in:
parent
a0ef5a4ae1
commit
07b094729e
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -576,3 +576,4 @@
|
|||||||
!bug1770750.pdf
|
!bug1770750.pdf
|
||||||
!issue16063.pdf
|
!issue16063.pdf
|
||||||
!issue16067.pdf
|
!issue16067.pdf
|
||||||
|
!bug1820909.1.pdf
|
||||||
|
BIN
test/pdfs/bug1820909.1.pdf
Executable file
BIN
test/pdfs/bug1820909.1.pdf
Executable file
Binary file not shown.
2
test/pdfs/bug1820909.pdf.link
Normal file
2
test/pdfs/bug1820909.pdf.link
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
https://web.archive.org/web/20221122204959/https://www.unicode.org/charts/PDF/U31350.pdf
|
||||||
|
|
@ -7455,5 +7455,12 @@
|
|||||||
"rounds": 1,
|
"rounds": 1,
|
||||||
"link": true,
|
"link": true,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "bug1820909",
|
||||||
|
"file": "pdfs/bug1820909.pdf",
|
||||||
|
"md5": "d95a83a868671a03cbf322f16b2e2b9d",
|
||||||
|
"link": true,
|
||||||
|
"type": "other"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -854,4 +854,50 @@ describe("pdf_find_controller", function () {
|
|||||||
pageMatchesLength: [[7]],
|
pageMatchesLength: [[7]],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("performs a search in a text with some UTF-32 chars", async function () {
|
||||||
|
if (isNodeJS) {
|
||||||
|
pending("Linked test-cases are not supported in Node.js.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const { eventBus, pdfFindController } = await initPdfFindController(
|
||||||
|
"bug1820909.pdf"
|
||||||
|
);
|
||||||
|
|
||||||
|
await testSearch({
|
||||||
|
eventBus,
|
||||||
|
pdfFindController,
|
||||||
|
state: {
|
||||||
|
query: "31350",
|
||||||
|
},
|
||||||
|
matchesPerPage: [1, 2],
|
||||||
|
selectedMatch: {
|
||||||
|
pageIndex: 0,
|
||||||
|
matchIndex: 0,
|
||||||
|
},
|
||||||
|
pageMatches: [[41], [131, 1359]],
|
||||||
|
pageMatchesLength: [[5], [5, 5]],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("performs a search in a text with some UTF-32 chars followed by a dash at the end of a line", async function () {
|
||||||
|
const { eventBus, pdfFindController } = await initPdfFindController(
|
||||||
|
"bug1820909.1.pdf"
|
||||||
|
);
|
||||||
|
|
||||||
|
await testSearch({
|
||||||
|
eventBus,
|
||||||
|
pdfFindController,
|
||||||
|
state: {
|
||||||
|
query: "abcde",
|
||||||
|
},
|
||||||
|
matchesPerPage: [2],
|
||||||
|
selectedMatch: {
|
||||||
|
pageIndex: 0,
|
||||||
|
matchIndex: 0,
|
||||||
|
},
|
||||||
|
pageMatches: [[42, 95]],
|
||||||
|
pageMatchesLength: [[5, 5]],
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
@ -289,21 +289,26 @@ function normalize(text) {
|
|||||||
// "X-\n" is removed because an hyphen at the end of a line
|
// "X-\n" is removed because an hyphen at the end of a line
|
||||||
// with not a space before is likely here to mark a break
|
// with not a space before is likely here to mark a break
|
||||||
// in a word.
|
// in a word.
|
||||||
// The \n isn't in the original text so here y = i, n = 1 and o = 2.
|
// If X is encoded with UTF-32 then it can have a length greater than 1.
|
||||||
positions.push([i - shift + 1, 1 + shift]);
|
// The \n isn't in the original text so here y = i, n = X.len - 2 and
|
||||||
|
// o = X.len - 1.
|
||||||
|
const len = p5.length - 2;
|
||||||
|
positions.push([i - shift + len, 1 + shift]);
|
||||||
shift += 1;
|
shift += 1;
|
||||||
shiftOrigin += 1;
|
shiftOrigin += 1;
|
||||||
eol += 1;
|
eol += 1;
|
||||||
return p5.charAt(0);
|
return p5.slice(0, -2);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p6) {
|
if (p6) {
|
||||||
// An ideographic at the end of a line doesn't imply adding an extra
|
// An ideographic at the end of a line doesn't imply adding an extra
|
||||||
// white space.
|
// white space.
|
||||||
positions.push([i - shift + 1, shift]);
|
// A CJK can be encoded in UTF-32, hence their length isn't always 1.
|
||||||
|
const len = p6.length - 1;
|
||||||
|
positions.push([i - shift + len, shift]);
|
||||||
shiftOrigin += 1;
|
shiftOrigin += 1;
|
||||||
eol += 1;
|
eol += 1;
|
||||||
return p6.charAt(0);
|
return p6.slice(0, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p7) {
|
if (p7) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user