Tweak the Bidi-detection heuristics for very short RTL strings (issue 11656)
Very short strings can narrowly miss the existing Bidi-detection threshold, leading to incorrect text-selection and copying behaviour. In my testing, neither Adobe Reader nor PDFium seems to handle copying "correctly" for this document. Hence it's not entirely clear to me that we actually want to fix this, since tweaking these heuristics can *obviously* cause regressions elsewhere (and our test coverage for RTL-text isn't exactly great).
This commit is contained in:
parent
6a15973a1b
commit
5f77d3719b
@ -158,7 +158,8 @@ function bidi(str, startLevel = -1, vertical = false) {
|
||||
|
||||
// Detect the bidi method
|
||||
// - If there are no rtl characters then no bidi needed
|
||||
// - If less than 30% chars are rtl then string is primarily ltr
|
||||
// - If less than 30% chars are rtl then string is primarily ltr,
|
||||
// unless the string is very short.
|
||||
// - If more than 30% chars are rtl then string is primarily rtl
|
||||
if (numBidi === 0) {
|
||||
isLTR = true;
|
||||
@ -166,7 +167,7 @@ function bidi(str, startLevel = -1, vertical = false) {
|
||||
}
|
||||
|
||||
if (startLevel === -1) {
|
||||
if (numBidi / strLength < 0.3) {
|
||||
if (numBidi / strLength < 0.3 && strLength > 4) {
|
||||
isLTR = true;
|
||||
startLevel = 0;
|
||||
} else {
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -448,6 +448,7 @@
|
||||
!annotation-square-circle-without-appearance.pdf
|
||||
!annotation-stamp.pdf
|
||||
!issue14048.pdf
|
||||
!issue11656.pdf
|
||||
!annotation-fileattachment.pdf
|
||||
!annotation-text-widget.pdf
|
||||
!annotation-choice-widget.pdf
|
||||
|
BIN
test/pdfs/issue11656.pdf
Normal file
BIN
test/pdfs/issue11656.pdf
Normal file
Binary file not shown.
@ -5080,6 +5080,12 @@
|
||||
"lastPage": 1,
|
||||
"type": "eq"
|
||||
},
|
||||
{ "id": "issue11656",
|
||||
"file": "pdfs/issue11656.pdf",
|
||||
"md5": "82d5d4f5978a4974707deb1ea98e62f2",
|
||||
"rounds": 1,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "vertical",
|
||||
"file": "pdfs/vertical.pdf",
|
||||
"md5": "8a74d33504701edcefeef2afd022765e",
|
||||
|
@ -16,6 +16,28 @@
|
||||
import { bidi } from "../../src/core/bidi.js";
|
||||
|
||||
describe("bidi", function () {
|
||||
it(
|
||||
"should mark text as LTR if there's only LTR-characters, " +
|
||||
"when the string is very short",
|
||||
function () {
|
||||
const str = "foo";
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual("foo");
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
}
|
||||
);
|
||||
|
||||
it("should mark text as LTR if there's only LTR-characters", function () {
|
||||
const str = "Lorem ipsum dolor sit amet, consectetur adipisicing elit.";
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual(
|
||||
"Lorem ipsum dolor sit amet, consectetur adipisicing elit."
|
||||
);
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
});
|
||||
|
||||
it("should mark text as RTL if more than 30% of text is RTL", function () {
|
||||
// 33% of test text are RTL characters
|
||||
const test = "\u0645\u0635\u0631 Egypt";
|
||||
@ -34,4 +56,16 @@ describe("bidi", function () {
|
||||
expect(bidiText.str).toEqual(result);
|
||||
expect(bidiText.dir).toEqual("ltr");
|
||||
});
|
||||
|
||||
it(
|
||||
"should mark text as RTL if less than 30% of text is RTL, " +
|
||||
"when the string is very short (issue 11656)",
|
||||
function () {
|
||||
const str = "()\u05d1("; // 25% of the string is RTL characters.
|
||||
const bidiText = bidi(str, -1, false);
|
||||
|
||||
expect(bidiText.str).toEqual("(\u05d1)(");
|
||||
expect(bidiText.dir).toEqual("rtl");
|
||||
}
|
||||
);
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user