Merge pull request #14213 from Snuffleupagus/issue-11656

Tweak the Bidi-detection heuristics for very short RTL strings (issue 11656)
This commit is contained in:
Jonas Jenwald 2021-11-03 22:09:14 +01:00 committed by GitHub
commit e1a35e7bb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 2 deletions

View File

@ -158,7 +158,8 @@ function bidi(str, startLevel = -1, vertical = false) {
// Detect the bidi method // Detect the bidi method
// - If there are no rtl characters then no bidi needed // - If there are no rtl characters then no bidi needed
// - If less than 30% chars are rtl then string is primarily ltr // - If less than 30% chars are rtl then string is primarily ltr,
// unless the string is very short.
// - If more than 30% chars are rtl then string is primarily rtl // - If more than 30% chars are rtl then string is primarily rtl
if (numBidi === 0) { if (numBidi === 0) {
isLTR = true; isLTR = true;
@ -166,7 +167,7 @@ function bidi(str, startLevel = -1, vertical = false) {
} }
if (startLevel === -1) { if (startLevel === -1) {
if (numBidi / strLength < 0.3) { if (numBidi / strLength < 0.3 && strLength > 4) {
isLTR = true; isLTR = true;
startLevel = 0; startLevel = 0;
} else { } else {

View File

@ -448,6 +448,7 @@
!annotation-square-circle-without-appearance.pdf !annotation-square-circle-without-appearance.pdf
!annotation-stamp.pdf !annotation-stamp.pdf
!issue14048.pdf !issue14048.pdf
!issue11656.pdf
!annotation-fileattachment.pdf !annotation-fileattachment.pdf
!annotation-text-widget.pdf !annotation-text-widget.pdf
!annotation-choice-widget.pdf !annotation-choice-widget.pdf

BIN
test/pdfs/issue11656.pdf Normal file

Binary file not shown.

View File

@ -5080,6 +5080,12 @@
"lastPage": 1, "lastPage": 1,
"type": "eq" "type": "eq"
}, },
{ "id": "issue11656",
"file": "pdfs/issue11656.pdf",
"md5": "82d5d4f5978a4974707deb1ea98e62f2",
"rounds": 1,
"type": "text"
},
{ "id": "vertical", { "id": "vertical",
"file": "pdfs/vertical.pdf", "file": "pdfs/vertical.pdf",
"md5": "8a74d33504701edcefeef2afd022765e", "md5": "8a74d33504701edcefeef2afd022765e",

View File

@ -16,6 +16,28 @@
import { bidi } from "../../src/core/bidi.js"; import { bidi } from "../../src/core/bidi.js";
describe("bidi", function () { describe("bidi", function () {
// A very short string (at most four characters) containing no RTL characters
// must be returned unchanged and be reported as left-to-right.
it(
  "should mark text as LTR if there's only LTR-characters, " +
    "when the string is very short",
  function () {
    const { str: resultText, dir: resultDir } = bidi("foo", -1, false);

    expect(resultText).toEqual("foo");
    expect(resultDir).toEqual("ltr");
  }
);
// A longer string without any RTL characters must come back unchanged and be
// reported as left-to-right.
it("should mark text as LTR if there's only LTR-characters", function () {
  const input = "Lorem ipsum dolor sit amet, consectetur adipisicing elit.";
  const expected = "Lorem ipsum dolor sit amet, consectetur adipisicing elit.";

  const { str: resultText, dir: resultDir } = bidi(input, -1, false);

  expect(resultText).toEqual(expected);
  expect(resultDir).toEqual("ltr");
});
it("should mark text as RTL if more than 30% of text is RTL", function () { it("should mark text as RTL if more than 30% of text is RTL", function () {
// 33% of test text are RTL characters // 33% of test text are RTL characters
const test = "\u0645\u0635\u0631 Egypt"; const test = "\u0645\u0635\u0631 Egypt";
@ -34,4 +56,16 @@ describe("bidi", function () {
expect(bidiText.str).toEqual(result); expect(bidiText.str).toEqual(result);
expect(bidiText.dir).toEqual("ltr"); expect(bidiText.dir).toEqual("ltr");
}); });
// Regression test for issue 11656: for very short strings the "less than 30%
// RTL characters means LTR" heuristic is not applied, so this string is
// reordered and reported as right-to-left.
it(
  "should mark text as RTL if less than 30% of text is RTL, " +
    "when the string is very short (issue 11656)",
  function () {
    // 25% of the string is RTL characters (one of four).
    const input = "()\u05d1(";
    const expected = "(\u05d1)(";

    const { str: resultText, dir: resultDir } = bidi(input, -1, false);

    expect(resultText).toEqual(expected);
    expect(resultDir).toEqual("rtl");
  }
);
}); });