Merge pull request #15701 from Snuffleupagus/move-string-helpers

Move some string helper functions to the worker-thread
This commit is contained in:
Tim van der Meij 2022-11-19 11:20:07 +01:00 committed by GitHub
commit d6908ee145
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 102 additions and 75 deletions

View File

@ -23,26 +23,25 @@ import {
AnnotationType, AnnotationType,
assert, assert,
BASELINE_FACTOR, BASELINE_FACTOR,
escapeString,
FeatureTest, FeatureTest,
getModificationDate, getModificationDate,
IDENTITY_MATRIX, IDENTITY_MATRIX,
isAscii,
LINE_DESCENT_FACTOR, LINE_DESCENT_FACTOR,
LINE_FACTOR, LINE_FACTOR,
OPS, OPS,
RenderingIntentFlag, RenderingIntentFlag,
shadow, shadow,
stringToPDFString, stringToPDFString,
stringToUTF16BEString,
unreachable, unreachable,
Util, Util,
warn, warn,
} from "../shared/util.js"; } from "../shared/util.js";
import { import {
collectActions, collectActions,
escapeString,
getInheritableProperty, getInheritableProperty,
getRotationMatrix, getRotationMatrix,
isAscii,
numberToString, numberToString,
stringToUTF16String, stringToUTF16String,
} from "./core_utils.js"; } from "./core_utils.js";
@ -1879,7 +1878,11 @@ class WidgetAnnotation extends Annotation {
value, value,
}; };
const encoder = val => (isAscii(val) ? val : stringToUTF16BEString(val)); const encoder = val => {
return isAscii(val)
? val
: stringToUTF16String(val, /* bigEndian = */ true);
};
dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value)); dict.set("V", Array.isArray(value) ? value.map(encoder) : encoder(value));
const maybeMK = this._getMKDict(rotation); const maybeMK = this._getMKDict(rotation);
@ -3546,14 +3549,19 @@ class FreeTextAnnotation extends MarkupAnnotation {
freetext.set("DA", da); freetext.set("DA", da);
freetext.set( freetext.set(
"Contents", "Contents",
isAscii(value) ? value : stringToUTF16BEString(value) isAscii(value)
? value
: stringToUTF16String(value, /* bigEndian = */ true)
); );
freetext.set("F", 4); freetext.set("F", 4);
freetext.set("Border", [0, 0, 0]); freetext.set("Border", [0, 0, 0]);
freetext.set("Rotate", rotation); freetext.set("Rotate", rotation);
if (user) { if (user) {
freetext.set("T", isAscii(user) ? user : stringToUTF16BEString(user)); freetext.set(
"T",
isAscii(user) ? user : stringToUTF16String(user, /* bigEndian = */ true)
);
} }
if (apRef || ap) { if (apRef || ap) {

View File

@ -313,6 +313,19 @@ function escapePDFName(str) {
return buffer.join(""); return buffer.join("");
} }
/**
 * Escape a string so that it can be written as a literal string in a PDF file.
 *
 * "(", ")" and "\" are prefixed with a backslash, whereas line feed and
 * carriage return characters are replaced by the two-character sequences
 * "\n" and "\r" respectively.
 *
 * @param {string} str - The string to escape.
 * @returns {string} The escaped string.
 */
function escapeString(str) {
  return str.replace(/([()\\\n\r])/g, match => {
    switch (match) {
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      default:
        // "(", ")" or "\": keep the character, preceded by a backslash.
        return `\\${match}`;
    }
  });
}
function _collectJS(entry, xref, list, parents) { function _collectJS(entry, xref, list, parents) {
if (!entry) { if (!entry) {
return; return;
@ -572,6 +585,10 @@ function getNewAnnotationsMap(annotationStorage) {
return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null; return newAnnotationsByPage.size > 0 ? newAnnotationsByPage : null;
} }
/**
 * Check whether a string consists solely of characters in the 7-bit ASCII
 * range (0x00-0x7F).
 *
 * @param {string} str - The string to inspect.
 * @returns {boolean} `true` when every character is in the ASCII range (the
 *   empty string included), `false` otherwise.
 */
function isAscii(str) {
  const asciiOnly = /^[\x00-\x7F]*$/;
  return asciiOnly.test(str);
}
function stringToUTF16HexString(str) { function stringToUTF16HexString(str) {
const buf = []; const buf = [];
for (let i = 0, ii = str.length; i < ii; i++) { for (let i = 0, ii = str.length; i < ii; i++) {
@ -584,8 +601,11 @@ function stringToUTF16HexString(str) {
return buf.join(""); return buf.join("");
} }
function stringToUTF16String(str) { function stringToUTF16String(str, bigEndian = false) {
const buf = []; const buf = [];
if (bigEndian) {
buf.push("\xFE\xFF");
}
for (let i = 0, ii = str.length; i < ii; i++) { for (let i = 0, ii = str.length; i < ii; i++) {
const char = str.charCodeAt(i); const char = str.charCodeAt(i);
buf.push( buf.push(
@ -614,11 +634,13 @@ export {
DocStats, DocStats,
encodeToXmlString, encodeToXmlString,
escapePDFName, escapePDFName,
escapeString,
getArrayLookupTableFactory, getArrayLookupTableFactory,
getInheritableProperty, getInheritableProperty,
getLookupTableFactory, getLookupTableFactory,
getNewAnnotationsMap, getNewAnnotationsMap,
getRotationMatrix, getRotationMatrix,
isAscii,
isWhiteSpace, isWhiteSpace,
log2, log2,
MissingDataException, MissingDataException,

View File

@ -13,9 +13,14 @@
* limitations under the License. * limitations under the License.
*/ */
import { bytesToString, escapeString, warn } from "../shared/util.js"; import { bytesToString, warn } from "../shared/util.js";
import { Dict, Name, Ref } from "./primitives.js"; import { Dict, Name, Ref } from "./primitives.js";
import { escapePDFName, numberToString, parseXFAPath } from "./core_utils.js"; import {
escapePDFName,
escapeString,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js"; import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js"; import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js"; import { calculateMD5 } from "./crypto.js";

View File

@ -1037,36 +1037,6 @@ function stringToPDFString(str) {
return strBuf.join(""); return strBuf.join("");
} }
/**
 * Escape a string in order to write it in a PDF file.
 *
 * @param {string} str - The string to escape.
 * @returns {string} The string with "(", ")", "\n", "\r" and "\" replaced
 *   by "\(", "\)", "\\n", "\\r" and "\\".
 */
function escapeString(str) {
  const escapeMatch = match => {
    // Line breaks become their two-character escape sequences.
    if (match === "\n") {
      return "\\n";
    }
    if (match === "\r") {
      return "\\r";
    }
    // Parentheses and backslashes are kept, prefixed with a backslash.
    return `\\${match}`;
  };
  return str.replace(/([()\\\n\r])/g, escapeMatch);
}
/**
 * Tell whether the given string contains only characters in the range
 * 0x00-0x7F.
 *
 * @param {string} str - The string to check.
 * @returns {boolean} `true` for an all-ASCII (or empty) string, `false` when
 *   at least one character lies outside that range.
 */
function isAscii(str) {
  const asciiPattern = /^[\x00-\x7F]*$/;
  const matchesAscii = asciiPattern.test(str);
  return matchesAscii;
}
/**
 * Encode a string as UTF-16BE, prefixed with the byte order mark 0xFE 0xFF.
 *
 * Each UTF-16 code unit of the input is emitted as two characters: first its
 * high byte, then its low byte.
 *
 * @param {string} str - The string to encode.
 * @returns {string} A string in which every character holds one byte of the
 *   encoded output, starting with "\xFE\xFF".
 */
function stringToUTF16BEString(str) {
  const parts = ["\xFE\xFF"];
  for (let index = 0; index < str.length; index++) {
    const codeUnit = str.charCodeAt(index);
    const highByte = (codeUnit >> 8) & 0xff;
    const lowByte = codeUnit & 0xff;
    parts.push(String.fromCharCode(highByte, lowByte));
  }
  return parts.join("");
}
function stringToUTF8String(str) { function stringToUTF8String(str) {
return decodeURIComponent(escape(str)); return decodeURIComponent(escape(str));
} }
@ -1167,7 +1137,6 @@ export {
createPromiseCapability, createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
DocumentActionEventType, DocumentActionEventType,
escapeString,
FeatureTest, FeatureTest,
FONT_IDENTITY_MATRIX, FONT_IDENTITY_MATRIX,
FontType, FontType,
@ -1180,7 +1149,6 @@ export {
InvalidPDFException, InvalidPDFException,
isArrayBuffer, isArrayBuffer,
isArrayEqual, isArrayEqual,
isAscii,
LINE_DESCENT_FACTOR, LINE_DESCENT_FACTOR,
LINE_FACTOR, LINE_FACTOR,
MissingPDFException, MissingPDFException,
@ -1198,7 +1166,6 @@ export {
string32, string32,
stringToBytes, stringToBytes,
stringToPDFString, stringToPDFString,
stringToUTF16BEString,
stringToUTF8String, stringToUTF8String,
TextRenderingMode, TextRenderingMode,
UnexpectedResponseException, UnexpectedResponseException,

View File

@ -17,10 +17,14 @@ import { Dict, Ref } from "../../src/core/primitives.js";
import { import {
encodeToXmlString, encodeToXmlString,
escapePDFName, escapePDFName,
escapeString,
getInheritableProperty, getInheritableProperty,
isAscii,
isWhiteSpace, isWhiteSpace,
log2, log2,
parseXFAPath, parseXFAPath,
stringToUTF16HexString,
stringToUTF16String,
toRomanNumerals, toRomanNumerals,
validateCSSFont, validateCSSFont,
} from "../../src/core/core_utils.js"; } from "../../src/core/core_utils.js";
@ -221,6 +225,14 @@ describe("core_utils", function () {
}); });
}); });
// Checks that `escapeString` backslash-escapes parentheses and backslashes,
// and rewrites line feeds / carriage returns as "\n" / "\r" sequences.
describe("escapeString", function () {
  it("should escape (, ), \\n, \\r, and \\", function () {
    const input = "((a\\a))\n(b(b\\b)\rb)";
    const expected = "\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)";

    expect(escapeString(input)).toEqual(expected);
  });
});
describe("encodeToXmlString", function () { describe("encodeToXmlString", function () {
it("should get a correctly encoded string with some entities", function () { it("should get a correctly encoded string with some entities", function () {
const str = "\"\u0397ell😂' & <W😂rld>"; const str = "\"\u0397ell😂' & <W😂rld>";
@ -333,4 +345,50 @@ describe("core_utils", function () {
expect(cssFontInfo.italicAngle).toEqual("2.718"); expect(cssFontInfo.italicAngle).toEqual("2.718");
}); });
}); });
// Exercises `isAscii` with an ASCII-only string, a non-ASCII string, and a
// string that mixes both.
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    const cases = [
      ["hello world", true],
      ["こんにちは世界の", false],
      ["hello world in Japanese is こんにちは世界の", false],
    ];

    for (const [input, expected] of cases) {
      expect(isAscii(input)).toEqual(expected);
    }
  });
});
// Checks the hexadecimal UTF-16 output of `stringToUTF16HexString` for an
// ASCII input and for a Japanese input.
describe("stringToUTF16HexString", function () {
  it("should encode a string in UTF16 hexadecimal format", function () {
    const asciiHex = stringToUTF16HexString("hello world");
    expect(asciiHex).toEqual("00680065006c006c006f00200077006f0072006c0064");

    const japaneseHex = stringToUTF16HexString("こんにちは世界の");
    expect(japaneseHex).toEqual("30533093306b3061306f4e16754c306e");
  });
});
// Covers `stringToUTF16String` both in its default mode and with the
// `bigEndian` argument set, where the output starts with "\xfe\xff".
describe("stringToUTF16String", function () {
  it("should encode a string in UTF16", function () {
    const asciiEncoded = stringToUTF16String("hello world");
    expect(asciiEncoded).toEqual("\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseEncoded = stringToUTF16String("こんにちは世界の");
    expect(japaneseEncoded).toEqual(
      "\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });

  it("should encode a string in UTF16BE with a BOM", function () {
    const asciiEncoded = stringToUTF16String(
      "hello world",
      /* bigEndian = */ true
    );
    expect(asciiEncoded).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseEncoded = stringToUTF16String(
      "こんにちは世界の",
      /* bigEndian = */ true
    );
    expect(japaneseEncoded).toEqual(
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61\x30\x6f\x4e\x16\x75\x4c\x30\x6e"
    );
  });
});
}); });

View File

@ -17,14 +17,11 @@ import {
bytesToString, bytesToString,
createPromiseCapability, createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
escapeString,
getModificationDate, getModificationDate,
isArrayBuffer, isArrayBuffer,
isAscii,
string32, string32,
stringToBytes, stringToBytes,
stringToPDFString, stringToPDFString,
stringToUTF16BEString,
} from "../../src/shared/util.js"; } from "../../src/shared/util.js";
describe("util", function () { describe("util", function () {
@ -246,40 +243,10 @@ describe("util", function () {
}); });
}); });
// Checks that `escapeString` escapes "(", ")", "\" and line-break characters.
describe("escapeString", function () {
  it("should escape (, ), \\n, \\r, and \\", function () {
    const escaped = escapeString("((a\\a))\n(b(b\\b)\rb)");

    expect(escaped).toEqual("\\(\\(a\\\\a\\)\\)\\n\\(b\\(b\\\\b\\)\\rb\\)");
  });
});
describe("getModificationDate", function () { describe("getModificationDate", function () {
it("should get a correctly formatted date", function () { it("should get a correctly formatted date", function () {
const date = new Date(Date.UTC(3141, 5, 9, 2, 6, 53)); const date = new Date(Date.UTC(3141, 5, 9, 2, 6, 53));
expect(getModificationDate(date)).toEqual("31410609020653"); expect(getModificationDate(date)).toEqual("31410609020653");
}); });
}); });
// Checks `isAscii` against one ASCII-only input and two inputs containing
// Japanese characters.
describe("isAscii", function () {
  it("handles ascii/non-ascii strings", function () {
    const asciiOnly = "hello world";
    const japaneseOnly = "こんにちは世界の";
    const mixed = "hello world in Japanese is こんにちは世界の";

    expect(isAscii(asciiOnly)).toEqual(true);
    expect(isAscii(japaneseOnly)).toEqual(false);
    expect(isAscii(mixed)).toEqual(false);
  });
});
// Checks that `stringToUTF16BEString` emits "\xfe\xff" followed by two
// characters (high byte, low byte) per input character.
describe("stringToUTF16BEString", function () {
  it("should encode a string in UTF16BE with a BOM", function () {
    const asciiEncoded = stringToUTF16BEString("hello world");
    expect(asciiEncoded).toEqual("\xfe\xff\0h\0e\0l\0l\0o\0 \0w\0o\0r\0l\0d");

    const japaneseExpected =
      "\xfe\xff\x30\x53\x30\x93\x30\x6b\x30\x61" +
      "\x30\x6f\x4e\x16\x75\x4c\x30\x6e";
    expect(stringToUTF16BEString("こんにちは世界の")).toEqual(japaneseExpected);
  });
});
}); });