diff --git a/src/shared/util.js b/src/shared/util.js index ba94fec8b..1af94158d 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -566,16 +566,20 @@ class AbortException extends BaseException { } } -const NullCharactersRegExp = /\x00/g; +const NullCharactersRegExp = /\x00+/g; +const InvisibleCharactersRegExp = /[\x01-\x1F]/g; /** * @param {string} str */ -function removeNullCharacters(str) { +function removeNullCharacters(str, replaceInvisible = false) { if (typeof str !== "string") { warn("The argument for removeNullCharacters must be a string."); return str; } + if (replaceInvisible) { + str = str.replace(InvisibleCharactersRegExp, " "); + } return str.replace(NullCharactersRegExp, ""); } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 2b4476710..a957af612 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -486,3 +486,4 @@ !pr12828.pdf !secHandler.pdf !rc_annotation.pdf +!issue14267.pdf \ No newline at end of file diff --git a/test/pdfs/issue14267.pdf b/test/pdfs/issue14267.pdf new file mode 100644 index 000000000..38d8d844e Binary files /dev/null and b/test/pdfs/issue14267.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 59ff1c9a8..f0cf8e004 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1102,6 +1102,19 @@ describe("api", function () { await loadingTask.destroy(); }); + it("gets outline with non-displayable chars", async function () { + const loadingTask = getDocument(buildGetDocumentParams("issue14267.pdf")); + const pdfDoc = await loadingTask.promise; + const outline = await pdfDoc.getOutline(); + expect(Array.isArray(outline)).toEqual(true); + expect(outline.length).toEqual(1); + + const outlineItem = outline[0]; + expect(outlineItem.title).toEqual("hello\x11world"); + + await loadingTask.destroy(); + }); + it("gets non-existent permissions", async function () { const permissions = await pdfDocument.getPermissions(); expect(permissions).toEqual(null); diff --git a/test/unit/util_spec.js b/test/unit/util_spec.js index eb39f2b94..b86d287c4 100644 --- a/test/unit/util_spec.js +++ b/test/unit/util_spec.js @@ -185,6 +185,18 @@ describe("util", function () { const str = "string\x00With\x00Null\x00Chars"; expect(removeNullCharacters(str)).toEqual("stringWithNullChars"); }); + + it("should modify string with non-displayable characters", function () { + const str = Array.from(Array(32).keys()) + .map(x => String.fromCharCode(x) + "a") + .join(""); + // \x00 is replaced by an empty string. + const expected = + "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a"; + expect(removeNullCharacters(str, /* replaceInvisible */ true)).toEqual( + expected + ); + }); }); describe("ReadableStream", function () { diff --git a/web/base_tree_viewer.js b/web/base_tree_viewer.js index 6e203aea8..7a4f1e030 100644 --- a/web/base_tree_viewer.js +++ b/web/base_tree_viewer.js @@ -59,7 +59,12 @@ class BaseTreeViewer { * @private */ _normalizeTextContent(str) { - return removeNullCharacters(str) || /* en dash = */ "\u2013"; + // Chars in range [0x01-0x1F] will be replaced with a white space + // and 0x00 by "". + return ( + removeNullCharacters(str, /* replaceInvisible */ true) || + /* en dash = */ "\u2013" + ); } /**