Remove non-displayable chars from outline title (#14267)

- it aims to fix #14267;
- there is nothing about chars in the range [0x00-0x1F] in the specs, but Acrobat doesn't display them in any way.
This commit is contained in:
Calixte Denizet 2021-11-12 19:41:32 +01:00
parent 7d6d3fc124
commit 7041c62ccf
6 changed files with 38 additions and 3 deletions

View File

@ -566,16 +566,20 @@ class AbortException extends BaseException {
} }
} }
const NullCharactersRegExp = /\x00/g; const NullCharactersRegExp = /\x00+/g;
const InvisibleCharactersRegExp = /[\x01-\x1F]/g;
/** /**
* @param {string} str * @param {string} str
*/ */
function removeNullCharacters(str) { function removeNullCharacters(str, replaceInvisible = false) {
if (typeof str !== "string") { if (typeof str !== "string") {
warn("The argument for removeNullCharacters must be a string."); warn("The argument for removeNullCharacters must be a string.");
return str; return str;
} }
if (replaceInvisible) {
str = str.replace(InvisibleCharactersRegExp, " ");
}
return str.replace(NullCharactersRegExp, ""); return str.replace(NullCharactersRegExp, "");
} }

View File

@ -486,3 +486,4 @@
!pr12828.pdf !pr12828.pdf
!secHandler.pdf !secHandler.pdf
!rc_annotation.pdf !rc_annotation.pdf
!issue14267.pdf

BIN
test/pdfs/issue14267.pdf Normal file

Binary file not shown.

View File

@ -1102,6 +1102,19 @@ describe("api", function () {
await loadingTask.destroy(); await loadingTask.destroy();
}); });
it("gets outline with non-displayable chars", async function () {
const loadingTask = getDocument(buildGetDocumentParams("issue14267.pdf"));
const pdfDoc = await loadingTask.promise;
const outline = await pdfDoc.getOutline();
expect(Array.isArray(outline)).toEqual(true);
expect(outline.length).toEqual(1);
const outlineItem = outline[0];
expect(outlineItem.title).toEqual("hello\x11world");
await loadingTask.destroy();
});
it("gets non-existent permissions", async function () { it("gets non-existent permissions", async function () {
const permissions = await pdfDocument.getPermissions(); const permissions = await pdfDocument.getPermissions();
expect(permissions).toEqual(null); expect(permissions).toEqual(null);

View File

@ -185,6 +185,18 @@ describe("util", function () {
const str = "string\x00With\x00Null\x00Chars"; const str = "string\x00With\x00Null\x00Chars";
expect(removeNullCharacters(str)).toEqual("stringWithNullChars"); expect(removeNullCharacters(str)).toEqual("stringWithNullChars");
}); });
it("should modify string with non-displayable characters", function () {
const str = Array.from(Array(32).keys())
.map(x => String.fromCharCode(x) + "a")
.join("");
// \x00 is replaced by an empty string.
const expected =
"a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a";
expect(removeNullCharacters(str, /* replaceInvisible */ true)).toEqual(
expected
);
});
}); });
describe("ReadableStream", function () { describe("ReadableStream", function () {

View File

@ -59,7 +59,12 @@ class BaseTreeViewer {
* @private * @private
*/ */
_normalizeTextContent(str) { _normalizeTextContent(str) {
return removeNullCharacters(str) || /* en dash = */ "\u2013"; // Chars in range [0x01-0x1F] will be replaced with a white space
// and 0x00 by "".
return (
removeNullCharacters(str, /* replaceInvisible */ true) ||
/* en dash = */ "\u2013"
);
} }
/** /**