Remove non-displayable chars from outline title (#14267)

- It aims to fix #14267;
 - The specs say nothing about chars in the range [0x00-0x1F], but Acrobat doesn't display them in any way.
This commit is contained in:
Calixte Denizet 2021-11-12 19:41:32 +01:00
parent 7d6d3fc124
commit 7041c62ccf
6 changed files with 38 additions and 3 deletions

View File

@ -566,16 +566,20 @@ class AbortException extends BaseException {
}
}
// Matches one or more consecutive NUL characters, so that a whole run is
// removed by a single replacement.
const NullCharactersRegExp = /\x00+/g;
// Matches each of the remaining control characters, i.e. [0x01-0x1F].
const InvisibleCharactersRegExp = /[\x01-\x1F]/g;

/**
 * Remove the NUL characters from a string, optionally replacing the other
 * control characters with white spaces first.
 *
 * @param {string} str - The string to clean up.
 * @param {boolean} [replaceInvisible] - When true, every character in the
 *   range [0x01-0x1F] is replaced by one white space before the NUL
 *   characters are removed. The default value is `false`.
 * @returns {string} The cleaned-up string. An argument that is not a string
 *   is returned unchanged, after `warn` has been called.
 */
function removeNullCharacters(str, replaceInvisible = false) {
  if (typeof str !== "string") {
    warn("The argument for removeNullCharacters must be a string.");
    return str;
  }
  if (replaceInvisible) {
    // Replace (rather than remove) these characters, so that the text on
    // both sides of them stays separated.
    str = str.replace(InvisibleCharactersRegExp, " ");
  }
  return str.replace(NullCharactersRegExp, "");
}

View File

@ -486,3 +486,4 @@
!pr12828.pdf
!secHandler.pdf
!rc_annotation.pdf
!issue14267.pdf

BIN
test/pdfs/issue14267.pdf Normal file

Binary file not shown.

View File

@ -1102,6 +1102,19 @@ describe("api", function () {
await loadingTask.destroy();
});
it("gets outline with non-displayable chars", async function () {
  const params = buildGetDocumentParams("issue14267.pdf");
  const loadingTask = getDocument(params);
  const doc = await loadingTask.promise;
  const items = await doc.getOutline();

  // The document is expected to expose exactly one outline entry.
  expect(Array.isArray(items)).toEqual(true);
  expect(items.length).toEqual(1);

  // The title returned here is expected to still contain the \x11 char.
  const { title } = items[0];
  expect(title).toEqual("hello\x11world");

  await loadingTask.destroy();
});
it("gets non-existent permissions", async function () {
const permissions = await pdfDocument.getPermissions();
expect(permissions).toEqual(null);

View File

@ -185,6 +185,18 @@ describe("util", function () {
const str = "string\x00With\x00Null\x00Chars";
expect(removeNullCharacters(str)).toEqual("stringWithNullChars");
});
it("should modify string with non-displayable characters", function () {
  // Build "\x00a\x01a...\x1Fa": each char code in [0x00-0x1F] followed by "a".
  const pieces = [];
  for (let code = 0; code < 32; code++) {
    pieces.push(String.fromCharCode(code) + "a");
  }
  const input = pieces.join("");

  // The leading \x00 is dropped, all the other chars become a white space.
  const expected =
    "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a";

  const actual = removeNullCharacters(input, /* replaceInvisible */ true);
  expect(actual).toEqual(expected);
});
});
describe("ReadableStream", function () {

View File

@ -59,7 +59,12 @@ class BaseTreeViewer {
* @private
*/
_normalizeTextContent(str) {
return removeNullCharacters(str) || /* en dash = */ "\u2013";
// Chars in range [0x01-0x1F] will be replaced with a white space
// and 0x00 by "".
return (
removeNullCharacters(str, /* replaceInvisible */ true) ||
/* en dash = */ "\u2013"
);
}
/**