From 809b96b40c6111f0b4776db622514a648543b7ed Mon Sep 17 00:00:00 2001 From: Jani Pehkonen Date: Tue, 21 Jan 2020 20:36:41 +0200 Subject: [PATCH] Hide .notdef glyphs in non-embedded Type1 fonts and don't ignore Widths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #11403 The PDF uses the non-embedded Type1 font Helvetica. Character codes 194 and 160 (`Â` and `NBSP`) are encoded as `.notdef`. We shouldn't show those glyphs because it seems that Acrobat Reader doesn't draw glyphs that are named `.notdef` in fonts like this. In addition to testing `glyphName === ".notdef"`, we must also test `glyphName === ""` because the name `""` is used in `core/encodings.js` for undefined glyphs in encodings like `WinAnsiEncoding`. The solution above hides the `Â` characters but now the replacement character (space) appears to be too wide. I found out that PDF.js ignores the font's `Widths` array if the font has no `FontDescriptor` entry. That happens in #11403, so the default widths of Helvetica were used as specified in `core/metrics.js` and `.notdef` got a width of 333. The correct width is 0 as specified by the `Widths` array in the PDF. Thus we must never ignore `Widths`. --- src/core/evaluator.js | 26 ++++++--- src/core/fonts.js | 10 ++++ test/pdfs/.gitignore | 1 + test/pdfs/issue11403_reduced.pdf | 90 ++++++++++++++++++++++++++++++++ test/test_manifest.json | 6 +++ 5 files changed, 125 insertions(+), 8 deletions(-) create mode 100644 test/pdfs/issue11403_reduced.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 34147e921..984410f08 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2945,6 +2945,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var type = preEvaluatedFont.type; var maxCharIndex = composite ? 
0xffff : 0xff; var properties; + const firstChar = dict.get("FirstChar") || 0; + const lastChar = dict.get("LastChar") || maxCharIndex; if (!descriptor) { if (type === "Type3") { @@ -2981,15 +2983,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { widths: metrics.widths, defaultWidth: metrics.defaultWidth, flags, - firstChar: 0, - lastChar: maxCharIndex, + firstChar, + lastChar, }; + const widths = dict.get("Widths"); return this.extractDataStructures(dict, dict, properties).then( properties => { - properties.widths = this.buildCharCodeToWidth( - metrics.widths, - properties - ); + if (widths) { + const glyphWidths = []; + let j = firstChar; + for (let i = 0, ii = widths.length; i < ii; i++) { + glyphWidths[j++] = this.xref.fetchIfRef(widths[i]); + } + properties.widths = glyphWidths; + } else { + properties.widths = this.buildCharCodeToWidth( + metrics.widths, + properties + ); + } return new Font(baseFontName, null, properties); } ); @@ -3001,8 +3013,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // to ignore this rule when a variant of a standard font is used. // TODO Fill the width array depending on which of the base font this is // a variant. - var firstChar = dict.get("FirstChar") || 0; - var lastChar = dict.get("LastChar") || maxCharIndex; var fontName = descriptor.get("FontName"); var baseFont = dict.get("BaseFont"); diff --git a/src/core/fonts.js b/src/core/fonts.js index 55fb0daed..1071eac16 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -3392,6 +3392,16 @@ var Font = (function FontClosure() { // back to the char code. fontCharCode = this.toFontChar[charcode] || charcode; if (this.missingFile) { + const glyphName = + this.differences[charcode] || this.defaultEncoding[charcode]; + if ( + (glyphName === ".notdef" || glyphName === "") && + this.type === "Type1" + ) { + // .notdef glyphs should be invisible in non-embedded Type1 fonts, so + // replace them with spaces. 
+ fontCharCode = 0x20; + } fontCharCode = mapSpecialUnicodeValues(fontCharCode); } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 3d4367168..787687ce5 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -167,6 +167,7 @@ !issue3405r.pdf !issue7339_reduced.pdf !issue3438.pdf +!issue11403_reduced.pdf !issue2074.pdf !scan-bad.pdf !bug847420.pdf diff --git a/test/pdfs/issue11403_reduced.pdf b/test/pdfs/issue11403_reduced.pdf new file mode 100644 index 000000000..57840d637 --- /dev/null +++ b/test/pdfs/issue11403_reduced.pdf @@ -0,0 +1,90 @@ +%PDF-1.5 +%âãÏÓ +1 0 obj +<< +/Type /Catalog +/Pages 2 0 R +>> +endobj + +2 0 obj +<< +/Type /Pages +/Count 1 +/Kids [3 0 R] +>> +endobj + +3 0 obj +<< +/Type /Page +/Parent 2 0 R +/Contents 6 0 R +/MediaBox [0 0 200 50] +/Resources 4 0 R +>> +endobj + +4 0 obj +<< + /Font << /F2 5 0 R >> +>> +endobj + +5 0 obj +<< + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica + /Encoding << + /Type /Encoding + /Differences [32 /space 35 /numbersign 37 /percent 40 /parenleft /parenright + 44 /comma /hyphen /period /slash /zero /one /two /three /four /five /six + /seven /eight /nine /colon /semicolon /less 65 /A /B /C /D /E /F /G /H /I + /J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y 91 /bracketleft 93 /bracketright + 97 /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p 114 /r /s /t /u /v /w /x + /y /z 128 /.notdef 147 /.notdef 160 /.notdef 194 /.notdef 226 /.notdef] + >> + /FirstChar 32 + /LastChar 226 + /Widths [278 0 0 556 0 889 0 0 333 333 0 0 278 333 278 278 556 556 556 556 556 + 556 556 556 556 556 278 278 584 0 0 0 0 667 667 722 722 667 611 778 722 278 + 500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 0 278 0 278 + 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 0 333 + 500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0] +>> +endobj + +6 0 obj +<< /Length 71 >> +stream +BT + 14 17 Td + /F2 18 Tf + (\302\240 2. Eat a healthy diet.) Tj +ET +endstream +endobj + +xref +0 7 +0000000000 65535 f +0000000017 00000 n +0000000074 00000 n +0000000140 00000 n +0000000254 00000 n +0000000306 00000 n +0000001548 00000 n + +trailer +<< +/Size 7 +/Root 1 0 R +/ID [<7b642074abed518fedb35b69fbbf85c0> <7b642074abed518fedb35b69fbbf85c0>] +>> +startxref +1677 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index 1bd45a6d1..3818014ad 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1419,6 +1419,12 @@ "lastPage": 1, "type": "eq" }, + { "id": "issue11403", + "file": "pdfs/issue11403_reduced.pdf", + "md5": "08287b64f442cb7c329b97c4774aa1cd", + "rounds": 1, + "type": "eq" + }, { "id": "issue11139", "file": "pdfs/issue11139.pdf", "md5": "006dd4f4bb1878bc14a12072d81a4524",