Hide .notdef glyphs in non-embedded Type1 fonts and don't ignore Widths

Fixes #11403
The PDF uses the non-embedded Type1 font Helvetica. Character codes 194 and 160 (`Â` and `NBSP`) are encoded as `.notdef`. We shouldn't show those glyphs because it seems that Acrobat Reader doesn't draw glyphs that are named `.notdef` in fonts like this.

In addition to testing `glyphName === ".notdef"`, we must also test `glyphName === ""` because the name `""` is used in `core/encodings.js` for undefined glyphs in encodings like `WinAnsiEncoding`.

The solution above hides the `Â` characters, but now the replacement character (space) appears to be too wide. I found out that PDF.js ignores the font's `Widths` array if the font has no `FontDescriptor` entry. That happens in #11403, so the default widths of Helvetica were used as specified in `core/metrics.js` and `.notdef` got a width of 333. The correct width is 0 as specified by the `Widths` array in the PDF. Thus we must never ignore `Widths`.
This commit is contained in:
Jani Pehkonen 2020-01-21 20:36:41 +02:00
parent 40f531ee87
commit 809b96b40c
5 changed files with 125 additions and 8 deletions

View File

@ -2945,6 +2945,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var type = preEvaluatedFont.type;
var maxCharIndex = composite ? 0xffff : 0xff;
var properties;
const firstChar = dict.get("FirstChar") || 0;
const lastChar = dict.get("LastChar") || maxCharIndex;
if (!descriptor) {
if (type === "Type3") {
@ -2981,15 +2983,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
widths: metrics.widths,
defaultWidth: metrics.defaultWidth,
flags,
firstChar: 0,
lastChar: maxCharIndex,
firstChar,
lastChar,
};
const widths = dict.get("Widths");
return this.extractDataStructures(dict, dict, properties).then(
properties => {
properties.widths = this.buildCharCodeToWidth(
metrics.widths,
properties
);
if (widths) {
const glyphWidths = [];
let j = firstChar;
for (let i = 0, ii = widths.length; i < ii; i++) {
glyphWidths[j++] = this.xref.fetchIfRef(widths[i]);
}
properties.widths = glyphWidths;
} else {
properties.widths = this.buildCharCodeToWidth(
metrics.widths,
properties
);
}
return new Font(baseFontName, null, properties);
}
);
@ -3001,8 +3013,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// to ignore this rule when a variant of a standard font is used.
// TODO Fill the width array depending on which of the base font this is
// a variant.
var firstChar = dict.get("FirstChar") || 0;
var lastChar = dict.get("LastChar") || maxCharIndex;
var fontName = descriptor.get("FontName");
var baseFont = dict.get("BaseFont");

View File

@ -3392,6 +3392,16 @@ var Font = (function FontClosure() {
// back to the char code.
fontCharCode = this.toFontChar[charcode] || charcode;
if (this.missingFile) {
const glyphName =
this.differences[charcode] || this.defaultEncoding[charcode];
if (
(glyphName === ".notdef" || glyphName === "") &&
this.type === "Type1"
) {
// .notdef glyphs should be invisible in non-embedded Type1 fonts, so
// replace them with spaces.
fontCharCode = 0x20;
}
fontCharCode = mapSpecialUnicodeValues(fontCharCode);
}

View File

@ -167,6 +167,7 @@
!issue3405r.pdf
!issue7339_reduced.pdf
!issue3438.pdf
!issue11403_reduced.pdf
!issue2074.pdf
!scan-bad.pdf
!bug847420.pdf

View File

@ -0,0 +1,90 @@
%PDF-1.5
%âãÏÓ
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Count 1
/Kids [3 0 R]
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/Contents 6 0 R
/MediaBox [0 0 200 50]
/Resources 4 0 R
>>
endobj
4 0 obj
<<
/Font << /F2 5 0 R >>
>>
endobj
5 0 obj
<<
/Type /Font
/Subtype /Type1
/BaseFont /Helvetica
/Encoding <<
/Type /Encoding
/Differences [32 /space 35 /numbersign 37 /percent 40 /parenleft /parenright
44 /comma /hyphen /period /slash /zero /one /two /three /four /five /six
/seven /eight /nine /colon /semicolon /less 65 /A /B /C /D /E /F /G /H /I
/J /K /L /M /N /O /P /Q /R /S /T /U /V /W /X /Y 91 /bracketleft 93 /bracketright
97 /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o /p 114 /r /s /t /u /v /w /x
/y /z 128 /.notdef 147 /.notdef 160 /.notdef 194 /.notdef 226 /.notdef]
>>
/FirstChar 32
/LastChar 226
/Widths [278 0 0 556 0 889 0 0 333 333 0 0 278 333 278 278 556 556 556 556 556
556 556 556 556 556 278 278 584 0 0 0 0 667 667 722 722 667 611 778 722 278
500 667 556 833 722 778 667 778 722 667 611 722 667 944 667 667 0 278 0 278
0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 0 333
500 278 556 500 722 500 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0]
>>
endobj
6 0 obj
<< /Length 71 >>
stream
BT
14 17 Td
/F2 18 Tf
(\302\240 2. Eat a healthy diet.) Tj
ET
endstream
endobj
xref
0 7
0000000000 65535 f
0000000017 00000 n
0000000074 00000 n
0000000140 00000 n
0000000254 00000 n
0000000306 00000 n
0000001548 00000 n
trailer
<<
/Size 7
/Root 1 0 R
/ID [<7b642074abed518fedb35b69fbbf85c0> <7b642074abed518fedb35b69fbbf85c0>]
>>
startxref
1677
%%EOF

View File

@ -1419,6 +1419,12 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "issue11403",
"file": "pdfs/issue11403_reduced.pdf",
"md5": "08287b64f442cb7c329b97c4774aa1cd",
"rounds": 1,
"type": "eq"
},
{ "id": "issue11139",
"file": "pdfs/issue11139.pdf",
"md5": "006dd4f4bb1878bc14a12072d81a4524",