Merge pull request #11186 from Snuffleupagus/issue-9655

Improve the heuristics, in `PartialEvaluator._buildSimpleFontToUnicode`, for glyphNames of the Cdd{d}/cdd{d} format (issue 9655)
This commit is contained in:
Tim van der Meij 2019-10-06 19:50:43 +02:00 committed by GitHub
commit cead77ef3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 7 deletions

View File

@ -1977,7 +1977,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
* @returns {ToUnicodeMap} * @returns {ToUnicodeMap}
* @private * @private
*/ */
_buildSimpleFontToUnicode(properties) { _buildSimpleFontToUnicode(properties, forceGlyphs = false) {
assert(!properties.composite, 'Must be a simple font.'); assert(!properties.composite, 'Must be a simple font.');
let toUnicode = [], charcode, glyphName; let toUnicode = [], charcode, glyphName;
@ -2017,14 +2017,31 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
code = parseInt(glyphName.substring(1), 16); code = parseInt(glyphName.substring(1), 16);
} }
break; break;
case 'C': // Cddd glyph case 'C': // Cdd{d} glyph
case 'c': // cddd glyph case 'c': // cdd{d} glyph
if (glyphName.length >= 3) { if (glyphName.length >= 3 && glyphName.length <= 4) {
code = +glyphName.substring(1); const codeStr = glyphName.substring(1);
if (forceGlyphs) {
code = parseInt(codeStr, 16);
break;
}
// Normally the Cdd{d}/cdd{d} glyphName format will contain
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
code = +codeStr;
// ... however some PDF generators violate that assumption by
// containing glyph, i.e. base 16, codes instead.
// In that case we need to re-parse the *entire* encoding to
// prevent broken text-selection (fixes issue9655_reduced.pdf).
if (Number.isNaN(code) &&
Number.isInteger(parseInt(codeStr, 16))) {
return this._buildSimpleFontToUnicode(properties,
/* forceGlyphs */ true);
}
} }
break; break;
default: default: // 'uniXXXX'/'uXXXX{XX}' glyphs
// 'uniXXXX'/'uXXXX{XX}' glyphs
let unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); let unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) { if (unicode !== -1) {
code = unicode; code = unicode;

View File

@ -73,6 +73,7 @@
!issue9291.pdf !issue9291.pdf
!issue9418.pdf !issue9418.pdf
!issue9458.pdf !issue9458.pdf
!issue9655_reduced.pdf
!issue9915_reduced.pdf !issue9915_reduced.pdf
!issue9940.pdf !issue9940.pdf
!issue10388_reduced.pdf !issue10388_reduced.pdf

Binary file not shown.

View File

@ -495,6 +495,13 @@
"rounds": 1, "rounds": 1,
"type": "text" "type": "text"
}, },
{ "id": "issue9655-text",
"file": "pdfs/issue9655_reduced.pdf",
"md5": "87259a82cf3cda18e240517ca53c312a",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "jai-pdf", { "id": "jai-pdf",
"file": "pdfs/jai.pdf", "file": "pdfs/jai.pdf",
"md5": "1f5dd128c3757420a881a155f2f8ace3", "md5": "1f5dd128c3757420a881a155f2f8ace3",