Merge pull request #11186 from Snuffleupagus/issue-9655

Improve the heuristics, in `PartialEvaluator._buildSimpleFontToUnicode`, for glyphNames of the Cdd{d}/cdd{d} format (issue 9655)
This commit is contained in:
Tim van der Meij 2019-10-06 19:50:43 +02:00 committed by GitHub
commit cead77ef3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 7 deletions

View File

@@ -1977,7 +1977,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
* @returns {ToUnicodeMap}
* @private
*/
_buildSimpleFontToUnicode(properties) {
_buildSimpleFontToUnicode(properties, forceGlyphs = false) {
assert(!properties.composite, 'Must be a simple font.');
let toUnicode = [], charcode, glyphName;
@@ -2017,14 +2017,31 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
code = parseInt(glyphName.substring(1), 16);
}
break;
case 'C': // Cddd glyph
case 'c': // cddd glyph
if (glyphName.length >= 3) {
code = +glyphName.substring(1);
case 'C': // Cdd{d} glyph
case 'c': // cdd{d} glyph
if (glyphName.length >= 3 && glyphName.length <= 4) {
const codeStr = glyphName.substring(1);
if (forceGlyphs) {
code = parseInt(codeStr, 16);
break;
}
// Normally the Cdd{d}/cdd{d} glyphName format will contain
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
code = +codeStr;
// ... however some PDF generators violate that assumption by
// containing glyph, i.e. base 16, codes instead.
// In that case we need to re-parse the *entire* encoding to
// prevent broken text-selection (fixes issue9655_reduced.pdf).
if (Number.isNaN(code) &&
Number.isInteger(parseInt(codeStr, 16))) {
return this._buildSimpleFontToUnicode(properties,
/* forceGlyphs */ true);
}
}
break;
default:
// 'uniXXXX'/'uXXXX{XX}' glyphs
default: // 'uniXXXX'/'uXXXX{XX}' glyphs
let unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
if (unicode !== -1) {
code = unicode;

View File

@@ -73,6 +73,7 @@
!issue9291.pdf
!issue9418.pdf
!issue9458.pdf
!issue9655_reduced.pdf
!issue9915_reduced.pdf
!issue9940.pdf
!issue10388_reduced.pdf

Binary file not shown.

View File

@@ -495,6 +495,13 @@
"rounds": 1,
"type": "text"
},
{ "id": "issue9655-text",
"file": "pdfs/issue9655_reduced.pdf",
"md5": "87259a82cf3cda18e240517ca53c312a",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "jai-pdf",
"file": "pdfs/jai.pdf",
"md5": "1f5dd128c3757420a881a155f2f8ace3",