Fix the charCodeOf method in IdentityToUnicodeMap in order to prevent text selection from breaking

After PR 6590, `font.spaceWidth` is now called in more cases than before (in `PartialEvaluator_getTextContent`), which exposed an underlying issue with `IdentityToUnicodeMap_charCodeOf` throwing an error.
This breaks text selection in some PDF files found in the wild; hence this patch replaces the `error` call with an actual implementation (modelled after `IdentityCMap_charCodeOf`).
This commit is contained in:
Jonas Jenwald 2015-12-05 12:22:09 +01:00
parent e2aca385c6
commit 4810b7b8fc
4 changed files with 79 additions and 3 deletions

View File

@ -15,7 +15,7 @@
/* globals FONT_IDENTITY_MATRIX, FontType, warn, GlyphsUnicode, error, string32,
readUint32, Stream, FontRendererFactory, shadow, stringToBytes,
bytesToString, info, assert, IdentityCMap, Name, CMapFactory, PDFJS,
isNum, Lexer, isArray, ISOAdobeCharset, ExpertCharset,
isNum, Lexer, isArray, ISOAdobeCharset, ExpertCharset, isInt,
ExpertSubsetCharset, Util, DingbatsGlyphsUnicode */
'use strict';
@ -2256,7 +2256,7 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {
},
charCodeOf: function (v) {
error('should not call .charCodeOf');
return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
}
};
@ -4700,7 +4700,7 @@ var Font = (function FontClosure() {
}
}
// ... via toUnicode map
if (!charcode && 'toUnicode' in this) {
if (!charcode && this.toUnicode) {
charcode = this.toUnicode.charCodeOf(glyphUnicode);
}
// setting it to unicode if negative or undefined

View File

@ -179,6 +179,7 @@
!issue5549.pdf
!issue5475.pdf
!annotation-border-styles.pdf
!IdentityToUnicodeMap_charCodeOf.pdf
!issue5481.pdf
!issue5567.pdf
!issue5701.pdf

View File

@ -0,0 +1,68 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Kids [3 0 R]
/Count 1
/Type /Pages
>>
endobj
3 0 obj
<<
/Parent 2 0 R
/MediaBox [0 0 200 50]
/Resources
<<
/Font
<<
/F1 4 0 R
>>
>>
/Contents 5 0 R
/Type /Page
>>
endobj
4 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/ToUnicode /Identity-H
/Encoding /WinAnsiEncoding
/Type /Font
>>
endobj
5 0 obj
<<
/Length 37
>>
stream
BT
10 20 TD
/F1 20 Tf
(ABCdef) Tj
ET
endstream
endobj xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000125 00000 n
0000000254 00000 n
0000000378 00000 n
trailer
<<
/Root 1 0 R
/Size 6
>>
startxref
467
%%EOF

View File

@ -1453,6 +1453,13 @@
"lastPage": 1,
"type": "eq"
},
{ "id": "IdentityToUnicodeMap_charCodeOf",
"file": "pdfs/IdentityToUnicodeMap_charCodeOf.pdf",
"md5": "da030686418c5e37d889127a05dafb83",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "bug894572",
"file": "pdfs/bug894572.pdf",
"md5": "e54a6b0451939f685ed37e3d46e16158",