Merge pull request #1599 from yurydelendik/issue-1597

Guess the PDF character code size from the length of the hex numbers in the ToUnicode CMap
This commit is contained in:
Brendan Dahl 2012-04-24 13:49:30 -07:00
commit 0cb6d62089
4 changed files with 18 additions and 3 deletions

View File

@ -481,7 +481,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var toUnicode = dict.get('ToUnicode') ||
baseDict.get('ToUnicode');
if (toUnicode)
properties.toUnicode = this.readToUnicode(toUnicode, xref);
properties.toUnicode = this.readToUnicode(toUnicode, xref, properties);
if (properties.composite) {
// CIDSystemInfo helps to match CID to glyphs
@ -537,7 +537,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
properties.hasEncoding = hasEncoding;
},
readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref) {
readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref,
properties) {
var cmapObj = toUnicode;
var charToUnicode = [];
if (isName(cmapObj)) {
@ -626,6 +627,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
} else if (octet == 0x3E) {
if (token.length) {
// XXX guessing chars size by checking number size in the CMap
if (token.length <= 2 && properties.composite)
properties.wideChars = false;
if (token.length <= 4) {
// parsing hex number
tokens.push(parseInt(token, 16));
@ -843,6 +848,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
length1: length1,
length2: length2,
composite: composite,
wideChars: composite,
fixedPitch: false,
fontMatrix: dict.get('FontMatrix') || IDENTITY_MATRIX,
firstChar: firstChar || 0,

View File

@ -789,6 +789,7 @@ var Font = (function FontClosure() {
this.widths = properties.widths;
this.defaultWidth = properties.defaultWidth;
this.composite = properties.composite;
this.wideChars = properties.wideChars;
this.hasEncoding = properties.hasEncoding;
this.fontMatrix = properties.fontMatrix;
@ -2520,7 +2521,7 @@ var Font = (function FontClosure() {
glyphs = [];
if (this.composite) {
if (this.wideChars) {
// composite fonts have multi-byte strings; convert the string from
// single-byte to multi-byte
// XXX assuming CIDFonts are two-byte - later need to extract the

View File

@ -0,0 +1 @@
http://content1d.omroep.nl/227cbd4ae54f95dd466a7a8475fec2ea/4f95b377/nos/docs/230412_brief_koningin.pdf

View File

@ -560,6 +560,13 @@
"link": true,
"type": "eq"
},
{ "id": "issue1597",
"file": "pdfs/issue1597.pdf",
"md5": "a5ebef467fd6e2fc0aeb56c9eb725ae3",
"rounds": 1,
"link": true,
"type": "eq"
},
{ "id": "issue1317",
"file": "pdfs/issue1317.pdf",
"md5": "6fb46275b30c48c8985617d4f86199e3",