diff --git a/src/core/evaluator.js b/src/core/evaluator.js index d080ac6c4..aa15c5fe2 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2197,6 +2197,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var length1 = fontFile.dict.get('Length1'); var length2 = fontFile.dict.get('Length2'); + var length3 = fontFile.dict.get('Length3'); } } @@ -2207,6 +2208,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { file: fontFile, length1: length1, length2: length2, + length3: length3, loadedName: baseDict.loadedName, composite: composite, wideChars: composite, diff --git a/src/core/fonts.js b/src/core/fonts.js index cff289d7a..4fb026966 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -54,6 +54,7 @@ var shadow = sharedUtil.shadow; var stringToBytes = sharedUtil.stringToBytes; var string32 = sharedUtil.string32; var warn = sharedUtil.warn; +var MissingDataException = sharedUtil.MissingDataException; var Stream = coreStream.Stream; var Lexer = coreParser.Lexer; var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode; @@ -3637,231 +3638,352 @@ var CFFStandardStrings = [ ]; // Type1Font is also a CIDFontType0. -var Type1Font = function Type1Font(name, file, properties) { - // Some bad generators embed pfb file as is, we have to strip 6-byte headers. - // Also, length1 and length2 might be off by 6 bytes as well. - // http://www.math.ubc.ca/~cass/piscript/type1.pdf - var PFB_HEADER_SIZE = 6; - var headerBlockLength = properties.length1; - var eexecBlockLength = properties.length2; - var pfbHeader = file.peekBytes(PFB_HEADER_SIZE); - var pfbHeaderPresent = pfbHeader[0] === 0x80 && pfbHeader[1] === 0x01; - if (pfbHeaderPresent) { - file.skip(PFB_HEADER_SIZE); - headerBlockLength = (pfbHeader[5] << 24) | (pfbHeader[4] << 16) | - (pfbHeader[3] << 8) | pfbHeader[2]; - } +var Type1Font = (function Type1FontClosure() { + function findBlock(streamBytes, signature, startIndex) { + var streamBytesLength = streamBytes.length; + var signatureLength = signature.length; + var scanLength = streamBytesLength - signatureLength; - // Get the data block containing glyphs and subrs informations - var headerBlock = new Stream(file.getBytes(headerBlockLength)); - var headerBlockParser = new Type1Parser(headerBlock); - headerBlockParser.extractFontHeader(properties); - - if (pfbHeaderPresent) { - pfbHeader = file.getBytes(PFB_HEADER_SIZE); - eexecBlockLength = (pfbHeader[5] << 24) | (pfbHeader[4] << 16) | - (pfbHeader[3] << 8) | pfbHeader[2]; - } - - // Decrypt the data blocks and retrieve it's content - var eexecBlock = new Stream(file.getBytes(eexecBlockLength)); - var eexecBlockParser = new Type1Parser(eexecBlock, true); - var data = eexecBlockParser.extractFontProgram(); - for (var info in data.properties) { - properties[info] = data.properties[info]; - } - - var charstrings = data.charstrings; - var type2Charstrings = this.getType2Charstrings(charstrings); - var subrs = this.getType2Subrs(data.subrs); - - this.charstrings = charstrings; - this.data = this.wrap(name, type2Charstrings, this.charstrings, - subrs, properties); - this.seacs = this.getSeacs(data.charstrings); -}; - -Type1Font.prototype = { - get numGlyphs() { - return this.charstrings.length + 1; - }, - - getCharset: function Type1Font_getCharset() { - var charset = ['.notdef']; - var charstrings = this.charstrings; - for (var glyphId = 0; glyphId < charstrings.length; glyphId++) { - charset.push(charstrings[glyphId].glyphName); + var i = startIndex, j, found = false; + while (i < scanLength) { + j = 0; + while (j < signatureLength && streamBytes[i + j] === signature[j]) { + j++; + } + if (j >= signatureLength) { // `signature` found, skip over whitespace. + i += j; + while (i < streamBytesLength && Lexer.isSpace(streamBytes[i])) { + i++; + } + found = true; + break; + } + i++; } - return charset; - }, + return { + found: found, + length: i, + }; + } - getGlyphMapping: function Type1Font_getGlyphMapping(properties) { - var charstrings = this.charstrings; - var glyphNames = ['.notdef'], glyphId; - for (glyphId = 0; glyphId < charstrings.length; glyphId++) { - glyphNames.push(charstrings[glyphId].glyphName); + function getHeaderBlock(stream, suggestedLength) { + var EEXEC_SIGNATURE = [0x65, 0x65, 0x78, 0x65, 0x63]; + + var streamStartPos = stream.pos; // Save the initial stream position. + var headerBytes, headerBytesLength, block; + try { + headerBytes = stream.getBytes(suggestedLength); + headerBytesLength = headerBytes.length; + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + // Ignore errors if the `suggestedLength` is huge enough that a Uint8Array + // cannot hold the result of `getBytes`, and fallback to simply checking + // the entire stream (fixes issue3928.pdf). } - var encoding = properties.builtInEncoding; - if (encoding) { - var builtInEncoding = Object.create(null); - for (var charCode in encoding) { - glyphId = glyphNames.indexOf(encoding[charCode]); - if (glyphId >= 0) { - builtInEncoding[charCode] = glyphId; + + if (headerBytesLength === suggestedLength) { + // Most of the time `suggestedLength` is correct, so to speed things up we + // initially only check the last few bytes to see if the header was found. + // Otherwise we (potentially) check the entire stream to prevent errors in + // `Type1Parser` (fixes issue5686.pdf). + block = findBlock(headerBytes, EEXEC_SIGNATURE, + suggestedLength - 2 * EEXEC_SIGNATURE.length); + + if (block.found && block.length === suggestedLength) { + return { + stream: new Stream(headerBytes), + length: suggestedLength, + }; + } + } + warn('Invalid "Length1" property in Type1 font -- trying to recover.'); + stream.pos = streamStartPos; // Reset the stream position. + + var SCAN_BLOCK_LENGTH = 2048; + var actualLength; + while (true) { + var scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH); + block = findBlock(scanBytes, EEXEC_SIGNATURE, 0); + + if (block.length === 0) { + break; + } + stream.pos += block.length; // Update the stream position. + + if (block.found) { + actualLength = stream.pos - streamStartPos; + break; + } + } + stream.pos = streamStartPos; // Reset the stream position. + + if (actualLength) { + return { + stream: new Stream(stream.getBytes(actualLength)), + length: actualLength, + }; + } + warn('Unable to recover "Length1" property in Type1 font -- using as is.'); + return { + stream: new Stream(stream.getBytes(suggestedLength)), + length: suggestedLength, + }; + } + + function getEexecBlock(stream, suggestedLength) { + // We should ideally parse the eexec block to ensure that `suggestedLength` + // is correct, so we don't truncate the block data if it's too small. + // However, this would also require checking if the fixed-content portion + // exists (using the 'Length3' property), and ensuring that it's valid. + // + // Given that `suggestedLength` almost always is correct, all the validation + // would require a great deal of unnecessary parsing for most fonts. + // To save time, we always fetch the entire stream instead, which also avoid + // issues if `suggestedLength` is huge (see comment in `getHeaderBlock`). + // + // NOTE: This means that the function can include the fixed-content portion + // in the returned eexec block. In practice this does *not* seem to matter, + // since `Type1Parser_extractFontProgram` will skip over any non-commands. + var eexecBytes = stream.getBytes(); + return { + stream: new Stream(eexecBytes), + length: eexecBytes.length, + }; + } + + function Type1Font(name, file, properties) { + // Some bad generators embed pfb file as is, we have to strip 6-byte header. + // Also, length1 and length2 might be off by 6 bytes as well. + // http://www.math.ubc.ca/~cass/piscript/type1.pdf + var PFB_HEADER_SIZE = 6; + var headerBlockLength = properties.length1; + var eexecBlockLength = properties.length2; + var pfbHeader = file.peekBytes(PFB_HEADER_SIZE); + var pfbHeaderPresent = pfbHeader[0] === 0x80 && pfbHeader[1] === 0x01; + if (pfbHeaderPresent) { + file.skip(PFB_HEADER_SIZE); + headerBlockLength = (pfbHeader[5] << 24) | (pfbHeader[4] << 16) | + (pfbHeader[3] << 8) | pfbHeader[2]; + } + + // Get the data block containing glyphs and subrs informations + var headerBlock = getHeaderBlock(file, headerBlockLength); + headerBlockLength = headerBlock.length; + var headerBlockParser = new Type1Parser(headerBlock.stream); + headerBlockParser.extractFontHeader(properties); + + if (pfbHeaderPresent) { + pfbHeader = file.getBytes(PFB_HEADER_SIZE); + eexecBlockLength = (pfbHeader[5] << 24) | (pfbHeader[4] << 16) | + (pfbHeader[3] << 8) | pfbHeader[2]; + } + + // Decrypt the data blocks and retrieve it's content + var eexecBlock = getEexecBlock(file, eexecBlockLength); + eexecBlockLength = eexecBlock.length; + var eexecBlockParser = new Type1Parser(eexecBlock.stream, true); + var data = eexecBlockParser.extractFontProgram(); + for (var info in data.properties) { + properties[info] = data.properties[info]; + } + + var charstrings = data.charstrings; + var type2Charstrings = this.getType2Charstrings(charstrings); + var subrs = this.getType2Subrs(data.subrs); + + this.charstrings = charstrings; + this.data = this.wrap(name, type2Charstrings, this.charstrings, + subrs, properties); + this.seacs = this.getSeacs(data.charstrings); + } + + Type1Font.prototype = { + get numGlyphs() { + return this.charstrings.length + 1; + }, + + getCharset: function Type1Font_getCharset() { + var charset = ['.notdef']; + var charstrings = this.charstrings; + for (var glyphId = 0; glyphId < charstrings.length; glyphId++) { + charset.push(charstrings[glyphId].glyphName); + } + return charset; + }, + + getGlyphMapping: function Type1Font_getGlyphMapping(properties) { + var charstrings = this.charstrings; + var glyphNames = ['.notdef'], glyphId; + for (glyphId = 0; glyphId < charstrings.length; glyphId++) { + glyphNames.push(charstrings[glyphId].glyphName); + } + var encoding = properties.builtInEncoding; + if (encoding) { + var builtInEncoding = Object.create(null); + for (var charCode in encoding) { + glyphId = glyphNames.indexOf(encoding[charCode]); + if (glyphId >= 0) { + builtInEncoding[charCode] = glyphId; + } } } - } - return type1FontGlyphMapping(properties, builtInEncoding, glyphNames); - }, + return type1FontGlyphMapping(properties, builtInEncoding, glyphNames); + }, - getSeacs: function Type1Font_getSeacs(charstrings) { - var i, ii; - var seacMap = []; - for (i = 0, ii = charstrings.length; i < ii; i++) { - var charstring = charstrings[i]; - if (charstring.seac) { - // Offset by 1 for .notdef - seacMap[i + 1] = charstring.seac; - } - } - return seacMap; - }, - - getType2Charstrings: function Type1Font_getType2Charstrings( - type1Charstrings) { - var type2Charstrings = []; - for (var i = 0, ii = type1Charstrings.length; i < ii; i++) { - type2Charstrings.push(type1Charstrings[i].charstring); - } - return type2Charstrings; - }, - - getType2Subrs: function Type1Font_getType2Subrs(type1Subrs) { - var bias = 0; - var count = type1Subrs.length; - if (count < 1133) { - bias = 107; - } else if (count < 33769) { - bias = 1131; - } else { - bias = 32768; - } - - // Add a bunch of empty subrs to deal with the Type2 bias - var type2Subrs = []; - var i; - for (i = 0; i < bias; i++) { - type2Subrs.push([0x0B]); - } - - for (i = 0; i < count; i++) { - type2Subrs.push(type1Subrs[i]); - } - - return type2Subrs; - }, - - wrap: function Type1Font_wrap(name, glyphs, charstrings, subrs, properties) { - var cff = new CFF(); - cff.header = new CFFHeader(1, 0, 4, 4); - - cff.names = [name]; - - var topDict = new CFFTopDict(); - // CFF strings IDs 0...390 are predefined names, so refering - // to entries in our own String INDEX starts at SID 391. - topDict.setByName('version', 391); - topDict.setByName('Notice', 392); - topDict.setByName('FullName', 393); - topDict.setByName('FamilyName', 394); - topDict.setByName('Weight', 395); - topDict.setByName('Encoding', null); // placeholder - topDict.setByName('FontMatrix', properties.fontMatrix); - topDict.setByName('FontBBox', properties.bbox); - topDict.setByName('charset', null); // placeholder - topDict.setByName('CharStrings', null); // placeholder - topDict.setByName('Private', null); // placeholder - cff.topDict = topDict; - - var strings = new CFFStrings(); - strings.add('Version 0.11'); // Version - strings.add('See original notice'); // Notice - strings.add(name); // FullName - strings.add(name); // FamilyName - strings.add('Medium'); // Weight - cff.strings = strings; - - cff.globalSubrIndex = new CFFIndex(); - - var count = glyphs.length; - var charsetArray = [0]; - var i, ii; - for (i = 0; i < count; i++) { - var index = CFFStandardStrings.indexOf(charstrings[i].glyphName); - // TODO: Insert the string and correctly map it. Previously it was - // thought mapping names that aren't in the standard strings to .notdef - // was fine, however in issue818 when mapping them all to .notdef the - // adieresis glyph no longer worked. - if (index === -1) { - index = 0; - } - charsetArray.push((index >> 8) & 0xff, index & 0xff); - } - cff.charset = new CFFCharset(false, 0, [], charsetArray); - - var charStringsIndex = new CFFIndex(); - charStringsIndex.add([0x8B, 0x0E]); // .notdef - for (i = 0; i < count; i++) { - charStringsIndex.add(glyphs[i]); - } - cff.charStrings = charStringsIndex; - - var privateDict = new CFFPrivateDict(); - privateDict.setByName('Subrs', null); // placeholder - var fields = [ - 'BlueValues', - 'OtherBlues', - 'FamilyBlues', - 'FamilyOtherBlues', - 'StemSnapH', - 'StemSnapV', - 'BlueShift', - 'BlueFuzz', - 'BlueScale', - 'LanguageGroup', - 'ExpansionFactor', - 'ForceBold', - 'StdHW', - 'StdVW' - ]; - for (i = 0, ii = fields.length; i < ii; i++) { - var field = fields[i]; - if (!(field in properties.privateData)) { - continue; - } - var value = properties.privateData[field]; - if (isArray(value)) { - // All of the private dictionary array data in CFF must be stored as - // "delta-encoded" numbers. - for (var j = value.length - 1; j > 0; j--) { - value[j] -= value[j - 1]; // ... difference from previous value + getSeacs: function Type1Font_getSeacs(charstrings) { + var i, ii; + var seacMap = []; + for (i = 0, ii = charstrings.length; i < ii; i++) { + var charstring = charstrings[i]; + if (charstring.seac) { + // Offset by 1 for .notdef + seacMap[i + 1] = charstring.seac; } } - privateDict.setByName(field, value); - } - cff.topDict.privateDict = privateDict; + return seacMap; + }, - var subrIndex = new CFFIndex(); - for (i = 0, ii = subrs.length; i < ii; i++) { - subrIndex.add(subrs[i]); - } - privateDict.subrsIndex = subrIndex; + getType2Charstrings: function Type1Font_getType2Charstrings( + type1Charstrings) { + var type2Charstrings = []; + for (var i = 0, ii = type1Charstrings.length; i < ii; i++) { + type2Charstrings.push(type1Charstrings[i].charstring); + } + return type2Charstrings; + }, - var compiler = new CFFCompiler(cff); - return compiler.compile(); - } -}; + getType2Subrs: function Type1Font_getType2Subrs(type1Subrs) { + var bias = 0; + var count = type1Subrs.length; + if (count < 1133) { + bias = 107; + } else if (count < 33769) { + bias = 1131; + } else { + bias = 32768; + } + + // Add a bunch of empty subrs to deal with the Type2 bias + var type2Subrs = []; + var i; + for (i = 0; i < bias; i++) { + type2Subrs.push([0x0B]); + } + + for (i = 0; i < count; i++) { + type2Subrs.push(type1Subrs[i]); + } + + return type2Subrs; + }, + + wrap: function Type1Font_wrap(name, glyphs, charstrings, subrs, + properties) { + var cff = new CFF(); + cff.header = new CFFHeader(1, 0, 4, 4); + + cff.names = [name]; + + var topDict = new CFFTopDict(); + // CFF strings IDs 0...390 are predefined names, so refering + // to entries in our own String INDEX starts at SID 391. + topDict.setByName('version', 391); + topDict.setByName('Notice', 392); + topDict.setByName('FullName', 393); + topDict.setByName('FamilyName', 394); + topDict.setByName('Weight', 395); + topDict.setByName('Encoding', null); // placeholder + topDict.setByName('FontMatrix', properties.fontMatrix); + topDict.setByName('FontBBox', properties.bbox); + topDict.setByName('charset', null); // placeholder + topDict.setByName('CharStrings', null); // placeholder + topDict.setByName('Private', null); // placeholder + cff.topDict = topDict; + + var strings = new CFFStrings(); + strings.add('Version 0.11'); // Version + strings.add('See original notice'); // Notice + strings.add(name); // FullName + strings.add(name); // FamilyName + strings.add('Medium'); // Weight + cff.strings = strings; + + cff.globalSubrIndex = new CFFIndex(); + + var count = glyphs.length; + var charsetArray = [0]; + var i, ii; + for (i = 0; i < count; i++) { + var index = CFFStandardStrings.indexOf(charstrings[i].glyphName); + // TODO: Insert the string and correctly map it. Previously it was + // thought mapping names that aren't in the standard strings to .notdef + // was fine, however in issue818 when mapping them all to .notdef the + // adieresis glyph no longer worked. + if (index === -1) { + index = 0; + } + charsetArray.push((index >> 8) & 0xff, index & 0xff); + } + cff.charset = new CFFCharset(false, 0, [], charsetArray); + + var charStringsIndex = new CFFIndex(); + charStringsIndex.add([0x8B, 0x0E]); // .notdef + for (i = 0; i < count; i++) { + charStringsIndex.add(glyphs[i]); + } + cff.charStrings = charStringsIndex; + + var privateDict = new CFFPrivateDict(); + privateDict.setByName('Subrs', null); // placeholder + var fields = [ + 'BlueValues', + 'OtherBlues', + 'FamilyBlues', + 'FamilyOtherBlues', + 'StemSnapH', + 'StemSnapV', + 'BlueShift', + 'BlueFuzz', + 'BlueScale', + 'LanguageGroup', + 'ExpansionFactor', + 'ForceBold', + 'StdHW', + 'StdVW' + ]; + for (i = 0, ii = fields.length; i < ii; i++) { + var field = fields[i]; + if (!(field in properties.privateData)) { + continue; + } + var value = properties.privateData[field]; + if (isArray(value)) { + // All of the private dictionary array data in CFF must be stored as + // "delta-encoded" numbers. + for (var j = value.length - 1; j > 0; j--) { + value[j] -= value[j - 1]; // ... difference from previous value + } + } + privateDict.setByName(field, value); + } + cff.topDict.privateDict = privateDict; + + var subrIndex = new CFFIndex(); + for (i = 0, ii = subrs.length; i < ii; i++) { + subrIndex.add(subrs[i]); + } + privateDict.subrsIndex = subrIndex; + + var compiler = new CFFCompiler(cff); + return compiler.compile(); + } + }; + + return Type1Font; +})(); var CFFFont = (function CFFFontClosure() { function CFFFont(file, properties) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index b675adf06..9cdc808b9 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -49,6 +49,8 @@ !issue3207r.pdf !issue3263r.pdf !issue3879r.pdf +!issue5686.pdf +!issue3928.pdf !close-path-bug.pdf !issue6019.pdf !issue6621.pdf diff --git a/test/pdfs/issue3928.pdf b/test/pdfs/issue3928.pdf new file mode 100644 index 000000000..f47c1464b Binary files /dev/null and b/test/pdfs/issue3928.pdf differ diff --git a/test/pdfs/issue5686.pdf b/test/pdfs/issue5686.pdf new file mode 100644 index 000000000..6525d1107 Binary files /dev/null and b/test/pdfs/issue5686.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 6113fcdd9..c0efbe3b5 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1443,6 +1443,22 @@ "link": false, "type": "eq" }, + { "id": "issue5686", + "file": "pdfs/issue5686.pdf", + "md5": "78d16b9df07a355ad00d70504a9194f8", + "rounds": 1, + "link": false, + "type": "eq", + "about": "Type1 font where Length1/Length2 are slightly incorrect." + }, + { "id": "issue3928", + "file": "pdfs/issue3928.pdf", + "md5": "1963493f843e981cbe768b707ef7f08a", + "rounds": 1, + "link": false, + "type": "eq", + "about": "Type1 font where Length1/Length2 are several orders of magnitude too large." + }, { "id": "html5checker", "file": "pdfs/html5checker.pdf", "md5": "74bbd80d1e7eb5f2951582233ef9ebab",