diff --git a/src/core/evaluator.js b/src/core/evaluator.js index d080ac6c4..aa15c5fe2 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2197,6 +2197,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var length1 = fontFile.dict.get('Length1'); var length2 = fontFile.dict.get('Length2'); + var length3 = fontFile.dict.get('Length3'); } } @@ -2207,6 +2208,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { file: fontFile, length1: length1, length2: length2, + length3: length3, loadedName: baseDict.loadedName, composite: composite, wideChars: composite, diff --git a/src/core/fonts.js b/src/core/fonts.js index 7fb4060d3..4fb026966 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -54,6 +54,7 @@ var shadow = sharedUtil.shadow; var stringToBytes = sharedUtil.stringToBytes; var string32 = sharedUtil.string32; var warn = sharedUtil.warn; +var MissingDataException = sharedUtil.MissingDataException; var Stream = coreStream.Stream; var Lexer = coreParser.Lexer; var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode; @@ -3638,6 +3639,120 @@ var CFFStandardStrings = [ // Type1Font is also a CIDFontType0. var Type1Font = (function Type1FontClosure() { + function findBlock(streamBytes, signature, startIndex) { + var streamBytesLength = streamBytes.length; + var signatureLength = signature.length; + var scanLength = streamBytesLength - signatureLength; + + var i = startIndex, j, found = false; + while (i < scanLength) { + j = 0; + while (j < signatureLength && streamBytes[i + j] === signature[j]) { + j++; + } + if (j >= signatureLength) { // `signature` found, skip over whitespace. 
+ i += j; + while (i < streamBytesLength && Lexer.isSpace(streamBytes[i])) { + i++; + } + found = true; + break; + } + i++; + } + return { + found: found, + length: i, + }; + } + + function getHeaderBlock(stream, suggestedLength) { + var EEXEC_SIGNATURE = [0x65, 0x65, 0x78, 0x65, 0x63]; + + var streamStartPos = stream.pos; // Save the initial stream position. + var headerBytes, headerBytesLength, block; + try { + headerBytes = stream.getBytes(suggestedLength); + headerBytesLength = headerBytes.length; + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + // Ignore errors if the `suggestedLength` is huge enough that a Uint8Array + // cannot hold the result of `getBytes`, and fall back to simply checking + // the entire stream (fixes issue3928.pdf). + } + + if (headerBytesLength === suggestedLength) { + // Most of the time `suggestedLength` is correct, so to speed things up we + // initially only check the last few bytes to see if the header was found. + // Otherwise we (potentially) check the entire stream to prevent errors in + // `Type1Parser` (fixes issue5686.pdf). + block = findBlock(headerBytes, EEXEC_SIGNATURE, + suggestedLength - 2 * EEXEC_SIGNATURE.length); + + if (block.found && block.length === suggestedLength) { + return { + stream: new Stream(headerBytes), + length: suggestedLength, + }; + } + } + warn('Invalid "Length1" property in Type1 font -- trying to recover.'); + stream.pos = streamStartPos; // Reset the stream position. + + var SCAN_BLOCK_LENGTH = 2048; + var actualLength; + while (true) { + var scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH); + block = findBlock(scanBytes, EEXEC_SIGNATURE, 0); + + if (block.length === 0) { + break; + } + stream.pos += block.length; // Update the stream position. + + if (block.found) { + actualLength = stream.pos - streamStartPos; + break; + } + } + stream.pos = streamStartPos; // Reset the stream position. 
+ + if (actualLength) { + return { + stream: new Stream(stream.getBytes(actualLength)), + length: actualLength, + }; + } + warn('Unable to recover "Length1" property in Type1 font -- using as is.'); + return { + stream: new Stream(stream.getBytes(suggestedLength)), + length: suggestedLength, + }; + } + + function getEexecBlock(stream, suggestedLength) { + // We should ideally parse the eexec block to ensure that `suggestedLength` + // is correct, so we don't truncate the block data if it's too small. + // However, this would also require checking if the fixed-content portion + // exists (using the 'Length3' property), and ensuring that it's valid. + // + // Given that `suggestedLength` almost always is correct, all the validation + // would require a great deal of unnecessary parsing for most fonts. + // To save time, we always fetch the entire stream instead, which also avoids + // issues if `suggestedLength` is huge (see comment in `getHeaderBlock`). + // + // NOTE: This means that the function can include the fixed-content portion + // in the returned eexec block. In practice this does *not* seem to matter, + // since `Type1Parser_extractFontProgram` will skip over any non-commands. + var eexecBytes = stream.getBytes(); + return { + stream: new Stream(eexecBytes), + length: eexecBytes.length, + }; + } + function Type1Font(name, file, properties) { // Some bad generators embed pfb file as is, we have to strip 6-byte header. // Also, length1 and length2 might be off by 6 bytes as well. 
@@ -3654,8 +3769,9 @@ var Type1Font = (function Type1FontClosure() { } // Get the data block containing glyphs and subrs informations - var headerBlock = new Stream(file.getBytes(headerBlockLength)); - var headerBlockParser = new Type1Parser(headerBlock); + var headerBlock = getHeaderBlock(file, headerBlockLength); + headerBlockLength = headerBlock.length; + var headerBlockParser = new Type1Parser(headerBlock.stream); headerBlockParser.extractFontHeader(properties); if (pfbHeaderPresent) { @@ -3665,8 +3781,9 @@ var Type1Font = (function Type1FontClosure() { } // Decrypt the data blocks and retrieve it's content - var eexecBlock = new Stream(file.getBytes(eexecBlockLength)); - var eexecBlockParser = new Type1Parser(eexecBlock, true); + var eexecBlock = getEexecBlock(file, eexecBlockLength); + eexecBlockLength = eexecBlock.length; + var eexecBlockParser = new Type1Parser(eexecBlock.stream, true); var data = eexecBlockParser.extractFontProgram(); for (var info in data.properties) { properties[info] = data.properties[info]; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index b675adf06..9cdc808b9 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -49,6 +49,8 @@ !issue3207r.pdf !issue3263r.pdf !issue3879r.pdf +!issue5686.pdf +!issue3928.pdf !close-path-bug.pdf !issue6019.pdf !issue6621.pdf diff --git a/test/pdfs/issue3928.pdf b/test/pdfs/issue3928.pdf new file mode 100644 index 000000000..f47c1464b Binary files /dev/null and b/test/pdfs/issue3928.pdf differ diff --git a/test/pdfs/issue5686.pdf b/test/pdfs/issue5686.pdf new file mode 100644 index 000000000..6525d1107 Binary files /dev/null and b/test/pdfs/issue5686.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 6113fcdd9..c0efbe3b5 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1443,6 +1443,22 @@ "link": false, "type": "eq" }, + { "id": "issue5686", + "file": "pdfs/issue5686.pdf", + "md5": "78d16b9df07a355ad00d70504a9194f8", + "rounds": 1, 
+ "link": false, + "type": "eq", + "about": "Type1 font where Length1/Length2 are slightly incorrect." + }, + { "id": "issue3928", + "file": "pdfs/issue3928.pdf", + "md5": "1963493f843e981cbe768b707ef7f08a", + "rounds": 1, + "link": false, + "type": "eq", + "about": "Type1 font where Length1/Length2 are several orders of magnitude too large." + }, { "id": "html5checker", "file": "pdfs/html5checker.pdf", "md5": "74bbd80d1e7eb5f2951582233ef9ebab",