Restructure/rewrite of the Type1 font parser.

This commit is contained in:
Brendan Dahl 2013-03-27 17:15:44 -07:00
parent 921f3211a4
commit 028151d13a
2 changed files with 325 additions and 245 deletions

View File

@ -17,7 +17,7 @@
/* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset, /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode, ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream, info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
stringToBytes, TextDecoder, TODO, warn */ stringToBytes, TextDecoder, TODO, warn, Lexer */
'use strict'; 'use strict';
@ -5057,8 +5057,11 @@ var Type1CharString = (function Type1CharStringClosure() {
* Type1Parser encapsulate the needed code for parsing a Type1 font * Type1Parser encapsulate the needed code for parsing a Type1 font
* program. Some of its logic depends on the Type2 charstrings * program. Some of its logic depends on the Type2 charstrings
* structure. * structure.
* Note: this doesn't really parse the font since that would require evaluation
* of PostScript, but it is possible in most cases to extract what we need
* without a full parse.
*/ */
var Type1Parser = function type1Parser() { var Type1Parser = (function Type1ParserClosure() {
/* /*
* Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
* of Plaintext Bytes. The function took a key as a parameter which can be * of Plaintext Bytes. The function took a key as a parameter which can be
@ -5081,73 +5084,93 @@ var Type1Parser = function type1Parser() {
return decryptedString.slice(discardNumber); return decryptedString.slice(discardNumber);
} }
/**
 * Tells whether `c` is one of the single-character delimiters that the
 * tokenizer returns as a token of its own: '/', '[', ']', '{', '}', '(', ')'.
 *
 * @param {*} c - Value to classify; only an exact one-character match counts.
 * @returns {boolean} true when `c` is strictly equal to one of the delimiters.
 */
function isSpecial(c) {
  // `switch` compares with strict equality, so non-string values and longer
  // strings never match.
  switch (c) {
    case '/':
    case '[':
    case ']':
    case '{':
    case '}':
    case '(':
    case ')':
      return true;
    default:
      return false;
  }
}
/**
 * Creates a tokenizer/parser over a Type1 font stream.
 *
 * @param stream - Source stream. When `encrypted` is truthy it must provide
 *   getBytes(); its bytes are run through decrypt() with EEXEC_ENCRYPT_KEY
 *   and wrapped in a new Stream.
 * @param encrypted - Truthy when the stream content is eexec encrypted.
 */
function Type1Parser(stream, encrypted) {
  // The third argument to decrypt (4) is presumably the number of leading
  // bytes to discard after decryption - confirm against decrypt().
  this.stream = encrypted ?
    new Stream(decrypt(stream.getBytes(), EEXEC_ENCRYPT_KEY, 4)) :
    stream;
}
Type1Parser.prototype = {
readNumberArray: function Type1Parser_readNumberArray() {
this.getToken(); // read '[' or '{' (arrays can start with either)
var array = [];
while (true) {
var token = this.getToken();
if (token === null || token === ']' || token === '}') {
break;
}
array.push(parseFloat(token || 0));
}
return array;
},
readNumber: function Type1Parser_readNumber() {
var token = this.getToken();
return parseFloat(token || 0);
},
readInt: function Type1Parser_readInt() {
// Use '| 0' to prevent setting a double into length such as the double
// does not flow into the loop variable.
var token = this.getToken();
return parseInt(token || 0, 10) | 0;
},
readBoolean: function Type1Parser_readBoolean() {
var token = this.getToken();
// Use 1 and 0 since that's what type2 charstrings use.
return token === 'true' ? 1 : 0;
},
getToken: function Type1Parser_getToken() {
// Eat whitespace and comments.
var comment = false;
var ch;
var stream = this.stream;
while (true) {
if ((ch = stream.lookChar()) === null)
return null;
if (comment) {
if (ch === '\x0a' || ch === '\x0d') {
comment = false;
}
} else if (ch === '%') {
comment = true;
} else if (!Lexer.isSpace(ch)) {
break;
}
stream.skip();
}
if (isSpecial(ch)) {
stream.skip();
return ch;
}
var token = '';
do {
token += ch;
stream.skip();
ch = stream.lookChar();
} while (ch !== null && !Lexer.isSpace(ch) && !isSpecial(ch));
return token;
},
/* /*
* Returns an object containing a Subrs array and a CharStrings * Returns an object containing a Subrs array and a CharStrings
* array extracted from and eexec encrypted block of data * array extracted from and eexec encrypted block of data
*/ */
function readNumberArray(str, index) { extractFontProgram: function Type1Parser_extractFontProgram() {
var start = index; var stream = this.stream;
while (str[index++] != '[')
start++;
start++;
var count = 0;
while (str[index++] != ']')
count++;
str = str.substr(start, count);
str = str.trim();
// Remove adjacent spaces
str = str.replace(/\s+/g, ' ');
var array = str.split(' ');
for (var i = 0, ii = array.length; i < ii; i++)
array[i] = parseFloat(array[i] || 0);
return array;
}
function readNumber(str, index) {
while (str[index] == ' ')
index++;
var start = index;
var count = 0;
while (str[index++] != ' ')
count++;
return parseFloat(str.substr(start, count) || 0);
}
function readBoolean(str, index) {
while (str[index] == ' ')
index++;
var start = index;
var count = 0;
var length = str.length;
while (index < length && str[index++] != ' ') {
count++;
}
// Use 1 and 0 since that's what type2 charstrings use.
return str.substr(start, count) === 'true' ? 1 : 0;
}
function isSeparator(c) {
return c == ' ' || c == '\n' || c == '\x0d';
}
this.extractFontProgram = function Type1Parser_extractFontProgram(stream) {
var eexec = decrypt(stream, EEXEC_ENCRYPT_KEY, 4);
var eexecStr = '';
for (var i = 0, ii = eexec.length; i < ii; i++)
eexecStr += String.fromCharCode(eexec[i]);
var glyphsSection = false, subrsSection = false;
var subrs = [], charstrings = []; var subrs = [], charstrings = [];
var program = { var program = {
subrs: [], subrs: [],
@ -5158,120 +5181,104 @@ var Type1Parser = function type1Parser() {
} }
} }
}; };
var token;
while ((token = this.getToken()) !== null) {
if (token !== '/') {
continue;
}
token = this.getToken();
switch (token) {
case 'CharStrings':
// The number immediately following CharStrings must be greater or
// equal to the number of CharStrings.
this.getToken();
this.getToken(); // read in 'dict'
this.getToken(); // read in 'dup'
this.getToken(); // read in 'begin'
while(true) {
token = this.getToken();
if (token === null || token === 'end') {
break;
}
var glyph = ''; if (token !== '/') {
var token = ''; continue;
var length = 0; }
var glyph = this.getToken();
var c = ''; var length = this.readInt();
var count = eexecStr.length; this.getToken(); // read in 'RD' or '-|'
for (var i = 0; i < count; i++) { var data = stream.makeSubStream(stream.pos + 1, length);
var getToken = function getToken() {
while (i < count && isSeparator(eexecStr[i]))
++i;
var token = '';
while (i < count && !isSeparator(eexecStr[i]))
token += eexecStr[i++];
return token;
};
var c = eexecStr[i];
if ((glyphsSection || subrsSection) &&
(token == 'RD' || token == '-|')) {
i++;
var data = eexec.slice(i, i + length);
var lenIV = program.properties.privateData['lenIV']; var lenIV = program.properties.privateData['lenIV'];
var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV); var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
lenIV);
if (glyphsSection) { // Skip past the required space and binary data.
stream.skip(1 + length);
token = this.getToken(); // read in 'ND' or '|-'
if (token === 'noaccess') {
this.getToken(); // read in 'def'
}
charstrings.push({ charstrings.push({
glyph: glyph, glyph: glyph,
encoded: encoded encoded: encoded
}); });
} else {
subrs.push(encoded);
} }
i += length;
token = '';
} else if (isSeparator(c)) {
// Use '| 0' to prevent setting a double into length such as the double
// does not flow into the loop variable.
length = parseInt(token, 10) | 0;
token = '';
} else {
token += c;
if (!glyphsSection) {
switch (token) {
case '/CharString':
glyphsSection = true;
break; break;
case '/Subrs': case 'Subrs':
++i; var num = this.readInt();
var num = parseInt(getToken(), 10); this.getToken(); // read in 'array'
getToken(); // read in 'array'
for (var j = 0; j < num; ++j) { for (var j = 0; j < num; ++j) {
var t = getToken(); // read in 'dup' token = this.getToken(); // read in 'dup'
if (t == 'ND' || t == '|-' || t == 'noaccess') var index = this.readInt();
break;
var index = parseInt(getToken(), 10);
if (index > j) if (index > j)
j = index; j = index;
var length = parseInt(getToken(), 10); var length = this.readInt();
getToken(); // read in 'RD' this.getToken(); // read in 'RD' or '-|'
var data = eexec.slice(i + 1, i + 1 + length); var data = stream.makeSubStream(stream.pos + 1, length);
var lenIV = program.properties.privateData['lenIV']; var lenIV = program.properties.privateData['lenIV'];
var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV); var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
i = i + 1 + length; lenIV);
t = getToken(); // read in 'NP' // Skip past the required space and binary data.
if (t == 'noaccess') stream.skip(1 + length);
getToken(); // read in 'put' token = this.getToken(); // read in 'NP' or '|'
if (token === 'noaccess') {
this.getToken(); // read in 'put'
}
subrs[index] = encoded; subrs[index] = encoded;
} }
break; break;
case '/BlueValues': case 'BlueValues':
case '/OtherBlues': case 'OtherBlues':
case '/FamilyBlues': case 'FamilyBlues':
case '/FamilyOtherBlues': case 'FamilyOtherBlues':
var blueArray = readNumberArray(eexecStr, i + 1); var blueArray = this.readNumberArray();
// *Blue* values may contain invalid data: disables reading of // *Blue* values may contain invalid data: disables reading of
// those values when hinting is disabled. // those values when hinting is disabled.
if (blueArray.length > 0 && (blueArray.length % 2) === 0 && if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
HINTING_ENABLED) { HINTING_ENABLED) {
program.properties.privateData[token.substring(1)] = blueArray; program.properties.privateData[token] = blueArray;
} }
break; break;
case '/StemSnapH': case 'StemSnapH':
case '/StemSnapV': case 'StemSnapV':
program.properties.privateData[token.substring(1)] = program.properties.privateData[token] = this.readNumberArray();
readNumberArray(eexecStr, i + 1);
break; break;
case '/StdHW': case 'StdHW':
case '/StdVW': case 'StdVW':
program.properties.privateData[token.substring(1)] = program.properties.privateData[token] =
readNumberArray(eexecStr, i + 1)[0]; this.readNumberArray()[0];
break; break;
case '/BlueShift': case 'BlueShift':
case '/lenIV': case 'lenIV':
case '/BlueFuzz': case 'BlueFuzz':
case '/BlueScale': case 'BlueScale':
case '/LanguageGroup': case 'LanguageGroup':
case '/ExpansionFactor': case 'ExpansionFactor':
program.properties.privateData[token.substring(1)] = program.properties.privateData[token] = this.readNumber();
readNumber(eexecStr, i + 1);
break; break;
case '/ForceBold': case 'ForceBold':
program.properties.privateData[token.substring(1)] = program.properties.privateData[token] = this.readBoolean();
readBoolean(eexecStr, i + 1);
break; break;
} }
} else if (c == '/') {
token = glyph = '';
while ((c = eexecStr[++i]) != ' ')
glyph += c;
}
}
} }
for (var i = 0; i < charstrings.length; i++) { for (var i = 0; i < charstrings.length; i++) {
@ -5296,56 +5303,39 @@ var Type1Parser = function type1Parser() {
} }
return program; return program;
}; },
this.extractFontHeader = function Type1Parser_extractFontHeader(stream, extractFontHeader: function Type1Parser_extractFontHeader(properties) {
properties) { var token;
var headerString = ''; while ((token = this.getToken()) !== null) {
for (var i = 0, ii = stream.length; i < ii; i++) if (token !== '/') {
headerString += String.fromCharCode(stream[i]); continue;
var token = '';
var count = headerString.length;
for (var i = 0; i < count; i++) {
var getToken = function getToken() {
var character = headerString[i];
while (i < count && (isSeparator(character) || character == '/'))
character = headerString[++i];
var token = '';
while (i < count && !(isSeparator(character) || character == '/')) {
token += character;
character = headerString[++i];
} }
token = this.getToken();
return token;
};
var c = headerString[i];
if (isSeparator(c)) {
switch (token) { switch (token) {
case '/FontMatrix': case 'FontMatrix':
var matrix = readNumberArray(headerString, i + 1); var matrix = this.readNumberArray();
properties.fontMatrix = matrix; properties.fontMatrix = matrix;
break; break;
case '/Encoding': case 'Encoding':
var encodingArg = getToken(); var encodingArg = this.getToken();
var encoding; var encoding;
if (!/^\d+$/.test(encodingArg)) { if (!/^\d+$/.test(encodingArg)) {
// encoding name is specified // encoding name is specified
encoding = Encodings[encodingArg]; encoding = Encodings[encodingArg];
} else { } else {
encoding = []; encoding = [];
var size = parseInt(encodingArg, 10); var size = parseInt(encodingArg, 10) | 0;
getToken(); // read in 'array' this.getToken(); // read in 'array'
for (var j = 0; j < size; j++) { for (var j = 0; j < size; j++) {
var token = getToken(); var token = this.getToken();
if (token == 'dup') { if (token === 'dup') {
var index = parseInt(getToken(), 10); var index = this.readInt();
var glyph = getToken(); this.getToken(); // read in '/'
var glyph = this.getToken();
encoding[index] = glyph; encoding[index] = glyph;
getToken(); // read the in 'put' this.getToken(); // read the in 'put'
} }
} }
} }
@ -5355,13 +5345,12 @@ var Type1Parser = function type1Parser() {
} }
break; break;
} }
token = '';
} else {
token += c;
} }
} }
}; };
};
return Type1Parser;
})();
/** /**
* The CFF class takes a Type1 file and wrap it into a * The CFF class takes a Type1 file and wrap it into a
@ -5435,17 +5424,17 @@ var CFFStandardStrings = [
'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold' 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold'
]; ];
var type1Parser = new Type1Parser();
// Type1Font is also a CIDFontType0. // Type1Font is also a CIDFontType0.
var Type1Font = function Type1Font(name, file, properties) { var Type1Font = function Type1Font(name, file, properties) {
// Get the data block containing glyphs and subrs informations // Get the data block containing glyphs and subrs informations
var headerBlock = file.getBytes(properties.length1); var headerBlock = new Stream(file.getBytes(properties.length1));
type1Parser.extractFontHeader(headerBlock, properties); var headerBlockParser = new Type1Parser(headerBlock);
headerBlockParser.extractFontHeader(properties);
// Decrypt the data blocks and retrieve it's content // Decrypt the data blocks and retrieve it's content
var eexecBlock = file.getBytes(properties.length2); var eexecBlock = new Stream(file.getBytes(properties.length2));
var data = type1Parser.extractFontProgram(eexecBlock); var eexecBlockParser = new Type1Parser(eexecBlock, true);
var data = eexecBlockParser.extractFontProgram();
for (var info in data.properties) for (var info in data.properties)
properties[info] = data.properties[info]; properties[info] = data.properties[info];

View File

@ -1,7 +1,7 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings, /* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings,
SEAC_ANALYSIS_ENABLED:true */ SEAC_ANALYSIS_ENABLED:true, Type1Parser, StringStream */
'use strict'; 'use strict';
@ -297,4 +297,95 @@ describe('font', function() {
}); });
// TODO a lot more compiler tests // TODO a lot more compiler tests
}); });
// Unit tests for Type1Parser: the tokenizer (getToken), the typed readers
// (readNumber, readBoolean, readNumberArray) and the two extraction entry
// points (extractFontProgram, extractFontHeader). All streams are
// unencrypted StringStreams.
describe('Type1Parser', function() {
  it('splits tokens', function() {
    var stream = new StringStream('/BlueValues[-17 0]noaccess def');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('/');
    expect(parser.getToken()).toEqual('BlueValues');
    expect(parser.getToken()).toEqual('[');
    expect(parser.getToken()).toEqual('-17');
    expect(parser.getToken()).toEqual('0');
    expect(parser.getToken()).toEqual(']');
    expect(parser.getToken()).toEqual('noaccess');
    expect(parser.getToken()).toEqual('def');
    expect(parser.getToken()).toEqual(null);
  });
  it('handles glued tokens', function() {
    var stream = new StringStream('dup/CharStrings');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('dup');
    expect(parser.getToken()).toEqual('/');
    expect(parser.getToken()).toEqual('CharStrings');
  });
  it('ignores whitespace', function() {
    var stream = new StringStream('\nab c\t');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('ab');
    expect(parser.getToken()).toEqual('c');
  });
  it('parses numbers', function() {
    var stream = new StringStream('123');
    var parser = new Type1Parser(stream);
    expect(parser.readNumber()).toEqual(123);
  });
  it('parses booleans', function() {
    var stream = new StringStream('true false');
    var parser = new Type1Parser(stream);
    expect(parser.readBoolean()).toEqual(1);
    expect(parser.readBoolean()).toEqual(0);
  });
  it('parses number arrays', function() {
    var stream = new StringStream('[1 2]');
    var parser = new Type1Parser(stream);
    expect(parser.readNumberArray()).toEqual([1, 2]);
    // Variation on spacing. The locals are reassigned rather than declared
    // a second time.
    stream = new StringStream('[ 1 2 ]');
    parser = new Type1Parser(stream);
    expect(parser.readNumberArray()).toEqual([1, 2]);
  });
  it('skips comments', function() {
    var stream = new StringStream(
      '%!PS-AdobeFont-1.0: CMSY10 003.002\n' +
      '%%Title: CMSY10\n' +
      '%Version: 003.002\n' +
      'FontDirectory');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('FontDirectory');
  });
  it('parses font program', function() {
    var stream = new StringStream(
      '/ExpansionFactor 99\n' +
      '/Subrs 1 array\n' +
      'dup 0 1 RD x noaccess put\n'+
      '/CharStrings 46 dict dup begin\n' +
      '/.notdef 1 RD x ND' + '\n' +
      'end');
    var parser = new Type1Parser(stream);
    var program = parser.extractFontProgram();
    expect(program.charstrings.length).toEqual(1);
    expect(program.properties.privateData.ExpansionFactor).toEqual(99);
  });
  it('parses font header font matrix', function() {
    var stream = new StringStream(
      '/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n');
    var parser = new Type1Parser(stream);
    var props = {};
    // extractFontHeader reports its results through `props`.
    parser.extractFontHeader(props);
    expect(props.fontMatrix).toEqual([0.001, 0, 0, 0.001, 0, 0]);
  });
  it('parses font header encoding', function() {
    var stream = new StringStream(
      '/Encoding 256 array\n' +
      '0 1 255 {1 index exch /.notdef put} for\n' +
      'dup 33 /arrowright put\n' +
      'readonly def\n');
    var parser = new Type1Parser(stream);
    var props = {};
    // extractFontHeader reports its results through `props`.
    parser.extractFontHeader(props);
    expect(props.baseEncoding[33]).toEqual('arrowright');
  });
});
}); });