Restructure/rewrite of the Type1 font parser.

This commit is contained in:
Brendan Dahl 2013-03-27 17:15:44 -07:00
parent 921f3211a4
commit 028151d13a
2 changed files with 325 additions and 245 deletions

View File

@ -17,7 +17,7 @@
/* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset, /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode, ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream, info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
stringToBytes, TextDecoder, TODO, warn */ stringToBytes, TextDecoder, TODO, warn, Lexer */
'use strict'; 'use strict';
@ -5057,8 +5057,11 @@ var Type1CharString = (function Type1CharStringClosure() {
* Type1Parser encapsulate the needed code for parsing a Type1 font * Type1Parser encapsulate the needed code for parsing a Type1 font
* program. Some of its logic depends on the Type2 charstrings * program. Some of its logic depends on the Type2 charstrings
* structure. * structure.
* Note: this doesn't really parse the font since that would require evaluation
* of PostScript, but it is possible in most cases to extract what we need
* without a full parse.
*/ */
var Type1Parser = function type1Parser() { var Type1Parser = (function Type1ParserClosure() {
/* /*
* Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence * Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
* of Plaintext Bytes. The function took a key as a parameter which can be * of Plaintext Bytes. The function took a key as a parameter which can be
@ -5081,271 +5084,258 @@ var Type1Parser = function type1Parser() {
return decryptedString.slice(discardNumber); return decryptedString.slice(discardNumber);
} }
/* function isSpecial(c) {
* Returns an object containing a Subrs array and a CharStrings return c === '/' ||
* array extracted from and eexec encrypted block of data c === '[' || c === ']' ||
*/ c === '{' || c === '}' ||
function readNumberArray(str, index) { c === '(' || c === ')';
var start = index;
while (str[index++] != '[')
start++;
start++;
var count = 0;
while (str[index++] != ']')
count++;
str = str.substr(start, count);
str = str.trim();
// Remove adjacent spaces
str = str.replace(/\s+/g, ' ');
var array = str.split(' ');
for (var i = 0, ii = array.length; i < ii; i++)
array[i] = parseFloat(array[i] || 0);
return array;
} }
function readNumber(str, index) { function Type1Parser(stream, encrypted) {
while (str[index] == ' ') if (encrypted) {
index++; stream = new Stream(decrypt(stream.getBytes(), EEXEC_ENCRYPT_KEY, 4));
var start = index;
var count = 0;
while (str[index++] != ' ')
count++;
return parseFloat(str.substr(start, count) || 0);
}
function readBoolean(str, index) {
while (str[index] == ' ')
index++;
var start = index;
var count = 0;
var length = str.length;
while (index < length && str[index++] != ' ') {
count++;
} }
this.stream = stream;
// Use 1 and 0 since that's what type2 charstrings use.
return str.substr(start, count) === 'true' ? 1 : 0;
} }
Type1Parser.prototype = {
function isSeparator(c) { readNumberArray: function Type1Parser_readNumberArray() {
return c == ' ' || c == '\n' || c == '\x0d'; this.getToken(); // read '[' or '{' (arrays can start with either)
} var array = [];
while (true) {
this.extractFontProgram = function Type1Parser_extractFontProgram(stream) { var token = this.getToken();
var eexec = decrypt(stream, EEXEC_ENCRYPT_KEY, 4); if (token === null || token === ']' || token === '}') {
var eexecStr = ''; break;
for (var i = 0, ii = eexec.length; i < ii; i++)
eexecStr += String.fromCharCode(eexec[i]);
var glyphsSection = false, subrsSection = false;
var subrs = [], charstrings = [];
var program = {
subrs: [],
charstrings: [],
properties: {
'privateData': {
'lenIV': 4
} }
array.push(parseFloat(token || 0));
} }
}; return array;
},
var glyph = ''; readNumber: function Type1Parser_readNumber() {
var token = ''; var token = this.getToken();
var length = 0; return parseFloat(token || 0);
},
var c = ''; readInt: function Type1Parser_readInt() {
var count = eexecStr.length; // Use '| 0' to prevent setting a double into length such as the double
for (var i = 0; i < count; i++) { // does not flow into the loop variable.
var getToken = function getToken() { var token = this.getToken();
while (i < count && isSeparator(eexecStr[i])) return parseInt(token || 0, 10) | 0;
++i; },
var token = ''; readBoolean: function Type1Parser_readBoolean() {
while (i < count && !isSeparator(eexecStr[i])) var token = this.getToken();
token += eexecStr[i++];
return token; // Use 1 and 0 since that's what type2 charstrings use.
}; return token === 'true' ? 1 : 0;
var c = eexecStr[i]; },
if ((glyphsSection || subrsSection) && getToken: function Type1Parser_getToken() {
(token == 'RD' || token == '-|')) { // Eat whitespace and comments.
i++; var comment = false;
var data = eexec.slice(i, i + length); var ch;
var lenIV = program.properties.privateData['lenIV']; var stream = this.stream;
var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV); while (true) {
if ((ch = stream.lookChar()) === null)
return null;
if (glyphsSection) { if (comment) {
charstrings.push({ if (ch === '\x0a' || ch === '\x0d') {
glyph: glyph, comment = false;
encoded: encoded
});
} else {
subrs.push(encoded);
}
i += length;
token = '';
} else if (isSeparator(c)) {
// Use '| 0' to prevent setting a double into length such as the double
// does not flow into the loop variable.
length = parseInt(token, 10) | 0;
token = '';
} else {
token += c;
if (!glyphsSection) {
switch (token) {
case '/CharString':
glyphsSection = true;
break;
case '/Subrs':
++i;
var num = parseInt(getToken(), 10);
getToken(); // read in 'array'
for (var j = 0; j < num; ++j) {
var t = getToken(); // read in 'dup'
if (t == 'ND' || t == '|-' || t == 'noaccess')
break;
var index = parseInt(getToken(), 10);
if (index > j)
j = index;
var length = parseInt(getToken(), 10);
getToken(); // read in 'RD'
var data = eexec.slice(i + 1, i + 1 + length);
var lenIV = program.properties.privateData['lenIV'];
var encoded = decrypt(data, CHAR_STRS_ENCRYPT_KEY, lenIV);
i = i + 1 + length;
t = getToken(); // read in 'NP'
if (t == 'noaccess')
getToken(); // read in 'put'
subrs[index] = encoded;
}
break;
case '/BlueValues':
case '/OtherBlues':
case '/FamilyBlues':
case '/FamilyOtherBlues':
var blueArray = readNumberArray(eexecStr, i + 1);
// *Blue* values may contain invalid data: disables reading of
// those values when hinting is disabled.
if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
HINTING_ENABLED) {
program.properties.privateData[token.substring(1)] = blueArray;
}
break;
case '/StemSnapH':
case '/StemSnapV':
program.properties.privateData[token.substring(1)] =
readNumberArray(eexecStr, i + 1);
break;
case '/StdHW':
case '/StdVW':
program.properties.privateData[token.substring(1)] =
readNumberArray(eexecStr, i + 1)[0];
break;
case '/BlueShift':
case '/lenIV':
case '/BlueFuzz':
case '/BlueScale':
case '/LanguageGroup':
case '/ExpansionFactor':
program.properties.privateData[token.substring(1)] =
readNumber(eexecStr, i + 1);
break;
case '/ForceBold':
program.properties.privateData[token.substring(1)] =
readBoolean(eexecStr, i + 1);
break;
} }
} else if (c == '/') { } else if (ch === '%') {
token = glyph = ''; comment = true;
while ((c = eexecStr[++i]) != ' ') } else if (!Lexer.isSpace(ch)) {
glyph += c; break;
} }
stream.skip();
} }
} if (isSpecial(ch)) {
stream.skip();
for (var i = 0; i < charstrings.length; i++) { return ch;
var glyph = charstrings[i].glyph;
var encoded = charstrings[i].encoded;
var charString = new Type1CharString();
var error = charString.convert(encoded, subrs);
var output = charString.output;
if (error) {
// It seems when FreeType encounters an error while evaluating a glyph
// that it completely ignores the glyph so we'll mimic that behaviour
// here and put an endchar to make the validator happy.
output = [14];
} }
program.charstrings.push({ var token = '';
glyph: glyph, do {
data: output, token += ch;
seac: charString.seac, stream.skip();
lsb: charString.lsb, ch = stream.lookChar();
width: charString.width } while (ch !== null && !Lexer.isSpace(ch) && !isSpecial(ch));
}); return token;
} },
return program; /*
}; * Returns an object containing a Subrs array and a CharStrings
* array extracted from an eexec encrypted block of data
*/
extractFontProgram: function Type1Parser_extractFontProgram() {
var stream = this.stream;
this.extractFontHeader = function Type1Parser_extractFontHeader(stream, var subrs = [], charstrings = [];
properties) { var program = {
var headerString = ''; subrs: [],
for (var i = 0, ii = stream.length; i < ii; i++) charstrings: [],
headerString += String.fromCharCode(stream[i]); properties: {
'privateData': {
var token = ''; 'lenIV': 4
var count = headerString.length; }
for (var i = 0; i < count; i++) {
var getToken = function getToken() {
var character = headerString[i];
while (i < count && (isSeparator(character) || character == '/'))
character = headerString[++i];
var token = '';
while (i < count && !(isSeparator(character) || character == '/')) {
token += character;
character = headerString[++i];
} }
return token;
}; };
var token;
var c = headerString[i]; while ((token = this.getToken()) !== null) {
if (isSeparator(c)) { if (token !== '/') {
continue;
}
token = this.getToken();
switch (token) { switch (token) {
case '/FontMatrix': case 'CharStrings':
var matrix = readNumberArray(headerString, i + 1); // The number immediately following CharStrings must be greater or
// equal to the number of CharStrings.
this.getToken();
this.getToken(); // read in 'dict'
this.getToken(); // read in 'dup'
this.getToken(); // read in 'begin'
while(true) {
token = this.getToken();
if (token === null || token === 'end') {
break;
}
if (token !== '/') {
continue;
}
var glyph = this.getToken();
var length = this.readInt();
this.getToken(); // read in 'RD' or '-|'
var data = stream.makeSubStream(stream.pos + 1, length);
var lenIV = program.properties.privateData['lenIV'];
var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
lenIV);
// Skip past the required space and binary data.
stream.skip(1 + length);
token = this.getToken(); // read in 'ND' or '|-'
if (token === 'noaccess') {
this.getToken(); // read in 'def'
}
charstrings.push({
glyph: glyph,
encoded: encoded
});
}
break;
case 'Subrs':
var num = this.readInt();
this.getToken(); // read in 'array'
for (var j = 0; j < num; ++j) {
token = this.getToken(); // read in 'dup'
var index = this.readInt();
if (index > j)
j = index;
var length = this.readInt();
this.getToken(); // read in 'RD' or '-|'
var data = stream.makeSubStream(stream.pos + 1, length);
var lenIV = program.properties.privateData['lenIV'];
var encoded = decrypt(data.getBytes(), CHAR_STRS_ENCRYPT_KEY,
lenIV);
// Skip past the required space and binary data.
stream.skip(1 + length);
token = this.getToken(); // read in 'NP' or '|'
if (token === 'noaccess') {
this.getToken(); // read in 'put'
}
subrs[index] = encoded;
}
break;
case 'BlueValues':
case 'OtherBlues':
case 'FamilyBlues':
case 'FamilyOtherBlues':
var blueArray = this.readNumberArray();
// *Blue* values may contain invalid data: disables reading of
// those values when hinting is disabled.
if (blueArray.length > 0 && (blueArray.length % 2) === 0 &&
HINTING_ENABLED) {
program.properties.privateData[token] = blueArray;
}
break;
case 'StemSnapH':
case 'StemSnapV':
program.properties.privateData[token] = this.readNumberArray();
break;
case 'StdHW':
case 'StdVW':
program.properties.privateData[token] =
this.readNumberArray()[0];
break;
case 'BlueShift':
case 'lenIV':
case 'BlueFuzz':
case 'BlueScale':
case 'LanguageGroup':
case 'ExpansionFactor':
program.properties.privateData[token] = this.readNumber();
break;
case 'ForceBold':
program.properties.privateData[token] = this.readBoolean();
break;
}
}
for (var i = 0; i < charstrings.length; i++) {
var glyph = charstrings[i].glyph;
var encoded = charstrings[i].encoded;
var charString = new Type1CharString();
var error = charString.convert(encoded, subrs);
var output = charString.output;
if (error) {
// It seems when FreeType encounters an error while evaluating a glyph
// that it completely ignores the glyph so we'll mimic that behaviour
// here and put an endchar to make the validator happy.
output = [14];
}
program.charstrings.push({
glyph: glyph,
data: output,
seac: charString.seac,
lsb: charString.lsb,
width: charString.width
});
}
return program;
},
extractFontHeader: function Type1Parser_extractFontHeader(properties) {
var token;
while ((token = this.getToken()) !== null) {
if (token !== '/') {
continue;
}
token = this.getToken();
switch (token) {
case 'FontMatrix':
var matrix = this.readNumberArray();
properties.fontMatrix = matrix; properties.fontMatrix = matrix;
break; break;
case '/Encoding': case 'Encoding':
var encodingArg = getToken(); var encodingArg = this.getToken();
var encoding; var encoding;
if (!/^\d+$/.test(encodingArg)) { if (!/^\d+$/.test(encodingArg)) {
// encoding name is specified // encoding name is specified
encoding = Encodings[encodingArg]; encoding = Encodings[encodingArg];
} else { } else {
encoding = []; encoding = [];
var size = parseInt(encodingArg, 10); var size = parseInt(encodingArg, 10) | 0;
getToken(); // read in 'array' this.getToken(); // read in 'array'
for (var j = 0; j < size; j++) { for (var j = 0; j < size; j++) {
var token = getToken(); var token = this.getToken();
if (token == 'dup') { if (token === 'dup') {
var index = parseInt(getToken(), 10); var index = this.readInt();
var glyph = getToken(); this.getToken(); // read in '/'
var glyph = this.getToken();
encoding[index] = glyph; encoding[index] = glyph;
getToken(); // read the in 'put' this.getToken(); // read the in 'put'
} }
} }
} }
@ -5355,13 +5345,12 @@ var Type1Parser = function type1Parser() {
} }
break; break;
} }
token = '';
} else {
token += c;
} }
} }
}; };
};
return Type1Parser;
})();
/** /**
* The CFF class takes a Type1 file and wrap it into a * The CFF class takes a Type1 file and wrap it into a
@ -5435,17 +5424,17 @@ var CFFStandardStrings = [
'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold' 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman', 'Semibold'
]; ];
var type1Parser = new Type1Parser();
// Type1Font is also a CIDFontType0. // Type1Font is also a CIDFontType0.
var Type1Font = function Type1Font(name, file, properties) { var Type1Font = function Type1Font(name, file, properties) {
// Get the data block containing glyphs and subrs informations // Get the data block containing glyphs and subrs informations
var headerBlock = file.getBytes(properties.length1); var headerBlock = new Stream(file.getBytes(properties.length1));
type1Parser.extractFontHeader(headerBlock, properties); var headerBlockParser = new Type1Parser(headerBlock);
headerBlockParser.extractFontHeader(properties);
// Decrypt the data blocks and retrieve it's content // Decrypt the data blocks and retrieve it's content
var eexecBlock = file.getBytes(properties.length2); var eexecBlock = new Stream(file.getBytes(properties.length2));
var data = type1Parser.extractFontProgram(eexecBlock); var eexecBlockParser = new Type1Parser(eexecBlock, true);
var data = eexecBlockParser.extractFontProgram();
for (var info in data.properties) for (var info in data.properties)
properties[info] = data.properties[info]; properties[info] = data.properties[info];

View File

@ -1,7 +1,7 @@
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings, /* globals expect, it, describe, CFFCompiler, CFFParser, CFFIndex, CFFStrings,
SEAC_ANALYSIS_ENABLED:true */ SEAC_ANALYSIS_ENABLED:true, Type1Parser, StringStream */
'use strict'; 'use strict';
@ -297,4 +297,95 @@ describe('font', function() {
}); });
// TODO a lot more compiler tests // TODO a lot more compiler tests
}); });
// Specs for Type1Parser: token splitting, the primitive readers
// (number / boolean / number array), comment skipping, and the two
// extraction entry points (font program and font header).
describe('Type1Parser', function() {
  it('splits tokens', function() {
    // Delimiters such as '/', '[' and ']' are expected to come back as tokens
    // of their own, even without whitespace around them.
    var stream = new StringStream('/BlueValues[-17 0]noaccess def');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('/');
    expect(parser.getToken()).toEqual('BlueValues');
    expect(parser.getToken()).toEqual('[');
    expect(parser.getToken()).toEqual('-17');
    expect(parser.getToken()).toEqual('0');
    expect(parser.getToken()).toEqual(']');
    expect(parser.getToken()).toEqual('noaccess');
    expect(parser.getToken()).toEqual('def');
    // Once the stream is exhausted the parser reports null.
    expect(parser.getToken()).toEqual(null);
  });

  it('handles glued tokens', function() {
    // A name that directly follows another token must still be split at '/'.
    var stream = new StringStream('dup/CharStrings');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('dup');
    expect(parser.getToken()).toEqual('/');
    expect(parser.getToken()).toEqual('CharStrings');
  });

  it('ignores whitespace', function() {
    var stream = new StringStream('\nab c\t');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('ab');
    expect(parser.getToken()).toEqual('c');
  });

  it('parses numbers', function() {
    var stream = new StringStream('123');
    var parser = new Type1Parser(stream);
    expect(parser.readNumber()).toEqual(123);
  });

  it('parses booleans', function() {
    // Booleans are reported as 1 / 0 rather than true / false.
    var stream = new StringStream('true false');
    var parser = new Type1Parser(stream);
    expect(parser.readBoolean()).toEqual(1);
    expect(parser.readBoolean()).toEqual(0);
  });

  it('parses number arrays', function() {
    var stream = new StringStream('[1 2]');
    var parser = new Type1Parser(stream);
    expect(parser.readNumberArray()).toEqual([1, 2]);

    // Variation on spacing. Reuse the existing bindings instead of
    // re-declaring `stream` in the same function scope.
    stream = new StringStream('[ 1 2 ]');
    parser = new Type1Parser(stream);
    expect(parser.readNumberArray()).toEqual([1, 2]);
  });

  it('skips comments', function() {
    // Every line starting with '%' is a comment and must not yield a token.
    var stream = new StringStream(
      '%!PS-AdobeFont-1.0: CMSY10 003.002\n' +
      '%%Title: CMSY10\n' +
      '%Version: 003.002\n' +
      'FontDirectory');
    var parser = new Type1Parser(stream);
    expect(parser.getToken()).toEqual('FontDirectory');
  });

  it('parses font program', function() {
    // Each 'RD' is followed by one separator space and then the binary
    // payload; the payloads used here are a single byte ('x') long.
    var stream = new StringStream(
      '/ExpansionFactor 99\n' +
      '/Subrs 1 array\n' +
      'dup 0 1 RD x noaccess put\n' +
      '/CharStrings 46 dict dup begin\n' +
      '/.notdef 1 RD x ND' + '\n' +
      'end');
    var parser = new Type1Parser(stream);
    var program = parser.extractFontProgram();
    expect(program.charstrings.length).toEqual(1);
    expect(program.properties.privateData.ExpansionFactor).toEqual(99);
  });

  it('parses font header font matrix', function() {
    var stream = new StringStream(
      '/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def\n');
    var parser = new Type1Parser(stream);
    var props = {};
    // extractFontHeader reports its results through `props`; the return
    // value is not needed by this spec.
    parser.extractFontHeader(props);
    expect(props.fontMatrix).toEqual([0.001, 0, 0, 0.001, 0, 0]);
  });

  it('parses font header encoding', function() {
    // Only the 'dup <index> /<glyph> put' entry is expected to show up in
    // the resulting encoding.
    var stream = new StringStream(
      '/Encoding 256 array\n' +
      '0 1 255 {1 index exch /.notdef put} for\n' +
      'dup 33 /arrowright put\n' +
      'readonly def\n');
    var parser = new Type1Parser(stream);
    var props = {};
    parser.extractFontHeader(props);
    expect(props.baseEncoding[33]).toEqual('arrowright');
  });
});
}); });