pdf.js/utils/fonts_utils.js

411 lines
12 KiB
JavaScript
Raw Normal View History

2011-09-13 02:37:33 +09:00
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2011-06-19 15:21:30 +09:00
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
2011-07-06 15:06:45 +09:00
'use strict';
2011-08-26 02:20:19 +09:00
/*
 * The Type2 reader code below is only used for debugging purposes, since
 * Type2 is only a CharString format and is never used directly as a font
 * file.
 *
 * The code here is useful for dumping the content of a .cff file in order
 * to investigate the similarities between a Type1 CharString and a Type2
 * CharString, or to understand the structure of the CFF format.
 */
2011-08-26 02:20:19 +09:00
/*
 * Build a charset by assigning the glyph name and the human readable form
 * of the glyph data.
 *
 * aStream must be positioned on the format byte of the charset table and
 * aCharstrings is the array of binary charstrings indexed by glyph id.
 * Glyph 0 is always registered as '.notdef' since the charset table never
 * lists it.
 *
 * Supported table formats:
 *   0 - one 2-byte SID per glyph,
 *   1 - ranges made of a 2-byte first SID and a 1-byte 'left' count,
 *   2 - same as format 1 but with a 2-byte 'left' count.
 *
 * Returns an object mapping each glyph name (looked up in CFFStrings) to
 * the result of readCharstringEncoding for that glyph. error() is called
 * for any other format.
 */
function readCharset(aStream, aCharstrings) {
  var charset = {};

  var format = aStream.getByte();
  if (format != 0 && format != 1 && format != 2) {
    error('Invalid charset format');
    return charset;
  }

  var glyphCount = aCharstrings.length;
  charset['.notdef'] = readCharstringEncoding(aCharstrings[0]);

  if (format == 0) {
    for (var gid = 1; gid < glyphCount; gid++) {
      var sid = aStream.getByte() << 8 | aStream.getByte();
      charset[CFFStrings[sid]] = readCharstringEncoding(aCharstrings[gid]);
    }
    return charset;
  }

  // Formats 1 and 2: every range covers 'numLeft + 1' consecutive SIDs. The
  // glyph index keeps running across ranges, it does not restart at each one.
  var glyph = 1;
  while (glyph < glyphCount) {
    var first = aStream.getByte() << 8 | aStream.getByte();
    var numLeft = aStream.getByte();
    if (format == 2)
      numLeft = numLeft << 8 | aStream.getByte();

    var rangeStart = glyph;
    for (var j = 0; j <= numLeft && glyph < glyphCount; j++) {
      var name = CFFStrings[first + j];
      charset[name] = readCharstringEncoding(aCharstrings[glyph++]);
    }

    // A range that assigns nothing means the table is truncated or broken;
    // stop instead of spinning forever.
    if (glyph == rangeStart)
      break;
  }

  return charset;
}
2011-08-26 02:20:19 +09:00
/*
 * Take a Type2 binary charstring as input and transform it to a human
 * readable representation as specified by the 'The Type 2 Charstring Format',
 * chapter 3.1.
 *
 * aString is an array of byte values. The result is an array of tokens:
 * numbers for operands and the matching CFFEncodingMap entry for operators.
 * An empty string is returned when no charstring is given.
 *
 * Number encodings handled here:
 *   32..246      one byte integer (-107..107),
 *   247..254     two bytes integer (+/-108..1131),
 *   28           16-bit signed integer stored in the next two bytes,
 *   255          16.16 signed fixed point stored in the next four bytes.
 */
function readCharstringEncoding(aString) {
  if (!aString)
    return '';

  var charstringTokens = [];
  var count = aString.length;

  // The length of the data following hintmask/cntrmask depends on how many
  // stem hints have been declared so far, so keep track of the operands seen
  // since the last operator and of the declared stems.
  var operandCount = 0;
  var stemCount = 0;

  for (var i = 0; i < count; ) {
    var value = aString[i++];
    if (value < 0)
      continue;

    var token = null;
    if (value <= 31 && value != 28) {
      // Operators
      if (value == 12) {
        // Escape: the real operator is selected by the following byte.
        token = CFFEncodingMap[value][aString[i++]];
      } else {
        token = CFFEncodingMap[value];
        if (value == 1 || value == 3 || value == 18 || value == 23) {
          // hstem, vstem, hstemhm, vstemhm: two operands per stem (an odd
          // leading width operand is dropped by the shift).
          stemCount += operandCount >> 1;
        } else if (value == 19 || value == 20) {
          // hintmask, cntrmask: pending operands are an implicit vstemhm.
          // One mask bit per stem; fall back to a single byte when no stem
          // is known (e.g. stems declared inside a subroutine).
          stemCount += operandCount >> 1;
          i += Math.max(1, (stemCount + 7) >> 3);
        }
      }
      operandCount = 0;
    } else {
      // Operands
      if (value == 28) {
        // Shift up then down again to propagate the sign bit.
        token = (aString[i++] << 24 | aString[i++] << 16) >> 16;
      } else if (value < 247) {
        token = value - 139;
      } else if (value < 251) {
        token = (value - 247) * 256 + aString[i++] + 108;
      } else if (value < 255) {
        token = -(value - 251) * 256 - aString[i++] - 108;
      } else { // value == 255
        token = (aString[i++] << 24 | aString[i++] << 16 |
                 aString[i++] << 8 | aString[i++]) / 65536;
      }
      operandCount++;
    }

    charstringTokens.push(token);
  }

  return charstringTokens;
}
2011-08-26 02:20:19 +09:00
/*
 * Take a binary DICT Data as input and transform it into a human readable
 * form as specified by 'The Compact Font Format Specification', chapter 5.
 *
 * aString is an array of byte values and aMap maps operator bytes to their
 * description (escaped operators are looked up in aMap[12]). The result is
 * an array where operands are numbers and operators are the aMap entries.
 *
 * Operand encodings handled here:
 *   32..246      one byte integer (-107..107),
 *   247..254     two bytes integer (+/-108..1131),
 *   28           16-bit signed integer stored in the next two bytes,
 *   29           32-bit signed integer stored in the next four bytes,
 *   30           real number stored as a sequence of nibbles.
 *
 * error() is called when the reserved byte 255 is found; a null token is
 * pushed for it if error() returns.
 */
function readFontDictData(aString, aMap) {
  var fontDictDataTokens = [];

  var count = aString.length;
  for (var i = 0; i < count; ) {
    var value = aString[i++];
    var token = null;

    if (value == 12) {
      token = aMap[value][aString[i++]];
    } else if (value == 28) {
      // Shift up then down again to propagate the sign bit.
      token = (aString[i++] << 24 | aString[i++] << 16) >> 16;
    } else if (value == 29) {
      token = aString[i++] << 24 |
              aString[i++] << 16 |
              aString[i++] << 8 |
              aString[i++];
    } else if (value == 30) {
      var text = '';
      var parsed = false;
      // Stop at the 0xF terminator, or at the end of the data if the
      // terminator is missing.
      while (!parsed && i < count) {
        var octet = aString[i++];
        var nibbles = [octet >> 4, octet & 0x0F];
        for (var j = 0; j < nibbles.length && !parsed; j++) {
          var nibble = nibbles[j];
          switch (nibble) {
            case 0xA:
              text += '.';
              break;
            case 0xB:
              text += 'E';
              break;
            case 0xC:
              text += 'E-';
              break;
            case 0xD:
              // Reserved, ignored.
              break;
            case 0xE:
              text += '-';
              break;
            case 0xF:
              parsed = true;
              break;
            default:
              text += nibble;
              break;
          }
        }
      }
      token = parseFloat(text);
    } else if (value <= 31) {
      token = aMap[value];
    } else if (value <= 246) {
      token = value - 139;
    } else if (value <= 250) {
      token = (value - 247) * 256 + aString[i++] + 108;
    } else if (value <= 254) {
      token = -(value - 251) * 256 - aString[i++] - 108;
    } else if (value == 255) {
      error('255 is not a valid DICT command');
    }

    fontDictDataTokens.push(token);
  }

  return fontDictDataTokens;
}
2011-08-26 02:20:19 +09:00
/*
 * Take a stream as input and return an array of objects.
 * In CFF an INDEX is a structure with the following format:
 *  {
 *    count: 2 bytes (Number of objects stored in INDEX),
 *    offsize: 1 byte (Offset array element size),
 *    offset: [count + 1] bytes (Offsets array),
 *    data: - (Objects data)
 *  }
 *
 * An empty INDEX is made of the 2 bytes count field only.
 *
 * More explanations are given in the 'CFF Font Format Specification',
 * chapter 5.
 *
 * aStream is read from its current position and is left just after the last
 * object of the INDEX. Each returned object is an array of bytes when
 * aIsByte is true and an array of chars otherwise. error() is called for an
 * offset size outside of 0..4.
 */
function readFontIndexData(aStream, aIsByte) {
  var count = aStream.getByte() << 8 | aStream.getByte();

  // Nothing follows the count of an empty INDEX, reading further would eat
  // the beginning of the next structure.
  if (count == 0)
    return [];

  var offsize = aStream.getByte();

  function getNextOffset() {
    switch (offsize) {
      case 0:
        return 0;
      case 1:
        return aStream.getByte();
      case 2:
        return aStream.getByte() << 8 | aStream.getByte();
      case 3:
        return aStream.getByte() << 16 | aStream.getByte() << 8 |
               aStream.getByte();
      case 4:
        // '>>> 0' keeps offsets above 0x7FFFFFFF positive.
        return (aStream.getByte() << 24 | aStream.getByte() << 16 |
                aStream.getByte() << 8 | aStream.getByte()) >>> 0;
    }
    error(offsize + ' is not a valid offset size');
    return null;
  }

  var offsets = [];
  for (var i = 0; i < count + 1; i++)
    offsets.push(getNextOffset());

  // 'dump' is not defined in this file's global scope; only log when the
  // environment provides one.
  if (typeof dump == 'function') {
    dump('Found ' + count + ' objects at offsets :' +
         offsets + ' (offsize: ' + offsize + ')');
  }

  // Now extract the objects. Offsets are 1-based and relative to the byte
  // preceding the objects data.
  var relativeOffset = aStream.pos;
  var objects = [];
  for (var i = 0; i < count; i++) {
    var start = offsets[i];
    var end = offsets[i + 1];
    aStream.pos = relativeOffset + start - 1;

    var data = [];
    for (var j = start; j < end; j++)
      data.push(aIsByte ? aStream.getByte() : aStream.getChar());
    objects.push(data);
  }

  return objects;
}
/*
 * Debugging parser for CFF data.
 *
 * When aFilePath is given the file is fetched synchronously and wrapped in
 * a Stream stored in this.data. Without a path nothing is loaded and the
 * caller is expected to hand its own stream to parse().
 */
var Type2Parser = function(aFilePath) {
  var font = new Dict();

  if (aFilePath) {
    var xhr = new XMLHttpRequest();
    xhr.open('GET', aFilePath, false);
    xhr.mozResponseType = xhr.responseType = 'arraybuffer';
    xhr.expected = (document.URL.indexOf('file:') == 0) ? 0 : 200;
    xhr.send(null);
    this.data = new Stream(xhr.mozResponseArrayBuffer || xhr.mozResponse ||
                           xhr.responseArrayBuffer || xhr.response);
  }

  // Turn on this flag for additional debugging logs
  var debug = false;

  function dump(aStr) {
    if (debug)
      log(aStr);
  }

  /*
   * Decode a DICT with readFontDictData and store every operator found into
   * 'font', using the operands that were pushed before it.
   */
  function parseAsToken(aString, aMap) {
    var decoded = readFontDictData(aString, aMap);

    var stack = [];
    var count = decoded.length;
    for (var i = 0; i < count; i++) {
      var token = decoded[i];
      if (IsNum(token)) {
        stack.push(token);
      } else {
        switch (token.operand) {
          case 'SID':
            font.set(token.name, CFFStrings[stack.pop()]);
            break;
          case 'number number':
            // The operand pushed last is popped first: it is stored as the
            // offset and the one pushed before it as the size.
            font.set(token.name, {
              offset: stack.pop(),
              size: stack.pop()
            });
            break;
          case 'boolean':
            font.set(token.name, stack.pop());
            break;
          case 'delta':
            font.set(token.name, stack.pop());
            break;
          default:
            if (token.operand && token.operand.length) {
              // One value is popped per entry of token.operand, so the
              // array holds them in reverse push order.
              var array = [];
              for (var j = 0; j < token.operand.length; j++)
                array.push(stack.pop());
              font.set(token.name, array);
            } else {
              font.set(token.name, stack.pop());
            }
            break;
        }
      }
    }
  }

  /*
   * Parse the CFF data found in aStream, filling the internal font
   * dictionary along the way. Returns the charset built by readCharset, or
   * undefined when it has not been read.
   */
  this.parse = function(aStream) {
    font.set('major', aStream.getByte());
    font.set('minor', aStream.getByte());
    var hdrSize = aStream.getByte();
    font.set('hdrSize', hdrSize);
    font.set('offsize', aStream.getByte());

    // The Name INDEX starts hdrSize bytes after the beginning of the header,
    // skip anything coming after the 4 bytes read above.
    for (var skipped = 4; skipped < hdrSize; skipped++)
      aStream.getByte();

    // Read the NAME Index
    dump('Reading Index: Names');
    font.set('Names', readFontIndexData(aStream));
    dump('Names: ' + font.get('Names'));

    // Read the Top Dict Index
    dump('Reading Index: TopDict');
    var topDict = readFontIndexData(aStream, true);
    dump('TopDict: ' + topDict);

    // Read the String Index
    dump('Reading Index: Strings');
    var strings = readFontIndexData(aStream);
    dump('strings: ' + strings);

    // Fill up the Strings dictionary with the new unique strings
    for (var i = 0; i < strings.length; i++)
      CFFStrings.push(strings[i].join(''));

    // Parse the TopDict operator
    var count = topDict.length;
    for (var i = 0; i < count; i++)
      parseAsToken(topDict[i], CFFDictDataMap);

    // Read the Global Subr Index that comes just after the Strings Index
    // (cf. "The Compact Font Format Specification" Chapter 16)
    dump('Reading Global Subr Index');
    var subrs = readFontIndexData(aStream, true);
    dump(subrs);

    // Reading Private Dict, when the Top DICT declares one
    var priv = font.get('Private');
    if (priv) {
      dump('Reading Private Dict (offset: ' + priv.offset +
           ' size: ' + priv.size + ')');
      aStream.pos = priv.offset;

      var privateDict = [];
      for (var i = 0; i < priv.size; i++)
        privateDict.push(aStream.getByte());
      dump('private:' + privateDict);
      parseAsToken(privateDict, CFFDictPrivateDataMap);
    }

    for (var p in font.map)
      dump(p + '::' + font.get(p));

    // Read CharStrings Index
    var charStringsOffset = font.get('CharStrings');
    dump('Read CharStrings Index (offset: ' + charStringsOffset + ')');
    aStream.pos = charStringsOffset;
    var charStrings = readFontIndexData(aStream, true);

    // Read Charset
    dump('Read Charset for ' + charStrings.length + ' glyphs');
    var charsetEntry = font.get('charset');
    var charset;
    // NOTE(review): assumes font.get() yields undefined/null for an entry
    // that has not been set; the charset then defaults to 0.
    if (charsetEntry == null || charsetEntry == 0) {
      error('Need to support CFFISOAdobeCharset');
    } else if (charsetEntry == 1) {
      error('Need to support CFFExpert');
    } else if (charsetEntry == 2) {
      error('Need to support CFFExpertSubsetCharset');
    } else {
      aStream.pos = charsetEntry;
      charset = readCharset(aStream, charStrings);
    }
    return charset;
  };
};
2011-06-13 12:30:02 +09:00
/*
 * To try the Type2 decoder on a local file in the current directory:
 *
 *  var cff = new Type2Parser("file.cff");
 *  cff.parse(cff.data);
 *
 * To try the Type2 decoder on a custom built CFF array:
 *
 *  var file = new Uint8Array(cffFileArray, 0, cffFileSize);
 *  var parser = new Type2Parser();
 *  parser.parse(new Stream(file));
 *
 */
2011-08-26 02:20:19 +09:00
/*
 * Write a file to the disk (works only on Firefox in privileged mode).
 * This is useful for dumping a font file to the disk and checking with
 * fontforge or the ots program what's wrong with the file.
 *
 *  writeToFile(fontData, "/tmp/pdf.js." + fontCount + ".cff");
 *
 * aBytes is the array of bytes to write and aFilePath the path of the file
 * to create. Nothing is done, and nothing is reported, when the 'netscape'
 * object is not available.
 */
function writeToFile(aBytes, aFilePath) {
  // 'window' does not exist in every environment (workers, shells).
  if (typeof window == 'undefined' || !('netscape' in window))
    return;

  netscape.security.PrivilegeManager.enablePrivilege('UniversalXPConnect');
  var Cc = Components.classes,
      Ci = Components.interfaces;

  var file = Cc['@mozilla.org/file/local;1'].createInstance(Ci.nsILocalFile);
  file.initWithPath(aFilePath);

  var stream = Cc['@mozilla.org/network/file-output-stream;1']
                 .createInstance(Ci.nsIFileOutputStream);
  // Flags: PR_RDWR (0x04) | PR_CREATE_FILE (0x08) | PR_TRUNCATE (0x20).
  // Permissions: 0x180 is 0600 in octal.
  stream.init(file, 0x04 | 0x08 | 0x20, 0x180, 0);

  try {
    var bos = Cc['@mozilla.org/binaryoutputstream;1']
                .createInstance(Ci.nsIBinaryOutputStream);
    bos.setOutputStream(stream);
    bos.writeByteArray(aBytes, aBytes.length);
  } finally {
    // Release the file even if the write failed.
    stream.close();
  }
}