CMaps binary packing

This commit is contained in:
Yury Delendik 2014-03-14 13:22:02 -05:00
parent e5cd75083f
commit 69efd9cb96
13 changed files with 1156 additions and 35 deletions

437
external/cmapscompress/compress.js vendored Normal file
View File

@ -0,0 +1,437 @@
/* Copyright 2014 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var fs = require('fs');
var path = require('path');
var parseAdobeCMap = require('./parse.js').parseAdobeCMap;
var optimizeCMap = require('./optimize.js').optimizeCMap;
/**
 * Compresses one textual Adobe CMap file into the packed binary format.
 *
 * The packed stream is assembled as a string of hex digits and converted to
 * bytes when written. Layout: one header byte ((type << 1) | wmode), an
 * optional 0xE0 record (comment), an optional 0xE1 record (usecmap), then one
 * record per body item. Each body record starts with a flag byte
 * (type << 5) | (sequence ? 0x10 : 0) | dataSize, followed by the subitem count
 * and the subitems, where everything after the first subitem is stored as a
 * delta relative to the previous subitem.
 *
 * @param {string} srcPath - Path of the textual CMap file to read.
 * @param {string} destPath - Path of the packed file to write.
 * @param {boolean} verify - When truthy, the packed data is decoded again
 *   with parseCMap and compared with the optimized input.
 * @returns {{orig: number, packed: number}} Size in bytes of the source file
 *   and of the packed output.
 * @throws {Error} When verification is requested and the decoded data does
 *   not match the input.
 */
function compressCmap(srcPath, destPath, verify) {
  var content = fs.readFileSync(srcPath).toString();
  var inputData = parseAdobeCMap(content);
  optimizeCMap(inputData);

  var out = writeByte((inputData.type << 1) | inputData.wmode);
  if (inputData.comment) {
    out += writeByte(0xE0) + writeString(inputData.comment);
  }
  if (inputData.usecmap) {
    out += writeByte(0xE1) + writeString(inputData.usecmap);
  }
  var i = 0, j;
  while (i < inputData.body.length) {
    var item = inputData.body[i++], subitems = item.items;
    var first = item.items[0];
    var sequence = item.sequence === true;
    var flags = (item.type << 5) | (sequence ? 0x10 : 0);
    var nextStart, nextCode;
    switch (item.type) {
      case 0: // codespacerange: {start, end}
        out += writeByte(flags | getHexSize(first.start)) +
               writeNumber(subitems.length);
        out += first.start + writeNumber(subHex(first.end, first.start));
        nextStart = incHex(first.end);
        for (j = 1; j < subitems.length; j++) {
          out += writeNumber(subHex(subitems[j].start, nextStart)) +
                 writeNumber(subHex(subitems[j].end, subitems[j].start));
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 1: // notdefrange: {start, end, code}
        out += writeByte(flags | getHexSize(first.start)) +
               writeNumber(subitems.length);
        out += first.start + writeNumber(subHex(first.end, first.start)) +
               writeNumber(first.code);
        nextStart = incHex(first.end);
        for (j = 1; j < subitems.length; j++) {
          out += writeNumber(subHex(subitems[j].start, nextStart)) +
                 writeNumber(subHex(subitems[j].end, subitems[j].start)) +
                 writeNumber(subitems[j].code);
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 2: // cidchar: {char, code}; codes are stored as signed deltas
        out += writeByte(flags | getHexSize(first.char)) +
               writeNumber(subitems.length);
        out += first.char + writeNumber(first.code);
        nextStart = incHex(first.char);
        nextCode = first.code + 1;
        for (j = 1; j < subitems.length; j++) {
          // In a sequence the char is implied (previous char + 1).
          out += (sequence ? '' :
                  writeNumber(subHex(subitems[j].char, nextStart))) +
                 writeSigned(subitems[j].code - nextCode);
          nextStart = incHex(subitems[j].char);
          nextCode = item.items[j].code + 1;
        }
        break;
      case 3: // cidrange: {start, end, code}
        out += writeByte(flags | getHexSize(first.start)) +
               writeNumber(subitems.length);
        out += first.start + writeNumber(subHex(first.end, first.start)) +
               writeNumber(first.code);
        nextStart = incHex(first.end);
        for (j = 1; j < subitems.length; j++) {
          // In a sequence the start is implied (previous end + 1).
          out += (sequence ? '' :
                  writeNumber(subHex(subitems[j].start, nextStart))) +
                 writeNumber(subHex(subitems[j].end, subitems[j].start)) +
                 writeNumber(subitems[j].code);
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 4: // bfchar: {char, code}; the size in the flag byte is the code's
        out += writeByte(flags | getHexSize(first.code)) +
               writeNumber(subitems.length);
        out += first.char + first.code;
        nextStart = incHex(first.char);
        nextCode = incHex(first.code);
        for (j = 1; j < subitems.length; j++) {
          out += (sequence ? '' :
                  writeNumber(subHex(subitems[j].char, nextStart))) +
                 writeSigned(subHex(subitems[j].code, nextCode));
          nextStart = incHex(subitems[j].char);
          nextCode = incHex(subitems[j].code);
        }
        break;
      case 5: // bfrange: {start, end, code}; codes are stored verbatim
        out += writeByte(flags | getHexSize(first.code)) +
               writeNumber(subitems.length);
        out += first.start + writeNumber(subHex(first.end, first.start)) +
               first.code;
        nextStart = incHex(first.end);
        for (j = 1; j < subitems.length; j++) {
          out += (sequence ? '' :
                  writeNumber(subHex(subitems[j].start, nextStart))) +
                 writeNumber(subHex(subitems[j].end, subitems[j].start)) +
                 subitems[j].code;
          nextStart = incHex(subitems[j].end);
        }
        break;
    }
  }

  // Buffer.from replaces the deprecated `new Buffer(string, encoding)`.
  fs.writeFileSync(destPath, Buffer.from(out, 'hex'));

  if (verify) {
    // Round trip: decode the packed hex string and compare the structures.
    var result2 = parseCMap(out);
    var isGood = JSON.stringify(inputData) === JSON.stringify(result2);
    if (!isGood) {
      throw new Error('Extracted data does not match the expected result');
    }
  }

  return {
    orig: fs.statSync(srcPath).size,
    packed: out.length >> 1 // two hex digits per byte
  };
}
/**
 * Decodes packed CMap data (given as a string of hex digits, two per byte)
 * back into the {type, wmode, comment, usecmap, body} structure. It is used to
 * verify the output of compressCmap.
 *
 * @param {string} binaryData - Packed CMap as a hex string.
 * @returns {Object} Decoded CMap: `type` and `wmode` from the header byte,
 *   `comment`/`usecmap` (null when absent) and `body`, an array of
 *   {type, items[, sequence]} entries.
 * @throws {Error} On an unknown record type or when the data ends in the
 *   middle of a variable-length number.
 */
function parseCMap(binaryData) {
  var reader = {
    buffer: binaryData,
    pos: 0,
    end: binaryData.length,
    // Reads one byte (two hex digits); returns -1 at the end of the data.
    readByte: function () {
      if (this.pos >= this.end) {
        return -1;
      }
      var d1 = fromHexDigit(this.buffer[this.pos]);
      var d2 = fromHexDigit(this.buffer[this.pos + 1]);
      this.pos += 2;
      return (d1 << 4) | d2;
    },
    // Reads a variable-length number: 7 bits per byte, most significant
    // group first, high bit set on every byte except the last one.
    readNumber: function () {
      var n = 0;
      var last;
      do {
        var b = this.readByte();
        if (b < 0) {
          // Without this check -1 would look like a continuation byte and
          // the loop would never terminate on truncated data.
          throw new Error('unexpected EOF in bcmap');
        }
        last = !(b & 0x80);
        n = (n << 7) | (b & 0x7F);
      } while (!last);
      return n;
    },
    // Reads a number whose lowest bit carries the sign.
    readSigned: function () {
      var n = this.readNumber();
      return (n & 1) ? -(n >>> 1) - 1 : n >>> 1;
    },
    // Reads (size + 1) raw bytes and returns them as a hex string.
    readHex: function (size) {
      var lengthInChars = (size + 1) << 1;
      var s = this.buffer.substr(this.pos, lengthInChars);
      this.pos += lengthInChars;
      return s;
    },
    // Reads a variable-length number and returns it as a hex string of
    // (size + 1) bytes.
    readHexNumber: function (size) {
      var lengthInChars = (size + 1) << 1;
      var stack = [];
      var last; // was an implicit global in the original code
      do {
        var b = this.readByte();
        if (b < 0) {
          throw new Error('unexpected EOF in bcmap');
        }
        last = !(b & 0x80);
        stack.push(b & 0x7F);
      } while (!last);
      // Repack the 7-bit groups (least significant first) into hex digits.
      var s = '', buffer = 0, bufferSize = 0;
      while (s.length < lengthInChars) {
        while (bufferSize < 4 && stack.length > 0) {
          buffer = (stack.pop() << bufferSize) | buffer;
          bufferSize += 7;
        }
        s = toHexDigit(buffer & 15) + s;
        buffer >>= 4;
        bufferSize -= 4;
      }
      return s;
    },
    // Reads a sign-in-lowest-bit number and returns it as a hex string of
    // (size + 1) bytes; negative values come out bit-inverted.
    readHexSigned: function (size) {
      var num = this.readHexNumber(size);
      var sign = fromHexDigit(num[num.length - 1]) & 1 ? 15 : 0;
      var c = 0;
      var result = '';
      for (var i = 0; i < num.length; i++) {
        // Shift the whole number right by one bit, digit by digit.
        c = (c << 4) | fromHexDigit(num[i]);
        result += toHexDigit(sign ? (c >> 1) ^ sign : (c >> 1));
        c &= 1;
      }
      return result;
    },
    // Reads a length-prefixed string; every character is a separate number.
    readString: function () {
      var len = this.readNumber();
      var s = '';
      for (var i = 0; i < len; i++) {
        s += String.fromCharCode(this.readNumber());
      }
      return s;
    }
  };

  var header = reader.readByte();
  var result = {
    type: header >> 1,
    wmode: header & 1,
    comment: null,
    usecmap: null,
    body: []
  };

  var b, i;
  while ((b = reader.readByte()) >= 0) {
    var type = b >> 5;
    if (type === 7) {
      // Metadata record: 0 is the comment, 1 is the usecmap name.
      switch (b & 0x1F) {
        case 0:
          result.comment = reader.readString();
          break;
        case 1:
          result.usecmap = reader.readString();
          break;
      }
      continue;
    }
    var sequence = !!(b & 0x10);
    var dataSize = b & 15;
    var subitems = [];
    var item = {
      type: type,
      items: subitems
    };
    if (sequence) {
      item.sequence = true;
    }
    // Types 4 and 5 always read their char/start/end fields as two bytes.
    var ucs2DataSize = 1;
    var subitemsCount = reader.readNumber();
    var start, end, code, char;
    switch (type) {
      case 0:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        subitems.push({start: start, end: end});
        for (i = 1; i < subitemsCount; i++) {
          start = addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          subitems.push({start: start, end: end});
        }
        break;
      case 1:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        code = reader.readNumber();
        subitems.push({start: start, end: end, code: code});
        for (i = 1; i < subitemsCount; i++) {
          start = addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          code = reader.readNumber();
          subitems.push({start: start, end: end, code: code});
        }
        break;
      case 2:
        char = reader.readHex(dataSize);
        code = reader.readNumber();
        subitems.push({char: char, code: code});
        for (i = 1; i < subitemsCount; i++) {
          char = sequence ? incHex(char) :
            addHex(reader.readHexNumber(dataSize), incHex(char));
          code = reader.readSigned() + (code + 1);
          subitems.push({char: char, code: code});
        }
        break;
      case 3:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        code = reader.readNumber();
        subitems.push({start: start, end: end, code: code});
        for (i = 1; i < subitemsCount; i++) {
          start = sequence ? incHex(end) :
            addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          code = reader.readNumber();
          subitems.push({start: start, end: end, code: code});
        }
        break;
      case 4:
        char = reader.readHex(ucs2DataSize);
        code = reader.readHex(dataSize);
        subitems.push({char: char, code: code});
        for (i = 1; i < subitemsCount; i++) {
          char = sequence ? incHex(char) :
            addHex(reader.readHexNumber(ucs2DataSize), incHex(char));
          code = addHex(reader.readHexSigned(dataSize), incHex(code));
          subitems.push({char: char, code: code});
        }
        break;
      case 5:
        start = reader.readHex(ucs2DataSize);
        end = addHex(reader.readHexNumber(ucs2DataSize), start);
        code = reader.readHex(dataSize);
        subitems.push({start: start, end: end, code: code});
        for (i = 1; i < subitemsCount; i++) {
          start = sequence ? incHex(end) :
            addHex(reader.readHexNumber(ucs2DataSize), incHex(end));
          end = addHex(reader.readHexNumber(ucs2DataSize), start);
          code = reader.readHex(dataSize);
          subitems.push({start: start, end: end, code: code});
        }
        break;
      default:
        throw new Error('Unknown type: ' + type);
    }
    result.body.push(item);
  }
  return result;
}
/**
 * Formats a number as lowercase hexadecimal text.
 *
 * @param {number} n - Value to format (callers pass 0..15).
 * @returns {string} Hexadecimal representation of `n`.
 */
function toHexDigit(n) {
  var hexText = n.toString(16);
  return hexText;
}
/**
 * Parses hexadecimal text into a number.
 *
 * @param {string} s - Hexadecimal text (callers pass a single digit).
 * @returns {number} Parsed value, or NaN when `s` is not hexadecimal.
 */
function fromHexDigit(s) {
  var value = parseInt(s, 16);
  return value;
}
/**
 * Returns the size code of a hex string: its length in bytes minus one.
 *
 * @param {string} s - Hex string, two digits per byte.
 * @returns {number} Number of bytes in `s`, minus one.
 */
function getHexSize(s) {
  var byteCount = s.length >> 1;
  return byteCount - 1;
}
/**
 * Encodes the low 8 bits of a number as two hex digits.
 *
 * @param {number} b - Byte value.
 * @returns {string} High nibble digit followed by low nibble digit.
 */
function writeByte(b) {
  var highNibble = (b >> 4) & 15;
  var lowNibble = b & 15;
  return toHexDigit(highNibble) + toHexDigit(lowNibble);
}
/**
 * Encodes a value as a variable-length number: 7 bits per byte, most
 * significant group first, with the high bit set on every byte but the last.
 *
 * @param {number|string} n - Either a number, or a hex string holding an
 *   arbitrarily long unsigned value.
 * @returns {string} The encoded bytes as hex digits.
 */
function writeNumber(n) {
  var encoded;
  if (typeof n !== 'string') {
    // Plain number: emit the last byte first, then prepend the rest.
    encoded = writeByte(n & 0x7F);
    for (n >>>= 7; n > 0; n >>>= 7) {
      encoded = writeByte((n & 0x7F) | 0x80) + encoded;
    }
    return encoded;
  }

  // Hex string: consume digits from the least significant end, flushing a
  // byte whenever at least 7 bits are pending.
  encoded = '';
  var pending = 0, pendingBits = 0;
  for (var pos = n.length - 1; pos >= 0; pos--) {
    pending = (fromHexDigit(n[pos]) << pendingBits) | pending;
    pendingBits += 4;
    if (pendingBits < 7) {
      continue;
    }
    encoded = writeByte((pending & 0x7f) |
                        (encoded.length > 0 ? 0x80 : 0)) + encoded;
    pending >>>= 7;
    pendingBits -= 7;
  }
  if (pending > 0) {
    encoded = writeByte((pending & 0x7f) |
                        (encoded.length > 0 ? 0x80 : 0)) + encoded;
  }
  // Drop leading continuation bytes that carry no bits.
  while (encoded.indexOf('80') === 0) {
    encoded = encoded.substr(2);
  }
  return encoded;
}
/**
 * Encodes a signed value as a variable-length number with the sign kept in
 * the lowest bit.
 *
 * @param {number|string} n - Either a number, or a hex string whose top bit
 *   marks a negative value.
 * @returns {string} The encoded bytes as hex digits.
 */
function writeSigned(n) {
  if (typeof n !== 'string') {
    if (n < 0) {
      return writeNumber(-2 * n - 1);
    }
    return writeNumber(2 * n);
  }

  var lead = fromHexDigit(n[0]);
  var negative = lead >= 8;
  // Negative values are bit-inverted digit by digit.
  var mask = negative ? 15 : 0;
  var carry = lead ^ mask;
  var shifted = '';
  // Shift the whole value left by one bit to make room for the sign.
  for (var pos = 1; pos < n.length; pos++) {
    var digit = fromHexDigit(n[pos]) ^ mask;
    carry = (carry << 4) | digit;
    shifted += toHexDigit(carry >> 3);
    carry &= 7;
  }
  shifted += toHexDigit((carry << 1) | (negative ? 1 : 0));
  return writeNumber(shifted);
}
/**
 * Encodes a string as its length followed by one variable-length number per
 * UTF-16 code unit.
 *
 * @param {string} s - Text to encode.
 * @returns {string} The encoded bytes as hex digits.
 */
function writeString(s) {
  var pieces = [writeNumber(s.length)];
  for (var pos = 0; pos < s.length; pos++) {
    pieces.push(writeNumber(s.charCodeAt(pos)));
  }
  return pieces.join('');
}
/**
 * Adds two hex strings digit by digit. The result has as many digits as `a`;
 * a carry out of the top digit is dropped.
 *
 * @param {string} a - First operand; its length drives the loop.
 * @param {string} b - Second operand, indexed at the same positions as `a`.
 * @returns {string} Sum as a hex string.
 */
function addHex(a, b) {
  var digits = [];
  var carry = 0;
  var pos = a.length;
  while (pos-- > 0) {
    var sum = carry + (fromHexDigit(a[pos]) + fromHexDigit(b[pos]));
    if (sum >= 16) {
      digits.unshift(toHexDigit(sum - 16));
      carry = 1;
    } else {
      digits.unshift(toHexDigit(sum));
      carry = 0;
    }
  }
  return digits.join('');
}
/**
 * Subtracts hex string `b` from hex string `a` digit by digit. The result has
 * as many digits as `a`; a borrow out of the top digit is dropped.
 *
 * @param {string} a - Minuend; its length drives the loop.
 * @param {string} b - Subtrahend, indexed at the same positions as `a`.
 * @returns {string} Difference as a hex string.
 */
function subHex(a, b) {
  var digits = [];
  var borrow = 0; // 0 or -1
  var pos = a.length;
  while (pos-- > 0) {
    var diff = borrow + (fromHexDigit(a[pos]) - fromHexDigit(b[pos]));
    if (diff < 0) {
      digits.unshift(toHexDigit(diff + 16));
      borrow = -1;
    } else {
      digits.unshift(toHexDigit(diff));
      borrow = 0;
    }
  }
  return digits.join('');
}
/**
 * Increments a hex string by one. The result keeps the number of digits of
 * the input, so the maximum value wraps around to zero.
 *
 * @param {string} a - Hex string to increment.
 * @returns {string} `a + 1` as a hex string.
 */
function incHex(a) {
  var digits = [];
  var carry = 1;
  var pos = a.length;
  while (pos-- > 0) {
    var sum = carry + fromHexDigit(a[pos]);
    if (sum >= 16) {
      digits.unshift(toHexDigit(sum - 16));
      carry = 1;
    } else {
      digits.unshift(toHexDigit(sum));
      carry = 0;
    }
  }
  return digits.join('');
}
exports.compressCmaps = function (src, dest, verify) {
var files = fs.readdirSync(src).filter(function (fn) {
return fn.indexOf('.') < 0; // skipping files with the extension
});
files.forEach(function (fn) {
var srcPath = path.join(src, fn);
var destPath = path.join(dest, fn + '.bcmap');
var stats = compressCmap(srcPath, destPath, verify);
console.log('Compressing ' + fn + ': ' + stats.orig + ' vs ' + stats.packed +
' ' + (stats.packed / stats.orig * 100).toFixed(1) + '%');
});
};

211
external/cmapscompress/optimize.js vendored Normal file
View File

@ -0,0 +1,211 @@
/* Copyright 2014 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Rewrites the parsed CMap (`data.body`) in place. Five passes are made over
 * `data.body`:
 *  1. adjacent items of the same type are merged;
 *  2. items are split wherever the string-valued fields of two consecutive
 *     subitems differ in length;
 *  3. for types 3 and 5, runs of at least 9 subitems with start === end are
 *     turned into items of the preceding type (2 or 4) with {char, code}
 *     subitems;
 *  4. for types 2-5, runs of at least 9 subitems where each one starts right
 *     after the previous one ends are flagged with `sequence: true`;
 *  5. non-sequence items of type 2 or 3 are split into buckets of subitems
 *     separated by large gaps between codes.
 *
 * @param {Object} data - Parsed CMap; `data.body` is an array of
 *   {type, items[, sequence]} entries and is modified in place.
 */
exports.optimizeCMap = function (data) {
// merge adjacent items of the same type
var i = 1;
while (i < data.body.length) {
if (data.body[i - 1].type === data.body[i].type) {
data.body[i - 1].items = data.body[i - 1].items.concat(data.body[i].items);
data.body.splice(i, 1);
} else {
i++;
}
}
// split into groups with different lengths
var i = 0;
while (i < data.body.length) {
var item = data.body[i];
// only the string-valued fields of the first subitem are compared
var keys = Object.keys(item.items[0]).filter(function (i) {
return typeof item.items[0][i] === 'string';
});
var j = 1;
while (j < item.items.length) {
var different = false;
for (var q = 0; q < keys.length && !different; q++) {
different = item.items[j - 1][keys[q]].length !== item.items[j][keys[q]].length;
}
if (different) {
break;
}
j++;
}
if (j < item.items.length) {
// the remainder becomes the next item and is examined on the next iteration
data.body.splice(i + 1, 0, {
type: item.type,
items: item.items.splice(j, item.items.length - j)
});
}
i++;
}
// find sequences of single char ranges
var i = 0;
while (i < data.body.length) {
var item = data.body[i];
if (item.type === 3 || item.type === 5) {
var j = 0;
while (j < item.items.length) {
// q marks the first subitem of a run where start === end
var q = j;
while (j < item.items.length && item.items[j].start === item.items[j].end) {
j++;
}
if ((j - q) >= 9) {
if (j < item.items.length) {
// subitems after the run go into a new item of the same type
data.body.splice(i + 1, 0, {
type: item.type,
items: item.items.splice(j, item.items.length - j)
});
}
if (q > 0) {
// the run itself becomes a new item of the preceding type
data.body.splice(i + 1, 0, {
type: item.type - 1,
items: item.items.splice(q, j - q).map(function (i) {
return {char: i.start, code: i.code };
})
});
i++;
} else {
// the run starts at the first subitem: convert the item in place
item.type -= 1;
item.items = item.items.map(function (i) {
return {char: i.start, code: i.code };
});
}
continue;
}
j++;
}
}
i++;
}
// find sequences of increasing code/ranges order
var i = 0;
while (i < data.body.length) {
var item = data.body[i];
if (item.type >= 2 && item.type <= 5) {
var j = 1;
// types 2 and 4 hold single chars, types 3 and 5 hold start/end ranges
var startProp = item.type === 2 || item.type === 4 ? 'char' : 'start';
var endProp = item.type === 2 || item.type === 4 ? 'char' : 'end';
while (j < item.items.length) {
// q marks the first subitem of a run of contiguous subitems
var q = j - 1;
while (j < item.items.length && incHex(item.items[j - 1][endProp]) === item.items[j][startProp]) {
j++;
}
if ((j - q) >= 9) {
if (j < item.items.length) {
// subitems after the run go into a new item of the same type
data.body.splice(i + 1, 0, {
type: item.type,
items: item.items.splice(j, item.items.length - j)
});
}
if (q > 0) {
// the run itself becomes a new item flagged as a sequence
data.body.splice(i + 1, 0, {
type: item.type,
items: item.items.splice(q, j - q),
sequence: true
});
i++;
} else {
item.sequence = true;
}
continue;
}
j++;
}
}
i++;
}
// split non-sequences into groups where codes are close
var i = 0;
while (i < data.body.length) {
var item = data.body[i];
if (!item.sequence && (item.type === 2 || item.type === 3)) {
var subitems = item.items;
var codes = subitems.map(function (i) {
return i.code;
});
codes.sort(function (a, b) {
return a - b;
});
var maxDistance = 100, minItems = 10, itemsPerBucket = 50;
if (subitems.length > minItems && codes[codes.length - 1] - codes[0] > maxDistance) {
// track the largest gaps between consecutive sorted codes
var gapsCount = Math.max(2, (subitems.length / itemsPerBucket) | 0);
var gaps = [];
for (var q = 0; q < gapsCount; q++) {
gaps.push({length: 0});
}
for (var j = 1; j < codes.length; j++) {
var gapLength = codes[j] - codes[j - 1];
var q = 0;
while (q < gaps.length && gaps[q].length > gapLength) {
q++;
}
if (q >= gaps.length) {
// every tracked gap is larger than this one
continue;
}
// replace the smallest tracked gap found from position q onwards
var q0 = q;
while (q < gaps.length) {
if (gaps[q].length < gaps[q0].length) {
q0 = q;
}
q++;
}
gaps[q0] = {length: gapLength, boundary: codes[j]};
}
// only gaps of at least maxDistance become bucket boundaries
var groups = gaps.filter(function (g) {
return g.length >= maxDistance;
}).map(function (g) {
return g.boundary;
});
groups.sort(function (a, b) {
return a - b;
});
if (groups.length > 1) {
// the current item keeps the first bucket; one new item per boundary
var buckets = [item.items = []];
for (var j = 0; j < groups.length; j++) {
var newItem = {type: item.type, items: []}
buckets.push(newItem.items);
i++;
data.body.splice(i, 0, newItem);
}
// distribute the subitems, in their original order, by code
for (var j = 0; j < subitems.length; j++) {
var code = subitems[j].code;
var q = 0;
while (q < groups.length && groups[q] <= code) {
q++;
}
buckets[q].push(subitems[j]);
}
}
}
}
i++;
}
};
/**
 * Increments a hex string by one. The result keeps the number of digits of
 * the input (the maximum value wraps around to zero) and is lowercase.
 *
 * @param {string} a - Hex string to increment.
 * @returns {string} `a + 1` as a hex string.
 */
function incHex(a) {
  var digits = [];
  var carry = 1;
  var pos = a.length;
  while (pos-- > 0) {
    var sum = carry + parseInt(a[pos], 16);
    if (sum >= 16) {
      // The only possible overflow is 15 + 1, which leaves a zero digit.
      digits.unshift('0');
      carry = 1;
    } else {
      digits.unshift(sum.toString(16));
      carry = 0;
    }
  }
  return digits.join('');
}

101
external/cmapscompress/parse.js vendored Normal file
View File

@ -0,0 +1,101 @@
/* Copyright 2014 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
exports.parseAdobeCMap = function (content) {
var m = /(\bbegincmap\b[\s\S]*?)\bendcmap\b/.exec(content);
if (!m) {
throw new Error('cmap was not found');
}
var body = m[1].replace(/\r\n?/g, '\n');
var result = {
type: 1,
wmode: 0,
comment: 'Copyright 1990-2009 Adobe Systems Incorporated.\nAll rights reserved.\nhttp://sourceforge.net/adobe/cmap/wiki/License/',
usecmap: null,
body: []
};
m = /\/CMapType\s+(\d+)+\s+def\b/.exec(body);
result.type = +m[1];
m = /\/WMode\s+(\d+)+\s+def\b/.exec(body);
result.wmode = +m[1];
m = /\/([\w\-]+)\s+usecmap\b/.exec(body);
if (m) {
result.usecmap = m[1];
}
var re = /(\d+)\s+(begincodespacerange|beginnotdefrange|begincidchar|begincidrange|beginbfchar|beginbfrange)\n([\s\S]*?)\n(endcodespacerange|endnotdefrange|endcidchar|endcidrange|endbfchar|endbfrange)/g;
while (m = re.exec(body)) {
var lines = m[3].toLowerCase().split('\n');
var m2;
switch (m[2]) {
case 'begincodespacerange':
result.body.push({
type: 0,
items: lines.map(function (line) {
var m = /<(\w+)>\s+<(\w+)>/.exec(line);
return {start: m[1], end: m[2]};
})
});
break;
case 'beginnotdefrange':
result.body.push({
type: 1,
items: lines.map(function (line) {
var m = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line);
return {start: m[1], end: m[2], code: +m[3]};
})
});
break;
case 'begincidchar':
result.body.push({
type: 2,
items: lines.map(function (line) {
var m = /<(\w+)>\s+(\d+)/.exec(line);
return {char: m[1], code: +m[2]};
})
});
break;
case 'begincidrange':
result.body.push({
type: 3,
items: lines.map(function (line) {
var m = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line);
return {start: m[1], end: m[2], code: +m[3]};
})
});
break;
case 'beginbfchar':
result.body.push({
type: 4,
items: lines.map(function (line) {
var m = /<(\w+)>\s+<(\w+)>/.exec(line);
return {char: m[1], code: m[2]};
})
});
break;
case 'beginbfrange':
result.body.push({
type: 5,
items: lines.map(function (line) {
var m = /<(\w+)>\s+<(\w+)>\s+<(\w+)>/.exec(line);
return {start: m[1], end: m[2], code: m[3]};
})
});
break;
}
}
return result;
};

42
make.js
View File

@ -90,6 +90,7 @@ var COMMON_WEB_FILES =
target.generic = function() {
target.bundle({});
target.locale();
target.cmaps();
cd(ROOT_DIR);
echo();
@ -107,10 +108,10 @@ target.generic = function() {
copy: [
[COMMON_WEB_FILES, GENERIC_DIR + '/web'],
['external/webL10n/l10n.js', GENERIC_DIR + '/web'],
['external/cmaps/', GENERIC_DIR + '/web/cmaps'],
['web/viewer.css', GENERIC_DIR + '/web'],
['web/compatibility.js', GENERIC_DIR + '/web'],
['web/compressed.tracemonkey-pldi-09.pdf', GENERIC_DIR + '/web'],
['web/cmaps', GENERIC_DIR + '/web'],
['web/locale', GENERIC_DIR + '/web']
],
preprocess: [
@ -228,6 +229,25 @@ target.locale = function() {
chromeManifestContent.to(CHROME_MANIFEST_OUTPUT);
};
//
// make cmaps
// Compresses cmap files
//
// Recreates the 'web/cmaps' directory from scratch and fills it with the
// packed versions of the files found in 'external/cmaps'. The trailing `true`
// asks the compressor to verify every packed file.
target.cmaps = function (args) {
  var sourceDir = 'external/cmaps';
  var outputDir = 'web/cmaps';

  cd(ROOT_DIR);
  echo();
  echo('### Building cmaps');

  rm('-rf', outputDir);
  mkdir('-p', outputDir);

  var compressModule = require('./external/cmapscompress/compress.js');
  var runCompression = compressModule.compressCmaps;
  runCompression(sourceDir, outputDir, true);
};
//
// make bundle
// Bundles all source files into one wrapper 'pdf.js' file, in the given order.
@ -410,6 +430,7 @@ target.minified = function() {
target.bundle({});
target.locale();
target.cmaps();
cd(ROOT_DIR);
echo();
@ -428,6 +449,7 @@ target.minified = function() {
[COMMON_WEB_FILES, MINIFIED_DIR + '/web'],
['web/viewer.css', MINIFIED_DIR + '/web'],
['web/compressed.tracemonkey-pldi-09.pdf', MINIFIED_DIR + '/web'],
['web/cmaps', MINIFIED_DIR + '/web'],
['web/locale', MINIFIED_DIR + '/web']
],
preprocess: [
@ -492,6 +514,7 @@ target.extension = function() {
echo('### Building extensions');
target.locale();
target.cmaps();
target.firefox();
target.chromium();
};
@ -544,6 +567,7 @@ target.firefox = function() {
FIREFOX_AMO_EXTENSION_NAME = 'pdf.js.amo.xpi';
target.locale();
target.cmaps();
target.bundle({ excludes: ['core/network.js'], defines: defines });
cd(ROOT_DIR);
@ -574,7 +598,7 @@ target.firefox = function() {
defines: defines,
copy: [
[COMMON_WEB_FILES, FIREFOX_BUILD_CONTENT_DIR + '/web'],
['external/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'],
['web/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'],
[FIREFOX_EXTENSION_DIR + 'tools/l10n.js',
FIREFOX_BUILD_CONTENT_DIR + '/web'],
['web/default_preferences.js', FIREFOX_BUILD_CONTENT_DIR]
@ -691,7 +715,7 @@ target.mozcentral = function() {
defines: defines,
copy: [
[COMMON_WEB_FILES, MOZCENTRAL_CONTENT_DIR + '/web'],
['external/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'],
['web/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'],
['extensions/firefox/tools/l10n.js', MOZCENTRAL_CONTENT_DIR + '/web'],
['web/default_preferences.js', MOZCENTRAL_CONTENT_DIR]
],
@ -746,6 +770,7 @@ target.mozcentral = function() {
target.b2g = function() {
target.locale();
target.cmaps();
echo();
echo('### Building B2G (Firefox OS App)');
@ -763,10 +788,10 @@ target.b2g = function() {
var setup = {
defines: defines,
copy: [
['external/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'],
['extensions/b2g/images', B2G_BUILD_CONTENT_DIR + '/web'],
['extensions/b2g/viewer.html', B2G_BUILD_CONTENT_DIR + '/web'],
['extensions/b2g/viewer.css', B2G_BUILD_CONTENT_DIR + '/web'],
['web/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'],
['web/locale', B2G_BUILD_CONTENT_DIR + '/web'],
['external/webL10n/l10n.js', B2G_BUILD_CONTENT_DIR + '/web']
],
@ -784,6 +809,9 @@ target.b2g = function() {
// make chrome
//
target.chromium = function() {
target.locale();
target.cmaps();
cd(ROOT_DIR);
echo();
echo('### Building Chromium extension');
@ -804,7 +832,6 @@ target.chromium = function() {
var setup = {
defines: defines,
copy: [
['external/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'],
[COMMON_WEB_FILES, CHROME_BUILD_CONTENT_DIR + '/web'],
[['extensions/chromium/*.json',
'extensions/chromium/*.html',
@ -814,6 +841,7 @@ target.chromium = function() {
CHROME_BUILD_DIR],
['external/webL10n/l10n.js', CHROME_BUILD_CONTENT_DIR + '/web'],
['web/viewer.css', CHROME_BUILD_CONTENT_DIR + '/web'],
['web/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'],
['web/locale', CHROME_BUILD_CONTENT_DIR + '/web']
],
preprocess: [
@ -931,6 +959,8 @@ target.test = function() {
// (Special tests for the Github bot)
//
target.bottest = function() {
target.cmaps();
target.unittest({}, function() {
target.fonttest({}, function() {
target.browsertest({noreftest: true});
@ -1011,6 +1041,8 @@ target.fonttest = function(options, callback) {
// make botmakeref
//
target.botmakeref = function() {
target.cmaps();
cd(ROOT_DIR);
echo();
echo('### Creating reference images');

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
/* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer,
isStream, StringStream */
isStream, StringStream, PDFJS, assert */
'use strict';
@ -275,6 +275,314 @@ var IdentityCMap = (function IdentityCMapClosure() {
return IdentityCMap;
})();
var BinaryCMapReader = (function BinaryCMapReaderClosure() {
// Synchronously downloads `url` and returns its content as a Uint8Array.
// An 'arraybuffer' response is requested unless PDFJS.disableWorker is set
// or the response type cannot be set; in that case the data is fetched as
// text with a user-defined charset and each character is reduced to its low
// byte.
function fetchBinaryData(url) {
  var asText = PDFJS.disableWorker;
  var xhr = new XMLHttpRequest();
  xhr.open('GET', url, false);
  if (!asText) {
    try {
      xhr.responseType = 'arraybuffer';
      asText = xhr.responseType !== 'arraybuffer';
    } catch (e) {
      asText = true;
    }
  }
  if (asText && xhr.overrideMimeType) {
    xhr.overrideMimeType('text/plain; charset=x-user-defined');
  }
  xhr.send(null);
  if (xhr.status === 0 && /^https?:/i.test(url)) {
    error('Unable to get binary cMap at: ' + url);
  }
  if (!asText) {
    return new Uint8Array(xhr.response);
  }
  var text = xhr.responseText;
  var bytes = new Uint8Array(text.length);
  for (var k = 0; k < text.length; k++) {
    bytes[k] = text.charCodeAt(k) & 255;
  }
  return bytes;
}
// Combines bytes a[0..size] (most significant first) into one unsigned
// 32-bit integer.
function hexToInt(a, size) {
  var value = 0;
  var index = 0;
  while (index <= size) {
    value = (value << 8) | a[index];
    index++;
  }
  return value >>> 0;
}
// Converts bytes a[0..size] into a string, one character per byte.
function hexToStr(a, size) {
  var bytes = a.subarray(0, size + 1);
  var text = '';
  for (var k = 0; k < bytes.length; k++) {
    text += String.fromCharCode(bytes[k]);
  }
  return text;
}
// Adds the big-endian number in b[0..size] to the one in a[0..size]; the
// result is stored in `a` and a carry out of the top byte is dropped.
function addHex(a, b, size) {
  var carry = 0;
  var i = size;
  while (i >= 0) {
    var sum = carry + a[i] + b[i];
    a[i] = sum & 255;
    carry = sum >> 8;
    i--;
  }
}
// Increments the big-endian number stored in a[0..size] by one, in place;
// stops as soon as there is no carry left to propagate.
function incHex(a, size) {
  var carry = 1;
  var i = size;
  while (i >= 0 && carry > 0) {
    var sum = carry + a[i];
    a[i] = sum & 255;
    carry = sum >> 8;
    i--;
  }
}
// Largest supported hex number, in bytes.
var MAX_NUM_SIZE = 16;
// Bytes needed to store MAX_NUM_SIZE bytes at 7 bits per encoded byte.
var MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 8 / 7)

// Sequential reader over the bytes of a packed CMap.
function BinaryCMapStream(data) {
  this.buffer = data;
  this.pos = 0;
  this.end = data.length;
  // Scratch space for the 7-bit groups collected by readHexNumber.
  this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE);
}

BinaryCMapStream.prototype = {
  // Returns the next byte, or -1 at the end of the data.
  readByte: function () {
    if (this.pos >= this.end) {
      return -1;
    }
    return this.buffer[this.pos++];
  },
  // Reads a variable-length number: 7 bits per byte, most significant group
  // first, high bit set on every byte except the last one.
  readNumber: function () {
    var n = 0;
    var last;
    do {
      var b = this.readByte();
      if (b < 0) {
        error('unexpected EOF in bcmap');
      }
      last = !(b & 0x80);
      n = (n << 7) | (b & 0x7F);
    } while (!last);
    return n;
  },
  // Reads a number whose lowest bit carries the sign.
  readSigned: function () {
    var n = this.readNumber();
    return (n & 1) ? ~(n >>> 1) : n >>> 1;
  },
  // Copies the next (size + 1) raw bytes into num[0..size].
  readHex: function (num, size) {
    num.set(this.buffer.subarray(this.pos,
                                 this.pos + size + 1));
    this.pos += size + 1;
  },
  // Reads a variable-length number and stores it big-endian in num[0..size].
  readHexNumber: function (num, size) {
    var last;
    var stack = this.tmpBuf, sp = 0;
    do {
      var b = this.readByte();
      if (b < 0) {
        error('unexpected EOF in bcmap');
      }
      last = !(b & 0x80);
      stack[sp++] = b & 0x7F;
    } while (!last);
    var i = size, buffer = 0, bufferSize = 0;
    while (i >= 0) {
      // `sp > 0` (not `stack.length > 0`, which is always true for the
      // fixed-size scratch buffer) so that no element below index 0 is read.
      while (bufferSize < 8 && sp > 0) {
        buffer = (stack[--sp] << bufferSize) | buffer;
        bufferSize += 7;
      }
      num[i] = buffer & 255;
      i--;
      buffer >>= 8;
      bufferSize -= 8;
    }
  },
  // Reads a sign-in-lowest-bit number into num[0..size]; negative values
  // come out bit-inverted.
  readHexSigned: function (num, size) {
    this.readHexNumber(num, size);
    var sign = num[size] & 1 ? 255 : 0;
    var c = 0;
    for (var i = 0; i <= size; i++) {
      // Shift the whole number right by one bit, byte by byte.
      c = ((c & 1) << 8) | num[i];
      num[i] = (c >> 1) ^ sign;
    }
  },
  // Reads a length-prefixed string; every character is a separate number.
  readString: function () {
    var len = this.readNumber();
    var s = '';
    for (var i = 0; i < len; i++) {
      s += String.fromCharCode(this.readNumber());
    }
    return s;
  }
};
// Fetches the packed CMap at `url`, decodes it and feeds its entries into
// `cMap` through addCodespaceRange/mapOne/mapRange; `cMap.vertical` is set
// from the lowest bit of the header byte. When the data names a parent CMap
// (usecmap), `extend` is called with that name after the whole stream has
// been processed. Returns `cMap`.
function processBinaryCMap(url, cMap, extend) {
var data = fetchBinaryData(url);
var stream = new BinaryCMapStream(data);
var header = stream.readByte();
cMap.vertical = !!(header & 1);
var useCMap = null;
// Scratch buffers, reused (and modified in place) for every record.
var start = new Uint8Array(MAX_NUM_SIZE);
var end = new Uint8Array(MAX_NUM_SIZE);
var char = new Uint8Array(MAX_NUM_SIZE);
var charCode = new Uint8Array(MAX_NUM_SIZE);
var tmp = new Uint8Array(MAX_NUM_SIZE);
var code;
var b;
while ((b = stream.readByte()) >= 0) {
// Record header: top three bits are the type, 0x10 is the sequence flag,
// the low four bits are the data size (in bytes, minus one).
var type = b >> 5;
if (type === 7) { // metadata, e.g. comment or usecmap
switch (b & 0x1F) {
case 0:
stream.readString(); // skipping comment
break;
case 1:
useCMap = stream.readString();
break;
}
continue;
}
var sequence = !!(b & 0x10);
var dataSize = b & 15;
assert(dataSize + 1 <= MAX_NUM_SIZE);
// bfchar/bfrange source codes are always read as two bytes.
var ucs2DataSize = 1;
var subitemsCount = stream.readNumber();
switch (type) {
case 0: // codespacerange
// First subitem: raw start, then end stored as (end - start).
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
hexToInt(end, dataSize));
for (var i = 1; i < subitemsCount; i++) {
// Later subitems: start is stored relative to (previous end + 1).
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
hexToInt(end, dataSize));
}
break;
case 1: // notdefrange
// The values are decoded only to advance the stream; nothing is stored.
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
// undefined range, skipping
for (var i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
// nop
}
break;
case 2: // cidchar
stream.readHex(char, dataSize);
code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
for (var i = 1; i < subitemsCount; i++) {
// In a sequence the char is simply the previous char + 1.
incHex(char, dataSize);
if (!sequence) {
stream.readHexNumber(tmp, dataSize);
addHex(char, tmp, dataSize);
}
// The code is stored as a signed delta from (previous code + 1).
code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
}
break;
case 3: // cidrange
stream.readHex(start, dataSize);
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
String.fromCharCode(code));
for (var i = 1; i < subitemsCount; i++) {
incHex(end, dataSize);
if (!sequence) {
stream.readHexNumber(start, dataSize);
addHex(start, end, dataSize);
} else {
// In a sequence the start is the previous end + 1.
start.set(end);
}
stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize);
code = stream.readNumber();
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
String.fromCharCode(code));
}
break;
case 4: // bfchar
stream.readHex(char, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapOne(hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize));
for (var i = 1; i < subitemsCount; i++) {
incHex(char, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(tmp, ucs2DataSize);
addHex(char, tmp, ucs2DataSize);
}
// The target is stored as a signed delta from (previous target + 1).
incHex(charCode, dataSize);
stream.readHexSigned(tmp, dataSize);
addHex(charCode, tmp, dataSize);
cMap.mapOne(hexToInt(char, ucs2DataSize),
hexToStr(charCode, dataSize));
}
break;
case 5: // bfrange
stream.readHex(start, ucs2DataSize);
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize);
cMap.mapRange(hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize));
for (var i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize);
if (!sequence) {
stream.readHexNumber(start, ucs2DataSize);
addHex(start, end, ucs2DataSize);
} else {
// In a sequence the start is the previous end + 1.
start.set(end);
}
stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize);
// The target of every range is stored verbatim.
stream.readHex(charCode, dataSize);
cMap.mapRange(hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize));
}
break;
default:
error('Unknown type: ' + type);
break;
}
}
if (useCMap) {
extend(useCMap);
}
return cMap;
}
// Stateless reader object; `read(url, cMap, extend)` is processBinaryCMap.
function BinaryCMapReader() {}
BinaryCMapReader.prototype = {
read: processBinaryCMap
};
return BinaryCMapReader;
})();
var CMapFactory = (function CMapFactoryClosure() {
function strToInt(str) {
var a = 0;
@ -417,7 +725,7 @@ var CMapFactory = (function CMapFactoryClosure() {
}
}
function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) {
function parseCMap(cMap, lexer, builtInCMapParams, useCMap) {
var previous;
var embededUseCMap;
objLoop: while (true) {
@ -463,28 +771,41 @@ var CMapFactory = (function CMapFactoryClosure() {
useCMap = embededUseCMap;
}
if (useCMap) {
cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl);
// If there aren't any code space ranges defined clone all the parent ones
// into this cMap.
if (cMap.numCodespaceRanges === 0) {
var useCodespaceRanges = cMap.useCMap.codespaceRanges;
for (var i = 0; i < useCodespaceRanges.length; i++) {
cMap.codespaceRanges[i] = useCodespaceRanges[i].slice();
}
cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
}
// Merge the map into the current one, making sure not to override
// any previously defined entries.
for (var key in cMap.useCMap.map) {
if (key in cMap.map) {
continue;
}
cMap.map[key] = cMap.useCMap.map[key];
}
extendCMap(cMap, builtInCMapParams, useCMap);
}
}
function createBuiltInCMap(name, builtInCMapUrl) {
/**
 * Links `cMap` to the built-in CMap named by `useCMap` and pulls in whatever
 * the child does not define on its own.
 *
 * @param {Object} cMap - CMap to extend; it is modified in place.
 * @param {Object} builtInCMapParams - Forwarded untouched to
 *   createBuiltInCMap.
 * @param {string} useCMap - Name of the parent CMap, forwarded to
 *   createBuiltInCMap.
 */
function extendCMap(cMap, builtInCMapParams, useCMap) {
  var parent = createBuiltInCMap(useCMap, builtInCMapParams);
  cMap.useCMap = parent;

  // A child that declares no codespace ranges receives copies of the parent's
  // ranges (each range is sliced so the two CMaps do not share arrays).
  if (cMap.numCodespaceRanges === 0) {
    var parentRanges = parent.codespaceRanges;
    var idx = 0;
    while (idx < parentRanges.length) {
      cMap.codespaceRanges[idx] = parentRanges[idx].slice();
      idx += 1;
    }
    cMap.numCodespaceRanges = parent.numCodespaceRanges;
  }

  // Inherit the parent's mappings, leaving every entry the child already
  // defines exactly as it is.
  var parentMap = parent.map;
  for (var code in parentMap) {
    if (!(code in cMap.map)) {
      cMap.map[code] = parentMap[code];
    }
  }
}
/**
 * Loads the built-in CMap `name` from its binary (`.bcmap`) file.
 *
 * @param {string} name - CMap name; the file URL is
 *   `builtInCMapParams.url + name + '.bcmap'`.
 * @param {Object} builtInCMapParams - Supplies the base `url` and is passed
 *   on to extendCMap when the file refers to a parent CMap.
 * @returns {CMap} The CMap instance that was handed to the reader.
 */
function parseBinaryCMap(name, builtInCMapParams) {
  var binaryUrl = builtInCMapParams.url + name + '.bcmap';
  var result = new CMap(true);
  var reader = new BinaryCMapReader();

  // Called by the reader with the parent CMap name, so that the parent's
  // data can be merged into the result.
  function onUseCMap(parentName) {
    extendCMap(result, builtInCMapParams, parentName);
  }

  reader.read(binaryUrl, result, onUseCMap);
  return result;
}
function createBuiltInCMap(name, builtInCMapParams) {
if (name === 'Identity-H') {
return new IdentityCMap(false, 2);
} else if (name === 'Identity-V') {
@ -493,9 +814,14 @@ var CMapFactory = (function CMapFactoryClosure() {
if (BUILT_IN_CMAPS.indexOf(name) === -1) {
error('Unknown cMap name: ' + name);
}
assert (builtInCMapParams, 'buildin cmap parameters are not provided');
if (builtInCMapParams.packed) {
return parseBinaryCMap(name, builtInCMapParams);
}
var request = new XMLHttpRequest();
var url = builtInCMapUrl + name;
var url = builtInCMapParams.url + name;
request.open('GET', url, false);
request.send(null);
if (request.status === 0 && /^https?:/i.test(url)) {
@ -503,19 +829,19 @@ var CMapFactory = (function CMapFactoryClosure() {
}
var cMap = new CMap(true);
var lexer = new Lexer(new StringStream(request.responseText));
parseCMap(cMap, lexer, builtInCMapUrl, null);
parseCMap(cMap, lexer, builtInCMapParams, null);
return cMap;
}
return {
create: function (encoding, builtInCMapUrl, useCMap) {
create: function (encoding, builtInCMapParams, useCMap) {
if (isName(encoding)) {
return createBuiltInCMap(encoding.name, builtInCMapUrl);
return createBuiltInCMap(encoding.name, builtInCMapParams);
} else if (isStream(encoding)) {
var cMap = new CMap();
var lexer = new Lexer(encoding);
try {
parseCMap(cMap, lexer, builtInCMapUrl, useCMap);
parseCMap(cMap, lexer, builtInCMapParams, useCMap);
} catch (e) {
warn('Invalid CMap data. ' + e);
}

View File

@ -1259,7 +1259,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (isName(cidEncoding)) {
properties.cidEncoding = cidEncoding.name;
}
properties.cMap = CMapFactory.create(cidEncoding, PDFJS.cMapUrl, null);
properties.cMap = CMapFactory.create(cidEncoding,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
properties.vertical = properties.cMap.vertical;
}
this.extractDataStructures(dict, baseDict, xref, properties);

View File

@ -4223,7 +4223,8 @@ var Font = (function FontClosure() {
var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2');
// d) Obtain the CMap with the name constructed in step (c) (available
// from the ASN Web site; see the Bibliography).
var ucs2CMap = CMapFactory.create(ucs2CMapName, PDFJS.cMapUrl, null);
var ucs2CMap = CMapFactory.create(ucs2CMapName,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null);
var cMap = properties.cMap;
var toUnicode = [];
for (var charcode in cMap.map) {

View File

@ -241,6 +241,7 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
PDFJS.verbosity = data.verbosity;
PDFJS.cMapUrl = data.cMapUrl === undefined ?
null : data.cMapUrl;
PDFJS.cMapPacked = data.cMapPacked === true;
getPdfManager(data).then(function () {
pdfManager.onLoadedStream().then(function(stream) {

View File

@ -36,6 +36,12 @@ PDFJS.maxImageSize = PDFJS.maxImageSize === undefined ? -1 : PDFJS.maxImageSize;
*/
PDFJS.cMapUrl = PDFJS.cMapUrl === undefined ? null : PDFJS.cMapUrl;
/**
* Specifies if CMaps are binary packed.
* @var {boolean}
*/
PDFJS.cMapPacked = PDFJS.cMapPacked === undefined ? false : PDFJS.cMapPacked;
/*
* By default fonts are converted to OpenType fonts and loaded via font face
* rules. If disabled, the font will be rendered using a built in font renderer
@ -942,6 +948,7 @@ var WorkerTransport = (function WorkerTransportClosure() {
disableRange: PDFJS.disableRange,
maxImageSize: PDFJS.maxImageSize,
cMapUrl: PDFJS.cMapUrl,
cMapPacked: PDFJS.cMapPacked,
disableFontFace: PDFJS.disableFontFace,
disableCreateObjectURL: PDFJS.disableCreateObjectURL,
verbosity: PDFJS.verbosity

View File

@ -28,7 +28,8 @@
// "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1."
// PDFJS.disableWorker = true;
PDFJS.enableStats = true;
PDFJS.cMapUrl = '../external/cmaps/';
PDFJS.cMapUrl = '../web/cmaps/';
PDFJS.cMapPacked = true;
var appPath, masterMode, browser, canvas, dummyCanvas, currentTaskIdx,
manifest, stdout;

View File

@ -105,6 +105,7 @@ MIMEs = {
'.ico': 'image/x-icon',
'.png': 'image/png',
'.log': 'text/plain',
'.bcmap': 'application/octet-stream',
'.properties': 'text/plain'
}

3
web/.gitignore vendored
View File

@ -1,3 +1,4 @@
viewer-production.html
locale.properties
locale/
locale/
cmaps/

View File

@ -66,6 +66,7 @@ PDFJS.imageResourcesPath = './images/';
PDFJS.cMapUrl = '../external/cmaps/';
//#else
//PDFJS.cMapUrl = '../web/cmaps/';
//PDFJS.cMapPacked = true;
//#endif
var mozL10n = document.mozL10n || document.webL10n;