From 69efd9cb968a8076f5d09f9f7132d3d4f6f1fb81 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Fri, 14 Mar 2014 13:22:02 -0500 Subject: [PATCH] CMaps binary packing --- external/cmapscompress/compress.js | 437 +++++++++++++++++++++++++++++ external/cmapscompress/optimize.js | 211 ++++++++++++++ external/cmapscompress/parse.js | 101 +++++++ make.js | 42 ++- src/core/cmap.js | 378 +++++++++++++++++++++++-- src/core/evaluator.js | 3 +- src/core/fonts.js | 3 +- src/core/worker.js | 1 + src/display/api.js | 7 + test/driver.js | 3 +- test/test.py | 1 + web/.gitignore | 3 +- web/viewer.js | 1 + 13 files changed, 1156 insertions(+), 35 deletions(-) create mode 100644 external/cmapscompress/compress.js create mode 100644 external/cmapscompress/optimize.js create mode 100644 external/cmapscompress/parse.js diff --git a/external/cmapscompress/compress.js b/external/cmapscompress/compress.js new file mode 100644 index 000000000..078cfe63f --- /dev/null +++ b/external/cmapscompress/compress.js @@ -0,0 +1,437 @@ +/* Copyright 2014 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +var fs = require('fs'); +var path = require('path'); +var parseAdobeCMap = require('./parse.js').parseAdobeCMap; +var optimizeCMap = require('./optimize.js').optimizeCMap; + +function compressCmap(srcPath, destPath, verify) { + var content = fs.readFileSync(srcPath).toString(); + var inputData = parseAdobeCMap(content); + optimizeCMap(inputData); + + var out = writeByte((inputData.type << 1) | inputData.wmode); + if (inputData.comment) { + out += writeByte(0xE0) + writeString(inputData.comment); + } + if (inputData.usecmap) { + out += writeByte(0xE1) + writeString(inputData.usecmap); + } + var i = 0; + while (i < inputData.body.length) { + var item = inputData.body[i++], subitems = item.items; + var first = item.items[0]; + var sequence = item.sequence === true; + var flags = (item.type << 5) | (sequence ? 0x10 : 0); + var nextStart, nextCode; + switch (item.type) { + case 0: + out += writeByte(flags | getHexSize(first.start)) + writeNumber(subitems.length); + out += first.start + writeNumber(subHex(first.end, first.start)); + nextStart = incHex(first.end); + for (var j = 1; j < subitems.length; j++) { + out += writeNumber(subHex(subitems[j].start, nextStart)) + + writeNumber(subHex(subitems[j].end, subitems[j].start)); + nextStart = incHex(subitems[j].end); + } + break; + case 1: + out += writeByte(flags | getHexSize(first.start)) + writeNumber(subitems.length); + out += first.start + writeNumber(subHex(first.end, first.start)) + writeNumber(first.code); + nextStart = incHex(first.end); + for (var j = 1; j < subitems.length; j++) { + out += writeNumber(subHex(subitems[j].start, nextStart)) + + writeNumber(subHex(subitems[j].end, subitems[j].start)) + + writeNumber(subitems[j].code); + nextStart = incHex(subitems[j].end); + } + break; + case 2: + out += writeByte(flags | getHexSize(first.char)) + writeNumber(subitems.length); + out += first.char + writeNumber(first.code); + nextStart = incHex(first.char); + nextCode = first.code + 1; + for (var j = 1; j < 
subitems.length; j++) { + out += (sequence ? '' : writeNumber(subHex(subitems[j].char, nextStart))) + + writeSigned(subitems[j].code - nextCode); + nextStart = incHex(subitems[j].char); + nextCode = item.items[j].code + 1; + } + break; + case 3: + out += writeByte(flags | getHexSize(first.start)) + writeNumber(subitems.length); + out += first.start + writeNumber(subHex(first.end, first.start)) + writeNumber(first.code); + nextStart = incHex(first.end); + for (var j = 1; j < subitems.length; j++) { + out += (sequence ? '' : writeNumber(subHex(subitems[j].start, nextStart))) + + writeNumber(subHex(subitems[j].end, subitems[j].start)) + + writeNumber(subitems[j].code); + nextStart = incHex(subitems[j].end); + } + break; + case 4: + out += writeByte(flags | getHexSize(first.code)) + writeNumber(subitems.length); + out += first.char + first.code; + nextStart = incHex(first.char); + nextCode = incHex(first.code); + for (var j = 1; j < subitems.length; j++) { + out += (sequence ? '' : writeNumber(subHex(subitems[j].char, nextStart))) + + writeSigned(subHex(subitems[j].code, nextCode)); + nextStart = incHex(subitems[j].char); + nextCode = incHex(subitems[j].code); + } + break; + case 5: + out += writeByte(flags | getHexSize(first.code)) + writeNumber(subitems.length); + out += first.start + writeNumber(subHex(first.end, first.start)) + first.code; + nextStart = incHex(first.end); + for (var j = 1; j < subitems.length; j++) { + out += (sequence ? 
'' : writeNumber(subHex(subitems[j].start, nextStart))) + + writeNumber(subHex(subitems[j].end, subitems[j].start)) + + subitems[j].code; + nextStart = incHex(subitems[j].end); + } + break; + + } + } + + fs.writeFileSync(destPath, new Buffer(out, 'hex')); + + if (verify) { + var result2 = parseCMap(out); + var isGood = JSON.stringify(inputData) == JSON.stringify(result2); + if (!isGood) { + throw new Error('Extracted data does not match the expected result'); + } + } + + return { + orig: fs.statSync(srcPath).size, + packed: out.length >> 1 + }; +} + +function parseCMap(binaryData) { + var reader = { + buffer: binaryData, + pos: 0, + end: binaryData.length, + readByte: function () { + if (this.pos >= this.end) { + return -1; + } + var d1 = fromHexDigit(this.buffer[this.pos]); + var d2 = fromHexDigit(this.buffer[this.pos + 1]); + this.pos += 2; + return (d1 << 4) | d2; + }, + readNumber: function () { + var n = 0; + var last; + do { + var b = this.readByte(); + last = !(b & 0x80); + n = (n << 7) | (b & 0x7F); + } while (!last); + return n; + }, + readSigned: function () { + var n = this.readNumber(); + return (n & 1) ? -(n >>> 1) - 1 : n >>> 1; + }, + readHex: function (size) { + var lengthInChars = (size + 1) << 1; + var s = this.buffer.substr(this.pos, lengthInChars); + this.pos += lengthInChars; + return s; + }, + readHexNumber: function (size) { + var lengthInChars = (size + 1) << 1; + var stack = []; + do { + var b = this.readByte(); + last = !(b & 0x80); + stack.push(b & 0x7F); + } while (!last); + var s = '', buffer = 0, bufferSize = 0; + while (s.length < lengthInChars) { + while (bufferSize < 4 && stack.length > 0) { + buffer = (stack.pop() << bufferSize) | buffer; + bufferSize += 7; + } + s = toHexDigit(buffer & 15) + s; + buffer >>= 4; + bufferSize -= 4; + } + return s; + }, + readHexSigned: function (size) { + var num = this.readHexNumber(size); + var sign = fromHexDigit(num[num.length - 1]) & 1 ? 
15 : 0; + var c = 0; + var result = ''; + for (var i = 0; i < num.length; i++) { + c = (c << 4) | fromHexDigit(num[i]); + result += toHexDigit(sign ? (c >> 1) ^ sign : (c >> 1)); + c &= 1; + } + return result; + }, + readString: function () { + var len = this.readNumber(); + var s = ''; + for (var i = 0; i < len; i++) { + s += String.fromCharCode(this.readNumber()); + } + return s; + } + }; + + var header = reader.readByte(); + var result = { + type: header >> 1, + wmode: header & 1, + comment: null, + usecmap: null, + body: [] + }; + + var b; + while ((b = reader.readByte()) >= 0) { + var type = b >> 5; + if (type === 7) { + switch (b & 0x1F) { + case 0: + result.comment = reader.readString(); + break; + case 1: + result.usecmap = reader.readString(); + break; + } + continue; + } + var sequence = !!(b & 0x10); + var dataSize = b & 15; + var subitems = []; + var item = { + type: type, + items: subitems + }; + if (sequence) { + item.sequence = true; + } + var ucs2DataSize = 1; + var subitemsCount = reader.readNumber(); + var start, end, code, char; + switch (type) { + case 0: + start = reader.readHex(dataSize); + end = addHex(reader.readHexNumber(dataSize), start); + subitems.push({start: start, end: end}); + for (var i = 1; i < subitemsCount; i++) { + start = addHex(reader.readHexNumber(dataSize), incHex(end)); + end = addHex(reader.readHexNumber(dataSize), start); + subitems.push({start: start, end: end}); + } + break; + case 1: + start = reader.readHex(dataSize); + end = addHex(reader.readHexNumber(dataSize), start); + code = reader.readNumber(); + subitems.push({start: start, end: end, code: code}); + for (var i = 1; i < subitemsCount; i++) { + start = addHex(reader.readHexNumber(dataSize), incHex(end)); + end = addHex(reader.readHexNumber(dataSize), start); + code = reader.readNumber(); + subitems.push({start: start, end: end, code: code}); + } + break; + case 2: + char = reader.readHex(dataSize); + code = reader.readNumber(); + subitems.push({char: char, code: 
code}); + for (var i = 1; i < subitemsCount; i++) { + char = sequence ? incHex(char) : addHex(reader.readHexNumber(dataSize), incHex(char)); + code = reader.readSigned() + (code + 1); + subitems.push({char: char, code: code}); + } + break; + case 3: + start = reader.readHex(dataSize); + end = addHex(reader.readHexNumber(dataSize), start); + code = reader.readNumber(); + subitems.push({start: start, end: end, code: code}); + for (var i = 1; i < subitemsCount; i++) { + start = sequence ? incHex(end) : addHex(reader.readHexNumber(dataSize), incHex(end)); + end = addHex(reader.readHexNumber(dataSize), start); + code = reader.readNumber(); + subitems.push({start: start, end: end, code: code}); + } + break; + case 4: + char = reader.readHex(ucs2DataSize); + code = reader.readHex(dataSize); + subitems.push({char: char, code: code}); + for (var i = 1; i < subitemsCount; i++) { + char = sequence ? incHex(char) : addHex(reader.readHexNumber(ucs2DataSize), incHex(char)); + code = addHex(reader.readHexSigned(dataSize), incHex(code)); + subitems.push({char: char, code: code}); + } + break; + case 5: + start = reader.readHex(ucs2DataSize); + end = addHex(reader.readHexNumber(ucs2DataSize), start); + code = reader.readHex(dataSize); + subitems.push({start: start, end: end, code: code}); + for (var i = 1; i < subitemsCount; i++) { + start = sequence ? 
incHex(end) : addHex(reader.readHexNumber(ucs2DataSize), incHex(end)); + end = addHex(reader.readHexNumber(ucs2DataSize), start); + code = reader.readHex(dataSize); + subitems.push({start: start, end: end, code: code}); + } + break; + default: + throw new Error('Unknown type: ' + type) + } + result.body.push(item); + } + + return result; +} + +function toHexDigit(n) { + return n.toString(16); +} +function fromHexDigit(s) { + return parseInt(s, 16); +} +function getHexSize(s) { + return (s.length >> 1) - 1; +} +function writeByte(b) { + return toHexDigit((b >> 4) & 15) + toHexDigit(b & 15); +} +function writeNumber(n) { + if (typeof n === 'string') { + var s = '', buffer = 0, bufferSize = 0; + var i = n.length; + while (i > 0) { + --i; + buffer = (fromHexDigit(n[i]) << bufferSize) | buffer; + bufferSize += 4; + if (bufferSize >= 7) { + s = writeByte((buffer & 0x7f) | (s.length > 0 ? 0x80 : 0)) + s; + buffer >>>= 7; + bufferSize -= 7; + } + } + if (buffer > 0) { + s = writeByte((buffer & 0x7f) | (s.length > 0 ? 0x80 : 0)) + s; + } + while (s.indexOf('80') === 0) { + s = s.substr(2); + } + return s; + } else { + var s = writeByte(n & 0x7F); + n >>>= 7; + while (n > 0) { + s = writeByte((n & 0x7F) | 0x80) + s; + n >>>= 7; + } + return s; + } +} +function writeSigned(n) { + if (typeof n === 'string') { + var t = ''; + var c = fromHexDigit(n[0]); + var neg = c >= 8; + c = neg ? (c ^ 15) : c; + for (var i = 1; i < n.length; i++) { + var d = fromHexDigit(n[i]); + c = (c << 4) | (neg ? (d ^ 15) : d); + t += toHexDigit(c >> 3); + c = c & 7; + } + t += toHexDigit((c << 1) | (neg ? 1 : 0)); + return writeNumber(t); + } + return n < 0 ? 
writeNumber(-2 * n - 1) : writeNumber(2 * n); +} +function writeString(s) { + var t = writeNumber(s.length); + for (var i = 0; i < s.length; i++) { + t += writeNumber(s.charCodeAt(i)); + } + return t; +} +function addHex(a, b) { + var c = 0, s = ''; + for (var i = a.length - 1; i >= 0; i--) { + c += fromHexDigit(a[i]) + fromHexDigit(b[i]); + if (c >= 16) { + s = toHexDigit(c - 16) + s; + c = 1; + } else { + s = toHexDigit(c) + s; + c = 0; + } + } + return s; +} +function subHex(a, b) { + var c = 0, s = ''; + for (var i = a.length - 1; i >= 0; i--) { + c += fromHexDigit(a[i]) - fromHexDigit(b[i]); + if (c < 0) { + s = toHexDigit(c + 16) + s; + c = -1; + } else { + s = toHexDigit(c) + s; + c = 0; + } + } + return s; +} +function incHex(a) { + var c = 1, s = ''; + for (var i = a.length - 1; i >= 0; i--) { + c += fromHexDigit(a[i]); + if (c >= 16) { + s = toHexDigit(c - 16) + s; + c = 1; + } else { + s = toHexDigit(c) + s; + c = 0; + } + } + return s; +} + +exports.compressCmaps = function (src, dest, verify) { + var files = fs.readdirSync(src).filter(function (fn) { + return fn.indexOf('.') < 0; // skipping files with the extension + }); + files.forEach(function (fn) { + var srcPath = path.join(src, fn); + var destPath = path.join(dest, fn + '.bcmap'); + var stats = compressCmap(srcPath, destPath, verify); + console.log('Compressing ' + fn + ': ' + stats.orig + ' vs ' + stats.packed + + ' ' + (stats.packed / stats.orig * 100).toFixed(1) + '%'); + }); +}; diff --git a/external/cmapscompress/optimize.js b/external/cmapscompress/optimize.js new file mode 100644 index 000000000..1252b9a27 --- /dev/null +++ b/external/cmapscompress/optimize.js @@ -0,0 +1,211 @@ +/* Copyright 2014 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +exports.optimizeCMap = function (data) { + var i = 1; + while (i < data.body.length) { + if (data.body[i - 1].type === data.body[i].type) { + data.body[i - 1].items = data.body[i - 1].items.concat(data.body[i].items); + data.body.splice(i, 1); + } else { + i++; + } + } + // split into groups with different lengths + var i = 0; + while (i < data.body.length) { + var item = data.body[i]; + var keys = Object.keys(item.items[0]).filter(function (i) { + return typeof item.items[0][i] === 'string'; + }); + var j = 1; + while (j < item.items.length) { + var different = false; + for (var q = 0; q < keys.length && !different; q++) { + different = item.items[j - 1][keys[q]].length !== item.items[j][keys[q]].length; + } + if (different) { + break; + } + j++; + } + if (j < item.items.length) { + data.body.splice(i + 1, 0, { + type: item.type, + items: item.items.splice(j, item.items.length - j) + }); + } + i++; + } + // find sequences of single char ranges + var i = 0; + while (i < data.body.length) { + var item = data.body[i]; + if (item.type === 3 || item.type === 5) { + var j = 0; + while (j < item.items.length) { + var q = j; + while (j < item.items.length && item.items[j].start === item.items[j].end) { + j++; + } + if ((j - q) >= 9) { + if (j < item.items.length) { + data.body.splice(i + 1, 0, { + type: item.type, + items: item.items.splice(j, item.items.length - j) + }); + } + if (q > 0) { + data.body.splice(i + 1, 0, { + type: item.type - 1, + items: item.items.splice(q, j - q).map(function (i) { + return {char: i.start, code: i.code }; + }) + 
}); + i++; + } else { + item.type -= 1; + item.items = item.items.map(function (i) { + return {char: i.start, code: i.code }; + }); + } + continue; + } + j++; + } + } + i++; + } + + // find sequences of increasing code/ranges order + var i = 0; + while (i < data.body.length) { + var item = data.body[i]; + if (item.type >= 2 && item.type <= 5) { + var j = 1; + var startProp = item.type === 2 || item.type === 4 ? 'char' : 'start'; + var endProp = item.type === 2 || item.type === 4 ? 'char' : 'end'; + while (j < item.items.length) { + var q = j - 1; + while (j < item.items.length && incHex(item.items[j - 1][endProp]) === item.items[j][startProp]) { + j++; + } + if ((j - q) >= 9) { + if (j < item.items.length) { + data.body.splice(i + 1, 0, { + type: item.type, + items: item.items.splice(j, item.items.length - j) + }); + } + if (q > 0) { + data.body.splice(i + 1, 0, { + type: item.type, + items: item.items.splice(q, j - q), + sequence: true + }); + i++; + } else { + item.sequence = true; + } + continue; + } + j++; + } + } + i++; + } + + // split non-sequences two groups where codes are close + var i = 0; + while (i < data.body.length) { + var item = data.body[i]; + if (!item.sequence && (item.type === 2 || item.type === 3)) { + var subitems = item.items; + var codes = subitems.map(function (i) { + return i.code; + }); + codes.sort(function (a, b) { + return a - b; + }); + var maxDistance = 100, minItems = 10, itemsPerBucket = 50; + if (subitems.length > minItems && codes[codes.length - 1] - codes[0] > maxDistance) { + var gapsCount = Math.max(2, (subitems.length / itemsPerBucket) | 0); + var gaps = []; + for (var q = 0; q < gapsCount; q++) { + gaps.push({length: 0}); + } + for (var j = 1; j < codes.length; j++) { + var gapLength = codes[j] - codes[j - 1]; + var q = 0; + while (q < gaps.length && gaps[q].length > gapLength) { + q++; + } + if (q >= gaps.length) { + continue; + } + var q0 = q; + while (q < gaps.length) { + if (gaps[q].length < gaps[q0].length) { + q0 = q; 
+ } + q++; + } + gaps[q0] = {length: gapLength, boundary: codes[j]}; + } + var groups = gaps.filter(function (g) { + return g.length >= maxDistance; + }).map(function (g) { + return g.boundary; + }); + groups.sort(function (a, b) { + return a - b; + }); + if (groups.length > 1) { + var buckets = [item.items = []]; + for (var j = 0; j < groups.length; j++) { + var newItem = {type: item.type, items: []} + buckets.push(newItem.items); + i++; + data.body.splice(i, 0, newItem); + } + for (var j = 0; j < subitems.length; j++) { + var code = subitems[j].code; + var q = 0; + while (q < groups.length && groups[q] <= code) { + q++; + } + buckets[q].push(subitems[j]); + } + } + } + } + i++; + } +}; + +function incHex(a) { + var c = 1, s = ''; + for (var i = a.length - 1; i >= 0; i--) { + c += parseInt(a[i], 16); + if (c >= 16) { + s = '0' + s; + c = 1; + } else { + s = c.toString(16) + s; + c = 0; + } + } + return s; +} diff --git a/external/cmapscompress/parse.js b/external/cmapscompress/parse.js new file mode 100644 index 000000000..df71a2186 --- /dev/null +++ b/external/cmapscompress/parse.js @@ -0,0 +1,101 @@ +/* Copyright 2014 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +exports.parseAdobeCMap = function (content) { + var m = /(\bbegincmap\b[\s\S]*?)\bendcmap\b/.exec(content); + if (!m) { + throw new Error('cmap was not found'); + } + + var body = m[1].replace(/\r\n?/g, '\n'); + var result = { + type: 1, + wmode: 0, + comment: 'Copyright 1990-2009 Adobe Systems Incorporated.\nAll rights reserved.\nhttp://sourceforge.net/adobe/cmap/wiki/License/', + usecmap: null, + body: [] + }; + m = /\/CMapType\s+(\d+)+\s+def\b/.exec(body); + result.type = +m[1]; + m = /\/WMode\s+(\d+)+\s+def\b/.exec(body); + result.wmode = +m[1]; + m = /\/([\w\-]+)\s+usecmap\b/.exec(body); + if (m) { + result.usecmap = m[1]; + } + var re = /(\d+)\s+(begincodespacerange|beginnotdefrange|begincidchar|begincidrange|beginbfchar|beginbfrange)\n([\s\S]*?)\n(endcodespacerange|endnotdefrange|endcidchar|endcidrange|endbfchar|endbfrange)/g; + while (m = re.exec(body)) { + var lines = m[3].toLowerCase().split('\n'); + var m2; + switch (m[2]) { + case 'begincodespacerange': + result.body.push({ + type: 0, + items: lines.map(function (line) { + var m = /<(\w+)>\s+<(\w+)>/.exec(line); + return {start: m[1], end: m[2]}; + }) + }); + break; + case 'beginnotdefrange': + result.body.push({ + type: 1, + items: lines.map(function (line) { + var m = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line); + return {start: m[1], end: m[2], code: +m[3]}; + }) + }); + break; + case 'begincidchar': + result.body.push({ + type: 2, + items: lines.map(function (line) { + var m = /<(\w+)>\s+(\d+)/.exec(line); + return {char: m[1], code: +m[2]}; + }) + }); + break; + case 'begincidrange': + result.body.push({ + type: 3, + items: lines.map(function (line) { + var m = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line); + return {start: m[1], end: m[2], code: +m[3]}; + }) + }); + break; + case 'beginbfchar': + result.body.push({ + type: 4, + items: lines.map(function (line) { + var m = /<(\w+)>\s+<(\w+)>/.exec(line); + return {char: m[1], code: m[2]}; + }) + }); + break; + case 'beginbfrange': + result.body.push({ 
+ type: 5, + items: lines.map(function (line) { + var m = /<(\w+)>\s+<(\w+)>\s+<(\w+)>/.exec(line); + return {start: m[1], end: m[2], code: m[3]}; + }) + }); + break; + } + } + + return result; +}; \ No newline at end of file diff --git a/make.js b/make.js index 80cd86de8..936ff5184 100644 --- a/make.js +++ b/make.js @@ -90,6 +90,7 @@ var COMMON_WEB_FILES = target.generic = function() { target.bundle({}); target.locale(); + target.cmaps(); cd(ROOT_DIR); echo(); @@ -107,10 +108,10 @@ target.generic = function() { copy: [ [COMMON_WEB_FILES, GENERIC_DIR + '/web'], ['external/webL10n/l10n.js', GENERIC_DIR + '/web'], - ['external/cmaps/', GENERIC_DIR + '/web/cmaps'], ['web/viewer.css', GENERIC_DIR + '/web'], ['web/compatibility.js', GENERIC_DIR + '/web'], ['web/compressed.tracemonkey-pldi-09.pdf', GENERIC_DIR + '/web'], + ['web/cmaps', GENERIC_DIR + '/web'], ['web/locale', GENERIC_DIR + '/web'] ], preprocess: [ @@ -228,6 +229,25 @@ target.locale = function() { chromeManifestContent.to(CHROME_MANIFEST_OUTPUT); }; +// +// make cmaps +// Compresses cmap files +// +target.cmaps = function (args) { + var CMAP_INPUT = 'external/cmaps'; + var VIEWER_CMAP_OUTPUT = 'web/cmaps'; + cd(ROOT_DIR); + echo(); + echo('### Building cmaps'); + + rm('-rf', VIEWER_CMAP_OUTPUT); + mkdir('-p', VIEWER_CMAP_OUTPUT); + + var compressCmaps = + require('./external/cmapscompress/compress.js').compressCmaps; + compressCmaps(CMAP_INPUT, VIEWER_CMAP_OUTPUT, true); +}; + // // make bundle // Bundles all source files into one wrapper 'pdf.js' file, in the given order. 
@@ -410,6 +430,7 @@ target.minified = function() { target.bundle({}); target.locale(); + target.cmaps(); cd(ROOT_DIR); echo(); @@ -428,6 +449,7 @@ target.minified = function() { [COMMON_WEB_FILES, MINIFIED_DIR + '/web'], ['web/viewer.css', MINIFIED_DIR + '/web'], ['web/compressed.tracemonkey-pldi-09.pdf', MINIFIED_DIR + '/web'], + ['web/cmaps', MINIFIED_DIR + '/web'], ['web/locale', MINIFIED_DIR + '/web'] ], preprocess: [ @@ -492,6 +514,7 @@ target.extension = function() { echo('### Building extensions'); target.locale(); + target.cmaps(); target.firefox(); target.chromium(); }; @@ -544,6 +567,7 @@ target.firefox = function() { FIREFOX_AMO_EXTENSION_NAME = 'pdf.js.amo.xpi'; target.locale(); + target.cmaps(); target.bundle({ excludes: ['core/network.js'], defines: defines }); cd(ROOT_DIR); @@ -574,7 +598,7 @@ target.firefox = function() { defines: defines, copy: [ [COMMON_WEB_FILES, FIREFOX_BUILD_CONTENT_DIR + '/web'], - ['external/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'], + ['web/cmaps/', FIREFOX_BUILD_CONTENT_DIR + '/web/cmaps'], [FIREFOX_EXTENSION_DIR + 'tools/l10n.js', FIREFOX_BUILD_CONTENT_DIR + '/web'], ['web/default_preferences.js', FIREFOX_BUILD_CONTENT_DIR] @@ -691,7 +715,7 @@ target.mozcentral = function() { defines: defines, copy: [ [COMMON_WEB_FILES, MOZCENTRAL_CONTENT_DIR + '/web'], - ['external/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'], + ['web/cmaps/', MOZCENTRAL_CONTENT_DIR + '/web/cmaps'], ['extensions/firefox/tools/l10n.js', MOZCENTRAL_CONTENT_DIR + '/web'], ['web/default_preferences.js', MOZCENTRAL_CONTENT_DIR] ], @@ -746,6 +770,7 @@ target.mozcentral = function() { target.b2g = function() { target.locale(); + target.cmaps(); echo(); echo('### Building B2G (Firefox OS App)'); @@ -763,10 +788,10 @@ target.b2g = function() { var setup = { defines: defines, copy: [ - ['external/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'], ['extensions/b2g/images', B2G_BUILD_CONTENT_DIR + '/web'], ['extensions/b2g/viewer.html', B2G_BUILD_CONTENT_DIR 
+ '/web'], ['extensions/b2g/viewer.css', B2G_BUILD_CONTENT_DIR + '/web'], + ['web/cmaps/', B2G_BUILD_CONTENT_DIR + '/web/cmaps'], ['web/locale', B2G_BUILD_CONTENT_DIR + '/web'], ['external/webL10n/l10n.js', B2G_BUILD_CONTENT_DIR + '/web'] ], @@ -784,6 +809,9 @@ target.b2g = function() { // make chrome // target.chromium = function() { + target.locale(); + target.cmaps(); + cd(ROOT_DIR); echo(); echo('### Building Chromium extension'); @@ -804,7 +832,6 @@ target.chromium = function() { var setup = { defines: defines, copy: [ - ['external/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'], [COMMON_WEB_FILES, CHROME_BUILD_CONTENT_DIR + '/web'], [['extensions/chromium/*.json', 'extensions/chromium/*.html', @@ -814,6 +841,7 @@ target.chromium = function() { CHROME_BUILD_DIR], ['external/webL10n/l10n.js', CHROME_BUILD_CONTENT_DIR + '/web'], ['web/viewer.css', CHROME_BUILD_CONTENT_DIR + '/web'], + ['web/cmaps/', CHROME_BUILD_CONTENT_DIR + '/web/cmaps'], ['web/locale', CHROME_BUILD_CONTENT_DIR + '/web'] ], preprocess: [ @@ -931,6 +959,8 @@ target.test = function() { // (Special tests for the Github bot) // target.bottest = function() { + target.cmaps(); + target.unittest({}, function() { target.fonttest({}, function() { target.browsertest({noreftest: true}); @@ -1011,6 +1041,8 @@ target.fonttest = function(options, callback) { // make botmakeref // target.botmakeref = function() { + target.cmaps(); + cd(ROOT_DIR); echo(); echo('### Creating reference images'); diff --git a/src/core/cmap.js b/src/core/cmap.js index 2b45dad58..a88667ffb 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -15,7 +15,7 @@ * limitations under the License. 
*/ /* globals Util, isString, isInt, warn, error, isCmd, isEOF, isName, Lexer, - isStream, StringStream */ + isStream, StringStream, PDFJS, assert */ 'use strict'; @@ -275,6 +275,314 @@ var IdentityCMap = (function IdentityCMapClosure() { return IdentityCMap; })(); +var BinaryCMapReader = (function BinaryCMapReaderClosure() { + function fetchBinaryData(url) { + var nonBinaryRequest = PDFJS.disableWorker; + var request = new XMLHttpRequest(); + request.open('GET', url, false); + if (!nonBinaryRequest) { + try { + request.responseType = 'arraybuffer'; + nonBinaryRequest = request.responseType !== 'arraybuffer'; + } catch (e) { + nonBinaryRequest = true; + } + } + if (nonBinaryRequest && request.overrideMimeType) { + request.overrideMimeType('text/plain; charset=x-user-defined'); + } + request.send(null); + if (request.status === 0 && /^https?:/i.test(url)) { + error('Unable to get binary cMap at: ' + url); + } + if (nonBinaryRequest) { + var data = Array.prototype.map.call(request.responseText, function (ch) { + return ch.charCodeAt(0) & 255; + }); + return new Uint8Array(data); + } + return new Uint8Array(request.response); + } + + function hexToInt(a, size) { + var n = 0; + for (var i = 0; i <= size; i++) { + n = (n << 8) | a[i]; + } + return n >>> 0; + } + + function hexToStr(a, size) { + return String.fromCharCode.apply(null, a.subarray(0, size + 1)); + } + + function addHex(a, b, size) { + var c = 0; + for (var i = size; i >= 0; i--) { + c += a[i] + b[i]; + a[i] = c & 255; + c >>= 8; + } + } + + function incHex(a, size) { + var c = 1; + for (var i = size; i >= 0 && c > 0; i--) { + c += a[i]; + a[i] = c & 255; + c >>= 8; + } + } + + var MAX_NUM_SIZE = 16; + var MAX_ENCODED_NUM_SIZE = 19; // ceil(MAX_NUM_SIZE * 7 / 8) + + function BinaryCMapStream(data) { + this.buffer = data; + this.pos = 0; + this.end = data.length; + this.tmpBuf = new Uint8Array(MAX_ENCODED_NUM_SIZE); + } + + BinaryCMapStream.prototype = { + readByte: function () { + if (this.pos >= this.end) { 
+        return -1;
+      }
+      return this.buffer[this.pos++];
+    },
+    // Reads an unsigned variable-length integer: every byte contributes its
+    // low 7 bits (most significant group first) and a clear high bit marks
+    // the last byte. Calls error() when the stream ends inside the number.
+    readNumber: function () {
+      var n = 0;
+      var last;
+      do {
+        var b = this.readByte();
+        if (b < 0) {
+          error('unexpected EOF in bcmap');
+        }
+        last = !(b & 0x80);
+        n = (n << 7) | (b & 0x7F);
+      } while (!last);
+      return n;
+    },
+    // Reads a variable-length integer whose lowest bit carries the sign:
+    // when it is set the result is ~(n >>> 1), otherwise n >>> 1.
+    readSigned: function () {
+      var n = this.readNumber();
+      return (n & 1) ? ~(n >>> 1) : n >>> 1;
+    },
+    // Copies size + 1 raw bytes from the stream into num and advances the
+    // read position by the same amount.
+    // NOTE(review): unlike readNumber there is no EOF check here; confirm
+    // that a truncated file is acceptable at this point.
+    readHex: function (num, size) {
+      num.set(this.buffer.subarray(this.pos,
+        this.pos + size + 1));
+      this.pos += size + 1;
+    },
+    // Reads a variable-length number (same 7-bit group encoding as
+    // readNumber) and stores it in num[0..size] as a big-endian byte
+    // sequence, i.e. num[size] receives the least significant byte.
+    // Calls error() when the stream ends inside the number.
+    readHexNumber: function (num, size) {
+      var last;
+      // tmpBuf is used as a stack of 7-bit groups; sp is the stack pointer.
+      var stack = this.tmpBuf, sp = 0;
+      do {
+        var b = this.readByte();
+        if (b < 0) {
+          error('unexpected EOF in bcmap');
+        }
+        last = !(b & 0x80);
+        stack[sp++] = b & 0x7F;
+      } while (!last);
+      // Repack the 7-bit groups into 8-bit bytes, least significant first.
+      var i = size, buffer = 0, bufferSize = 0;
+      while (i >= 0) {
+        // Refill only while groups remain on the stack. The guard has to
+        // test sp: stack.length is the fixed size of the scratch buffer and
+        // never reaches 0, which made the loop read stack[-1], stack[-2]...
+        // once the stack was empty.
+        while (bufferSize < 8 && sp > 0) {
+          buffer = (stack[--sp] << bufferSize) | buffer;
+          bufferSize += 7;
+        }
+        // With an empty stack the remaining bits (then zeros) are emitted.
+        num[i] = buffer & 255;
+        i--;
+        buffer >>= 8;
+        bufferSize -= 8;
+      }
+    },
+    // Signed variant of readHexNumber: the lowest bit of the decoded number
+    // is the sign flag. The number is shifted right by one bit across all
+    // bytes and, when the flag was set, every byte is inverted.
+    readHexSigned: function (num, size) {
+      this.readHexNumber(num, size);
+      var sign = num[size] & 1 ? 255 : 0;
+      // c carries the bit shifted out of the previous (more significant)
+      // byte into the top of the next one.
+      var c = 0;
+      for (var i = 0; i <= size; i++) {
+        c = ((c & 1) << 8) | num[i];
+        num[i] = (c >> 1) ^ sign;
+      }
+    },
+    // Reads a string stored as a length followed by that many character
+    // codes, each encoded with readNumber.
+    readString: function () {
+      var len = this.readNumber();
+      var s = '';
+      for (var i = 0; i < len; i++) {
+        s += String.fromCharCode(this.readNumber());
+      }
+      return s;
+    }
+  };
+
+  // Decodes the binary CMap found at url into cMap.
+  //   url    - location handed to fetchBinaryData.
+  //   cMap   - target object; receives the vertical flag, the codespace
+  //            ranges and the character mappings.
+  //   extend - callback invoked with the parent CMap name when the file
+  //            contains a usecmap record; called after all records were read.
+  // Returns cMap. Calls error() on an unknown record type.
+  function processBinaryCMap(url, cMap, extend) {
+    var data = fetchBinaryData(url);
+    var stream = new BinaryCMapStream(data);
+
+    // Header byte: bit 0 is the writing mode.
+    var header = stream.readByte();
+    cMap.vertical = !!(header & 1);
+
+    var useCMap = null;
+    // Scratch numbers, reused for every record.
+    var start = new Uint8Array(MAX_NUM_SIZE);
+    var end = new Uint8Array(MAX_NUM_SIZE);
+    var char = new Uint8Array(MAX_NUM_SIZE);
+    var charCode = new Uint8Array(MAX_NUM_SIZE);
+    var tmp = new Uint8Array(MAX_NUM_SIZE);
+    var code;
+
+    // Every record starts with one byte: bits 7-5 hold the record type,
+    // bit 4 the "sequence" flag and bits 3-0 the data size (the byte length
+    // of the numbers minus one).
+    var b;
+    while ((b = stream.readByte()) >= 0) {
+      var type = b >> 5;
+      if (type === 7) { // metadata, e.g. comment or usecmap
+        switch (b & 0x1F) {
+          case 0:
+            stream.readString(); // skipping comment
+            break;
+          case 1:
+            useCMap = stream.readString();
+            break;
+        }
+        continue;
+      }
+      var sequence = !!(b & 0x10);
+      var dataSize = b & 15;
+
+      assert(dataSize + 1 <= MAX_NUM_SIZE);
+
+      // bfchar/bfrange source codes are always read with this size.
+      var ucs2DataSize = 1;
+      var subitemsCount = stream.readNumber();
+      // In every record the first item is stored in full; the following
+      // items are stored as differences: a start relative to the previous
+      // end + 1 and an end relative to its own start (see the writer in
+      // compress.js). When the sequence flag is set the start/char
+      // difference is omitted from the stream.
+      switch (type) {
+        case 0: // codespacerange
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
+                                 hexToInt(end, dataSize));
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            stream.readHexNumber(start, dataSize);
+            addHex(start, end, dataSize);
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            cMap.addCodespaceRange(dataSize + 1, hexToInt(start, dataSize),
+                                   hexToInt(end, dataSize));
+          }
+          break;
+        case 1: // notdefrange
+          // The values are read only to advance the stream; nothing is
+          // stored in cMap for this record type.
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          code = stream.readNumber();
+          // undefined range, skipping
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            stream.readHexNumber(start, dataSize);
+            addHex(start, end, dataSize);
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            code = stream.readNumber();
+            // nop
+          }
+          break;
+        case 2: // cidchar
+          stream.readHex(char, dataSize);
+          code = stream.readNumber();
+          cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(char, dataSize);
+            if (!sequence) {
+              stream.readHexNumber(tmp, dataSize);
+              addHex(char, tmp, dataSize);
+            }
+            // The code is stored as a signed difference to previous + 1.
+            code = stream.readSigned() + (code + 1);
+            cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code));
+          }
+          break;
+        case 3: // cidrange
+          stream.readHex(start, dataSize);
+          stream.readHexNumber(end, dataSize);
+          addHex(end, start, dataSize);
+          code = stream.readNumber();
+          cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
+                        String.fromCharCode(code));
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(end, dataSize);
+            if (!sequence) {
+              stream.readHexNumber(start, dataSize);
+              addHex(start, end, dataSize);
+            } else {
+              // Consecutive ranges: start directly after the previous end.
+              start.set(end);
+            }
+            stream.readHexNumber(end, dataSize);
+            addHex(end, start, dataSize);
+            code = stream.readNumber();
+            cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
+                          String.fromCharCode(code));
+          }
+          break;
+        case 4: // bfchar
+          stream.readHex(char, ucs2DataSize);
+          stream.readHex(charCode, dataSize);
+          cMap.mapOne(hexToInt(char, ucs2DataSize),
+                      hexToStr(charCode, dataSize));
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(char, ucs2DataSize);
+            if (!sequence) {
+              stream.readHexNumber(tmp, ucs2DataSize);
+              addHex(char, tmp, ucs2DataSize);
+            }
+            // The target is stored as a signed difference to previous + 1.
+            incHex(charCode, dataSize);
+            stream.readHexSigned(tmp, dataSize);
+            addHex(charCode, tmp, dataSize);
+            cMap.mapOne(hexToInt(char, ucs2DataSize),
+                        hexToStr(charCode, dataSize));
+          }
+          break;
+        case 5: // bfrange
+          stream.readHex(start, ucs2DataSize);
+          stream.readHexNumber(end, ucs2DataSize);
+          addHex(end, start, ucs2DataSize);
+          stream.readHex(charCode, dataSize);
+          cMap.mapRange(hexToInt(start, ucs2DataSize),
+                        hexToInt(end, ucs2DataSize),
+                        hexToStr(charCode, dataSize));
+          for (var i = 1; i < subitemsCount; i++) {
+            incHex(end, ucs2DataSize);
+            if (!sequence) {
+              stream.readHexNumber(start, ucs2DataSize);
+              addHex(start, end, ucs2DataSize);
+            } else {
+              // Consecutive ranges: start directly after the previous end.
+              start.set(end);
+            }
+            stream.readHexNumber(end, ucs2DataSize);
+            addHex(end, start, ucs2DataSize);
+            // Unlike bfchar, the target of every range is stored in full.
+            stream.readHex(charCode, dataSize);
+            cMap.mapRange(hexToInt(start, ucs2DataSize),
+                          hexToInt(end, ucs2DataSize),
+                          hexToStr(charCode, dataSize));
+          }
+          break;
+        default:
+          error('Unknown type: ' + type);
+          break;
+      }
+    }
+
+    // The parent CMap is resolved last, after this file's own records.
+    if (useCMap) {
+      extend(useCMap);
+    }
+    return cMap;
+  }
+
+  // Reader object whose read(url, cMap, extend) is processBinaryCMap.
+  function BinaryCMapReader() {}
+
+  BinaryCMapReader.prototype = {
+    read: processBinaryCMap
+  };
+
+  return
BinaryCMapReader; +})(); + var CMapFactory = (function CMapFactoryClosure() { function strToInt(str) { var a = 0; @@ -417,7 +725,7 @@ var CMapFactory = (function CMapFactoryClosure() { } } - function parseCMap(cMap, lexer, builtInCMapUrl, useCMap) { + function parseCMap(cMap, lexer, builtInCMapParams, useCMap) { var previous; var embededUseCMap; objLoop: while (true) { @@ -463,28 +771,41 @@ var CMapFactory = (function CMapFactoryClosure() { useCMap = embededUseCMap; } if (useCMap) { - cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapUrl); - // If there aren't any code space ranges defined clone all the parent ones - // into this cMap. - if (cMap.numCodespaceRanges === 0) { - var useCodespaceRanges = cMap.useCMap.codespaceRanges; - for (var i = 0; i < useCodespaceRanges.length; i++) { - cMap.codespaceRanges[i] = useCodespaceRanges[i].slice(); - } - cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges; - } - // Merge the map into the current one, making sure not to override - // any previously defined entries. - for (var key in cMap.useCMap.map) { - if (key in cMap.map) { - continue; - } - cMap.map[key] = cMap.useCMap.map[key]; - } + extendCMap(cMap, builtInCMapParams, useCMap); } } - function createBuiltInCMap(name, builtInCMapUrl) { + function extendCMap(cMap, builtInCMapParams, useCMap) { /* Creates the built-in CMap named by useCMap, stores it in cMap.useCMap and merges its codespace ranges and map entries into cMap. */ + cMap.useCMap = createBuiltInCMap(useCMap, builtInCMapParams); + // If there aren't any code space ranges defined clone all the parent ones + // into this cMap. + if (cMap.numCodespaceRanges === 0) { + var useCodespaceRanges = cMap.useCMap.codespaceRanges; + for (var i = 0; i < useCodespaceRanges.length; i++) { + cMap.codespaceRanges[i] = useCodespaceRanges[i].slice(); + } + cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges; + } + // Merge the map into the current one, making sure not to override + // any previously defined entries.
+ for (var key in cMap.useCMap.map) { + if (key in cMap.map) { + continue; + } + cMap.map[key] = cMap.useCMap.map[key]; + } + } + + function parseBinaryCMap(name, builtInCMapParams) { /* Reads the file at builtInCMapParams.url followed by name and '.bcmap' into a new CMap and returns it. */ + var url = builtInCMapParams.url + name + '.bcmap'; + var cMap = new CMap(true); + new BinaryCMapReader().read(url, cMap, function (useCMap) { /* called by the reader with the name of the parent CMap, which is then merged into cMap */ + extendCMap(cMap, builtInCMapParams, useCMap); + }); + return cMap; + } + + function createBuiltInCMap(name, builtInCMapParams) { if (name === 'Identity-H') { return new IdentityCMap(false, 2); } else if (name === 'Identity-V') { @@ -493,9 +814,14 @@ var CMapFactory = (function CMapFactoryClosure() { if (BUILT_IN_CMAPS.indexOf(name) === -1) { error('Unknown cMap name: ' + name); } + assert (builtInCMapParams, 'buildin cmap parameters are not provided'); + + if (builtInCMapParams.packed) { /* packed CMaps are loaded from their binary .bcmap file; the XMLHttpRequest code below is skipped */ + return parseBinaryCMap(name, builtInCMapParams); + } var request = new XMLHttpRequest(); - var url = builtInCMapUrl + name; + var url = builtInCMapParams.url + name; request.open('GET', url, false); request.send(null); if (request.status === 0 && /^https?:/i.test(url)) { @@ -503,19 +829,19 @@ var CMapFactory = (function CMapFactoryClosure() { } var cMap = new CMap(true); var lexer = new Lexer(new StringStream(request.responseText)); - parseCMap(cMap, lexer, builtInCMapUrl, null); + parseCMap(cMap, lexer, builtInCMapParams, null); return cMap; } return { - create: function (encoding, builtInCMapUrl, useCMap) { + create: function (encoding, builtInCMapParams, useCMap) { if (isName(encoding)) { - return createBuiltInCMap(encoding.name, builtInCMapUrl); + return createBuiltInCMap(encoding.name, builtInCMapParams); } else if (isStream(encoding)) { var cMap = new CMap(); var lexer = new Lexer(encoding); try { - parseCMap(cMap, lexer, builtInCMapUrl, useCMap); + parseCMap(cMap, lexer, builtInCMapParams, useCMap); } catch (e) { warn('Invalid CMap data.
' + e); } diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 226532187..e305b9587 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1259,7 +1259,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { if (isName(cidEncoding)) { properties.cidEncoding = cidEncoding.name; } - properties.cMap = CMapFactory.create(cidEncoding, PDFJS.cMapUrl, null); + properties.cMap = CMapFactory.create(cidEncoding, + { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); properties.vertical = properties.cMap.vertical; } this.extractDataStructures(dict, baseDict, xref, properties); diff --git a/src/core/fonts.js b/src/core/fonts.js index ba4bba527..b9b9d2999 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -4223,7 +4223,8 @@ var Font = (function FontClosure() { var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2'); // d) Obtain the CMap with the name constructed in step (c) (available // from the ASN Web site; see the Bibliography). - var ucs2CMap = CMapFactory.create(ucs2CMapName, PDFJS.cMapUrl, null); + var ucs2CMap = CMapFactory.create(ucs2CMapName, + { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); var cMap = properties.cMap; var toUnicode = []; for (var charcode in cMap.map) { diff --git a/src/core/worker.js b/src/core/worker.js index 1539f24bc..4e26e70aa 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -241,6 +241,7 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { PDFJS.verbosity = data.verbosity; PDFJS.cMapUrl = data.cMapUrl === undefined ? null : data.cMapUrl; + PDFJS.cMapPacked = data.cMapPacked === true; getPdfManager(data).then(function () { pdfManager.onLoadedStream().then(function(stream) { diff --git a/src/display/api.js b/src/display/api.js index 5f3a12969..8ea7d7e7f 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -36,6 +36,12 @@ PDFJS.maxImageSize = PDFJS.maxImageSize === undefined ? -1 : PDFJS.maxImageSize; */ PDFJS.cMapUrl = PDFJS.cMapUrl === undefined ? 
null : PDFJS.cMapUrl; +/** + * Specifies if CMaps are binary packed. + * @var {boolean} + */ +PDFJS.cMapPacked = PDFJS.cMapPacked === undefined ? false : PDFJS.cMapPacked; + /* * By default fonts are converted to OpenType fonts and loaded via font face * rules. If disabled, the font will be rendered using a built in font renderer @@ -942,6 +948,7 @@ var WorkerTransport = (function WorkerTransportClosure() { disableRange: PDFJS.disableRange, maxImageSize: PDFJS.maxImageSize, cMapUrl: PDFJS.cMapUrl, + cMapPacked: PDFJS.cMapPacked, disableFontFace: PDFJS.disableFontFace, disableCreateObjectURL: PDFJS.disableCreateObjectURL, verbosity: PDFJS.verbosity diff --git a/test/driver.js b/test/driver.js index 464eb2aa3..f27797ad2 100644 --- a/test/driver.js +++ b/test/driver.js @@ -28,7 +28,8 @@ // "firefox-bin: Fatal IO error 12 (Cannot allocate memory) on X server :1." // PDFJS.disableWorker = true; PDFJS.enableStats = true; -PDFJS.cMapUrl = '../external/cmaps/'; +PDFJS.cMapUrl = '../web/cmaps/'; +PDFJS.cMapPacked = true; var appPath, masterMode, browser, canvas, dummyCanvas, currentTaskIdx, manifest, stdout; diff --git a/test/test.py b/test/test.py index 630a30bcf..4d51a7ce4 100644 --- a/test/test.py +++ b/test/test.py @@ -105,6 +105,7 @@ MIMEs = { '.ico': 'image/x-icon', '.png': 'image/png', '.log': 'text/plain', + '.bcmap': 'application/octet-stream', '.properties': 'text/plain' } diff --git a/web/.gitignore b/web/.gitignore index 187ce0e6d..50260bdff 100644 --- a/web/.gitignore +++ b/web/.gitignore @@ -1,3 +1,4 @@ viewer-production.html locale.properties -locale/ \ No newline at end of file +locale/ +cmaps/ diff --git a/web/viewer.js b/web/viewer.js index 84156ca2d..f42c7f077 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -66,6 +66,7 @@ PDFJS.imageResourcesPath = './images/'; PDFJS.cMapUrl = '../external/cmaps/'; //#else //PDFJS.cMapUrl = '../web/cmaps/'; +//PDFJS.cMapPacked = true; //#endif var mozL10n = document.mozL10n || document.webL10n;