// pdf.js
//
// Recovered from the patch "initial checkin, untested, just a couple lines
// of code" (commit 4570f6a444dfc84821a4515a4e004d748ce0edf9, Andreas Gal,
// Tue, 26 Apr 2011 15:33:36 +0900), which created this file.
//
// Contents:
//   Obj            - tagged value produced by the lexer/parser
//   HashMap        - string-keyed map used as the value of Dict objects
//   Lexer          - turns a byte array into a sequence of Obj tokens
//   Parser         - assembles tokens into arrays, dictionaries and references
//   Linearization  - reads the linearization dictionary at the start of a file
//   checkHeader / setup / getLinearization / isLinearized - file-level helpers

// Sentinel returned by the Lexer's character readers at end of input.
var EOF = -1;

// A typed PDF object: `type` is one of the numeric tags Obj.Bool, Obj.Int, ...
// and `value` is the JavaScript payload (undefined for Null, Error, EOF).
//
// For every type name T there is a constant Obj.T and a predicate isT(value):
// isT() is true when the object has type T; isT(v) additionally requires the
// payload to be strictly equal to v.
var Obj = (function() {
  function constructor(type, value) {
    this.type = type;
    this.value = value;
  }

  constructor.prototype = {
    // Looks up `key` in a Dict object. Returns Obj.nullObj when this object
    // is not a dictionary or when the key is absent.
    lookup: function(key) {
      if (!this.isDict() || !this.value.contains(key))
        return constructor.nullObj;
      return this.value.get(key);
    }
  };

  // "Uint" is appended at the end so that the numeric tags of the other
  // types keep their values; it is used for integers that do not fit in a
  // signed 32-bit value but do fit in an unsigned one.
  var types = [
    "Bool", "Int", "Real", "String", "Name", "Null",
    "Array", "Dict", "Stream", "Ref",
    "Cmd", "Error", "EOF", "None",
    "Uint"
  ];

  // Built through a factory so that each predicate captures its own type tag
  // instead of sharing the loop variable.
  function makeTypePredicate(type) {
    return function(value) {
      return this.type === type &&
             (typeof value === "undefined" || value === this.value);
    };
  }

  for (var i = 0; i < types.length; ++i) {
    var typeName = types[i];
    constructor[typeName] = i;
    constructor.prototype["is" + typeName] = makeTypePredicate(i);
  }

  // Shared immutable singletons.
  Object.freeze(constructor.trueObj = new constructor(constructor.Bool, true));
  Object.freeze(constructor.falseObj = new constructor(constructor.Bool, false));
  Object.freeze(constructor.nullObj = new constructor(constructor.Null));
  Object.freeze(constructor.errorObj = new constructor(constructor.Error));
  Object.freeze(constructor.prototype);
  Object.freeze(constructor);

  return constructor;
})();

// Minimal string-keyed map. Keys are stored as own properties prefixed with
// "$" so they can never collide with the get/set/contains methods.
var HashMap = (function() {
  function constructor() {
  }

  constructor.prototype = {
    get: function(key) {
      return this["$" + key];
    },
    set: function(key, value) {
      this["$" + key] = value;
    },
    contains: function(key) {
      return ("$" + key) in this;
    }
  };

  return constructor;
})();

// Tokenizer over `bytes` (anything with a numeric `length` and integer
// elements, e.g. a Uint8Array). Characters are handled as one-character
// strings; EOF is reported with the numeric EOF sentinel.
var Lexer = (function() {
  function constructor(bytes) {
    this.bytes = bytes;
    this.pos = 0;
  }

  // A '1' in this array means the character is white space.  A '1' or
  // '2' means the character ends a name or command.
  var specialChars = [
    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
    1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
  ];

  const MIN_INT = (1<<31) | 0;
  const MAX_INT = (MIN_INT - 1) | 0;
  const MIN_UINT = 0;
  const MAX_UINT = ((1<<30) * 4) - 1;

  // Returns the value (0..15) of the hexadecimal digit `ch`, or -1 when `ch`
  // is not a hex digit (this includes the EOF sentinel).
  function ToHexDigit(ch) {
    if (typeof ch !== "string")
      return -1;
    if (ch >= "0" && ch <= "9")
      return ch.charCodeAt(0) - 0x30;
    if (ch >= "a" && ch <= "f")
      return ch.charCodeAt(0) - 0x61 + 10;
    if (ch >= "A" && ch <= "F")
      return ch.charCodeAt(0) - 0x41 + 10;
    return -1;
  }

  constructor.prototype = {
    // Diagnostic hook for malformed input. It is deliberately a no-op so
    // that lexing continues after a problem is detected.
    // TODO: report `msg` somewhere useful.
    error: function(msg) {
    },
    // Returns the character at the current position without consuming it,
    // or EOF.
    lookChar: function() {
      var bytes = this.bytes;
      if (this.pos >= bytes.length)
        return EOF;
      return String.fromCharCode(bytes[this.pos]);
    },
    // Returns the character at the current position and advances. The
    // position advances even at EOF so that putBack() stays symmetric.
    getChar: function() {
      var ch = this.lookChar();
      this.pos++;
      return ch;
    },
    // Un-reads the most recently read character.
    putBack: function() {
      this.pos--;
    },
    // Advances past one character without looking at it.
    skipChar: function() {
      this.pos++;
    },
    // Reads a number whose first character `ch` (a digit, sign or '.') has
    // already been consumed. Returns an Int, Uint or Real object, or
    // Obj.errorObj when the text is not a number or an integer is out of the
    // unsigned 32-bit range.
    getNumber: function(ch) {
      var floating = (ch === ".");
      var hasExponent = false;
      var str = ch;
      while (true) {
        ch = this.getChar();
        if (ch === "." && !floating) {
          str += ch;
          floating = true;
        } else if (ch === "-") {
          // ignore minus signs in the middle of numbers to match
          // Adobe's behavior
          this.error("Badly formated number");
        } else if (ch >= "0" && ch <= "9") {
          str += ch;
        } else if ((ch === "e" || ch === "E") && !hasExponent) {
          str += ch;
          floating = true;
          hasExponent = true;
          // a sign directly after the exponent marker belongs to the number
          var sign = this.lookChar();
          if (sign === "+" || sign === "-") {
            str += sign;
            this.skipChar();
          }
        } else {
          // put back the last character, it doesn't belong to us
          this.putBack();
          break;
        }
      }
      var value = parseFloat(str);
      if (isNaN(value))
        return Obj.errorObj;
      var type;
      if (floating) {
        type = Obj.Real;
      } else if (value >= MIN_INT && value <= MAX_INT) {
        type = Obj.Int;
      } else if (value >= MIN_UINT && value <= MAX_UINT) {
        type = Obj.Uint;
      } else {
        return Obj.errorObj;
      }
      return new Obj(type, value);
    },
    // Reads a literal string; the opening '(' (passed as `ch`) has already
    // been consumed and is not part of the result. Handles balanced nested
    // parentheses, backslash escapes, octal escapes and line continuations.
    // Always returns a String object; "()" yields the empty string.
    getString: function(ch) {
      var numParen = 1;
      var done = false;
      var str = "";
      do {
        switch (ch = this.getChar()) {
        case EOF:
          this.error("Unterminated string");
          done = true;
          break;
        case "(":
          ++numParen;
          str += ch;
          break;
        case ")":
          if (--numParen === 0) {
            done = true;
          } else {
            str += ch;
          }
          break;
        case "\\":
          switch (ch = this.getChar()) {
          case "n":
            str += "\n";
            break;
          case "r":
            str += "\r";
            break;
          case "t":
            str += "\t";
            break;
          case "b":
            str += "\b";
            break;
          case "f":
            str += "\f";
            break;
          case "\\":
          case "(":
          case ")":
            str += ch;
            break;
          case "0": case "1": case "2": case "3":
          case "4": case "5": case "6": case "7":
            // up to three octal digits
            var x = ch.charCodeAt(0) - 0x30;
            ch = this.lookChar();
            if (ch >= "0" && ch <= "7") {
              this.getChar();
              x = (x << 3) + (ch.charCodeAt(0) - 0x30);
              ch = this.lookChar();
              if (ch >= "0" && ch <= "7") {
                this.getChar();
                x = (x << 3) + (ch.charCodeAt(0) - 0x30);
              }
            }
            // a string element is one byte; drop any high-order overflow
            str += String.fromCharCode(x & 0xff);
            break;
          case "\r":
            // backslash + end-of-line is a line continuation
            ch = this.lookChar();
            if (ch === "\n") {
              this.getChar();
            }
            break;
          case "\n":
            break;
          case EOF:
            this.error("Unterminated string");
            done = true;
            break;
          default:
            // unknown escape: the backslash is dropped
            str += ch;
            break;
          }
          break;
        default:
          str += ch;
          break;
        }
      } while (!done);
      return new Obj(Obj.String, str);
    },
    // Reads a name; the leading '/' (passed as `ch`) has already been
    // consumed. "#xx" sequences are decoded as a hexadecimal character code.
    getName: function(ch) {
      var str = "";
      while ((ch = this.lookChar()) !== EOF &&
             !specialChars[ch.charCodeAt(0)]) {
        this.getChar();
        if (ch === "#") {
          var x = ToHexDigit(this.lookChar());
          if (x !== -1) {
            this.getChar();
            // only consume the second character when it really is a hex
            // digit, so a following delimiter is left for the next token
            var x2 = ToHexDigit(this.lookChar());
            if (x2 === -1) {
              this.error("Illegal digit in hex char in name");
              x2 = 0;
            } else {
              this.getChar();
            }
            str += String.fromCharCode((x << 4) | x2);
          } else {
            // not an escape; the following character is handled by the
            // next loop iteration
            str += "#";
          }
        } else {
          str += ch;
        }
      }
      if (str.length > 128)
        this.error("Warning: name token is longer than allowed by the specification");
      return new Obj(Obj.Name, str);
    },
    // Reads a hexadecimal string; the opening '<' has already been consumed
    // and the closing '>' is consumed here. White space between digits is
    // ignored and a trailing unpaired digit is treated as if followed by 0.
    // Reading stops at the first character that is neither white space nor a
    // hex digit.
    getHexString: function(ch) {
      var str = "";
      var high = -1;   // pending high nibble, -1 when none
      while (true) {
        ch = this.getChar();
        if (ch === ">") {
          break;
        } else if (ch === EOF) {
          this.error("Unterminated hex string");
          break;
        } else if (specialChars[ch.charCodeAt(0)] !== 1) {
          var digit = ToHexDigit(ch);
          if (digit === -1) {
            this.error("Illegal character in hex string");
            break;
          }
          if (high === -1) {
            high = digit;
          } else {
            str += String.fromCharCode((high << 4) | digit);
            high = -1;
          }
        }
      }
      if (high !== -1)
        str += String.fromCharCode(high << 4);
      return new Obj(Obj.String, str);
    },
    // Returns the next token as an Obj. White space and '%' comments are
    // skipped. At end of input an object of type Obj.EOF is returned.
    getObj: function() {
      // skip whitespace and comments
      var comment = false;
      var ch;
      while (true) {
        if ((ch = this.getChar()) === EOF)
          return new Obj(Obj.EOF);
        if (comment) {
          if (ch === "\r" || ch === "\n")
            comment = false;
        } else if (ch === "%") {
          comment = true;
        } else if (specialChars[ch.charCodeAt(0)] !== 1) {
          break;
        }
      }

      // start reading token
      switch (ch) {
      case "0": case "1": case "2": case "3": case "4":
      case "5": case "6": case "7": case "8": case "9":
      case "+": case "-": case ".":
        return this.getNumber(ch);
      case "(":
        return this.getString(ch);
      case "/":
        return this.getName(ch);
      // array punctuation
      case "[":
      case "]":
        return new Obj(Obj.Cmd, ch);
      // hex string or dict punctuation
      case "<":
        if (this.lookChar() === "<") {
          // dict punctuation
          this.getChar();
          return new Obj(Obj.Cmd, "<<");
        }
        return this.getHexString(ch);
      // dict punctuation
      case ">":
        if (this.lookChar() === ">") {
          this.getChar();
          return new Obj(Obj.Cmd, ">>");
        }
        // a lone '>' is illegal
        this.error("Illegal character");
        return Obj.errorObj;
      case ")":
      case "{":
      case "}":
        this.error("Illegal character");
        return Obj.errorObj;
      }

      // command
      var str = ch;
      while ((ch = this.lookChar()) !== EOF &&
             !specialChars[ch.charCodeAt(0)]) {
        this.getChar();
        if (str.length === 128) {
          this.error("Command token too long");
          break;
        }
        str += ch;
      }
      if (str === "true")
        return Obj.trueObj;
      if (str === "false")
        return Obj.falseObj;
      if (str === "null")
        return Obj.nullObj;
      return new Obj(Obj.Cmd, str);
    }
  };

  Object.freeze(constructor.prototype);
  Object.freeze(constructor);
  return constructor;
})();

// Builds composite objects from Lexer tokens using two tokens of lookahead
// (buf1 is the current token, buf2 the next one). `allowStreams` controls
// whether a dictionary followed by the "stream" command is handed to
// makeStream().
var Parser = (function() {
  function constructor(lexer, allowStreams) {
    this.lexer = lexer;
    this.allowStreams = allowStreams;
    this.inlineImg = 0;
    this.refill();
  }

  constructor.prototype = {
    // Diagnostic hook for malformed input; a no-op so parsing continues.
    // TODO: report `msg` somewhere useful.
    error: function(msg) {
    },
    // Loads both lookahead slots from the lexer.
    refill: function() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    },
    // Advances the lookahead window by one token.
    shift: function() {
      if (this.inlineImg > 0) {
        if (this.inlineImg < 2) {
          this.inlineImg++;
        } else {
          // in a damaged content stream, if 'ID' shows up in the middle
          // of a dictionary, we need to reset
          this.inlineImg = 0;
        }
      } else if (this.buf2.isCmd("ID")) {
        this.lexer.skipChar();          // skip char after 'ID' command
        this.inlineImg = 1;
      }
      this.buf1 = this.buf2;
      // don't buffer inline image data
      this.buf2 = (this.inlineImg > 0) ? Obj.nullObj : this.lexer.getObj();
    },
    // Parses and returns the next object: an Array, a Dict (or whatever
    // makeStream() returns when streams are allowed), a Ref, a String, or
    // the current token unchanged for everything else.
    getObj: function() {
      // refill buffer after inline image data
      if (this.inlineImg === 2)
        this.refill();

      var obj;

      // array
      if (this.buf1.isCmd("[")) {
        this.shift();                   // consume '['
        obj = new Obj(Obj.Array, []);
        while (!this.buf1.isCmd("]") && !this.buf1.isEOF())
          obj.value.push(this.getObj());
        if (this.buf1.isEOF())
          this.error("End of file inside array");
        this.shift();
        return obj;
      }

      // dictionary or stream
      if (this.buf1.isCmd("<<")) {
        this.shift();
        obj = new Obj(Obj.Dict, new HashMap());
        while (!this.buf1.isCmd(">>") && !this.buf1.isEOF()) {
          if (!this.buf1.isName()) {
            this.error("Dictionary key must be a name object");
            this.shift();
          } else {
            var key = this.buf1.value;
            this.shift();
            if (this.buf1.isEOF() || this.buf1.isError())
              break;
            obj.value.set(key, this.getObj());
          }
        }
        if (this.buf1.isEOF())
          this.error("End of file inside dictionary");
        // stream objects are not allowed inside content streams or
        // object streams
        if (this.allowStreams && this.buf2.isCmd("stream"))
          return this.makeStream();
        this.shift();
        return obj;
      }

      // indirect reference or integer
      if (this.buf1.isInt()) {
        var num = this.buf1.value;
        this.shift();
        if (this.buf1.isInt() && this.buf2.isCmd("R")) {
          obj = new Obj(Obj.Ref, [num, this.buf1.value]);
          this.shift();
          this.shift();
          return obj;
        }
        return new Obj(Obj.Int, num);
      }

      // string
      if (this.buf1.isString()) {
        obj = this.decrypt(this.buf1);
        this.shift();
        return obj;
      }

      // simple object
      obj = this.buf1;
      this.shift();
      return obj;
    },
    decrypt: function(obj) {
      // TODO
      return obj;
    },
    makeStream: function() {
      // TODO
      return new Obj(Obj.Error);
    }
  };

  Object.freeze(constructor.prototype);
  Object.freeze(constructor);

  return constructor;
})();

// Reads the linearization parameter dictionary from the start of `bytes`.
// The input is expected to begin (after optional comments) with
// "<int> <int> obj << ... /Linearized <n> ... >>" where n > 0; when it does
// not, `linDict` is Obj.nullObj and every getter reports 0.
var Linearization = (function () {
  function constructor(bytes) {
    this.parser = new Parser(new Lexer(bytes), false);
    var obj1 = this.parser.getObj();
    var obj2 = this.parser.getObj();
    var obj3 = this.parser.getObj();
    var linDict = this.parser.getObj();
    var valid = false;
    if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
        linDict.isDict()) {
      var obj = linDict.lookup("Linearized");
      valid = (obj.isInt() || obj.isReal()) && obj.value > 0;
    }
    this.linDict = valid ? linDict : Obj.nullObj;
  }

  constructor.prototype = {
    // Diagnostic hook for invalid tables; a no-op.
    // TODO: report `msg` somewhere useful.
    error: function(msg) {
    },
    // Returns the positive integer stored under `name` in the linearization
    // dictionary, or 0 (after calling error()) when it is missing or invalid.
    getInt: function(name) {
      var linDict = this.linDict;
      var obj;
      if (linDict.isDict() &&
          (obj = linDict.lookup(name)).isInt() &&
          obj.value > 0) {
        return obj.value;
      }
      this.error("'" + name + "' field in linearization table is invalid");
      return 0;
    },
    // Returns entry `index` of the "H" (hints) array when it is a positive
    // integer, or 0 (after calling error()) otherwise. The array must have
    // at least two entries.
    getHint: function(index) {
      var linDict = this.linDict;
      var obj1, obj2;
      if (linDict.isDict() &&
          (obj1 = linDict.lookup("H")).isArray() &&
          obj1.value.length >= 2 &&
          index < obj1.value.length &&
          (obj2 = obj1.value[index]).isInt() &&
          obj2.value > 0) {
        return obj2.value;
      }
      this.error("Hints table in linearization table is invalid");
      return 0;
    },
    get length() {
      return this.getInt("L");
    },
    get hintsOffset() {
      return this.getHint(0);
    },
    get hintsLength() {
      return this.getHint(1);
    },
    get hintsOffset2() {
      return this.getHint(2);
    },
    get hintsLength2() {
      return this.getHint(3);
    },
    // Misspelled name kept so existing callers continue to work.
    get hintsLenth2() {
      return this.getHint(3);
    },
    get objectNumberFirst() {
      return this.getInt("O");
    },
    get endFirst() {
      return this.getInt("E");
    },
    get numPages() {
      return this.getInt("N");
    },
    get mainXRefEntriesOffset() {
      return this.getInt("T");
    },
    get pageFirst() {
      return this.getInt("P");
    }
  };

  Object.freeze(constructor.prototype);
  Object.freeze(constructor);

  return constructor;
})();

// Bytes of the current document, starting at the "%PDF-" header when one was
// found by checkHeader().
var stream;

// Cached Linearization for the current `stream`.
var linearization;

// Returns the Linearization for the current stream, creating it on first use.
function getLinearization() {
  if (linearization)
    return linearization;
  return linearization = new Linearization(stream);
}

// Truthy when the stream is non-empty and the "L" entry of its linearization
// dictionary equals the stream length.
function isLinearized() {
  return stream.length && getLinearization().length == stream.length;
}

// Find the header, remove leading garbage and setup the stream
// starting from the header. Only the first `headerSearchSize` offsets are
// tried. Returns the resulting stream (also stored in the `stream` global).
function checkHeader(arrayBuffer) {
  const headerSearchSize = 1024;
  var header = "%PDF-";

  stream = new Uint8Array(arrayBuffer);
  // any cached linearization belongs to the previous stream
  linearization = undefined;

  // never compare past the end of the data
  var limit = Math.min(headerSearchSize, stream.length - header.length + 1);
  for (var skip = 0; skip < limit; ++skip) {
    var i = 0;
    while (i < header.length && stream[skip + i] === header.charCodeAt(i))
      ++i;

    // Found the header, trim off any garbage before it.
    if (i === header.length) {
      if (skip > 0)
        stream = new Uint8Array(arrayBuffer, skip);
      return stream;
    }
  }

  // May not be a PDF file, continue anyway.
  return stream;
}

// Entry point: prepares the global stream from `arrayBuffer`.
// `ownerPassword` and `userPassword` are accepted but not used yet.
function setup(arrayBuffer, ownerPassword, userPassword) {
  checkHeader(arrayBuffer);
}