From 7a0ba61a2a336deb8ae43dae49746f1780705f86 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 18:31:14 -0700 Subject: [PATCH 01/10] compile PDF command streams into JS code --- pdf.js | 136 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 82 insertions(+), 54 deletions(-) diff --git a/pdf.js b/pdf.js index 418db2462..bea42ad2e 100644 --- a/pdf.js +++ b/pdf.js @@ -1395,8 +1395,7 @@ var Page = (function() { gfx.beginDrawing({ x: mediaBox[0], y: mediaBox[1], width: mediaBox[2] - mediaBox[0], height: mediaBox[3] - mediaBox[1] }); - gfx.interpret(new Parser(new Lexer(contents), false), - xref, resources); + gfx.execute(contents, xref, resources); gfx.endDrawing(); } }; @@ -1605,65 +1604,65 @@ var CanvasGraphics = (function() { this.xobjs = null; this.map = { // Graphics state - w: this.setLineWidth, - J: this.setLineCap, - j: this.setLineJoin, - d: this.setDash, - ri: this.setRenderingIntent, - i: this.setFlatness, - gs: this.setGState, - q: this.save, - Q: this.restore, - cm: this.transform, + w: "setLineWidth", + J: "setLineCap", + j: "setLineJoin", + d: "setDash", + ri: "setRenderingIntent", + i: "setFlatness", + gs: "setGState", + q: "save", + Q: "restore", + cm: "transform", // Path - m: this.moveTo, - l: this.lineTo, - c: this.curveTo, - h: this.closePath, - re: this.rectangle, - S: this.stroke, - f: this.fill, - "f*": this.eoFill, - B: this.fillStroke, - b: this.closeFillStroke, - n: this.endPath, + m: "moveTo", + l: "lineTo", + c: "curveTo", + h: "closePath", + re: "rectangle", + S: "stroke", + f: "fill", + "f*": "eoFill", + B: "fillStroke", + b: "closeFillStroke", + n: "endPath", // Clipping - W: this.clip, - "W*": this.eoClip, + W: "clip", + "W*": "eoClip", // Text - BT: this.beginText, - ET: this.endText, - TL: this.setLeading, - Tf: this.setFont, - Td: this.moveText, - Tm: this.setTextMatrix, - "T*": this.nextLine, - Tj: this.showText, - TJ: this.showSpacedText, + BT: "beginText", + ET: "endText", + TL: "setLeading", + Tf: "setFont", + Td: "moveText", + Tm: "setTextMatrix", + "T*": "nextLine", + Tj: "showText", + TJ: "showSpacedText", // Type3 fonts // Color - CS: this.setStrokeColorSpace, - cs: this.setFillColorSpace, - SC: this.setStrokeColor, - SCN: this.setStrokeColorN, - sc: this.setFillColor, - scn: this.setFillColorN, - G: this.setStrokeGray, - g: this.setFillGray, - RG: this.setStrokeRGBColor, - rg: this.setFillRGBColor, + CS: "setStrokeColorSpace", + cs: "setFillColorSpace", + SC: "setStrokeColor", + SCN: "setStrokeColorN", + sc: "setFillColor", + scn: "setFillColorN", + G: "setStrokeGray", + g: "setFillGray", + RG: "setStrokeRGBColor", + rg: "setFillRGBColor", // Shading - sh: this.shadingFill, + sh: "shadingFill", // Images // XObjects - Do: this.paintXObject, + Do: "paintXObject", // Marked content // Compatibility @@ -1683,13 +1682,38 @@ var CanvasGraphics = (function() { this.ctx.translate(0, -mediaBox.height); }, - interpret: function(parser, xref, resources) { + execute: function(stream, xref, resources) { + if (!stream.execute) + this.compile(stream, xref, resources); + var savedXref = this.xref, savedRes = this.res, savedXobjs = this.xobjs; this.xref = xref; this.res = resources || new Dict(); this.xobjs = this.res.get("XObject") || new Dict(); this.xobjs = this.xref.fetchIfRef(this.xobjs); + stream.execute(this, stream.objpool); + + this.xobjs = savedXobjs; + this.res = savedRes; + this.xref = savedXref; + }, + + compile: function(stream, xref, resources) { + var parser = new Parser(new Lexer(stream), false); + var objpool = []; + + function emitArg(arg) { + if (typeof arg == "object" || typeof arg == "string") { + var index = objpool.length; + objpool[index] = arg; + return "objpool[" + index + "]"; + } + return arg; + } + + var src = "{\n"; + var args = []; var map = this.map; var obj; @@ -1699,7 +1723,12 @@ var CanvasGraphics = (function() { var fn = map[cmd]; assertWellFormed(fn, "Unknown command '" + cmd + "'"); // TODO figure out how to type-check vararg functions - fn.apply(this, args); + + src += "gfx."; + src += fn; + src += "("; + src += args.map(emitArg).join(","); + src += ");\n"; args.length = 0; } else { @@ -1708,9 +1737,10 @@ var CanvasGraphics = (function() { } } - this.xobjs = savedXobjs; - this.res = savedRes; - this.xref = savedXref; + src += "}"; + + stream.execute = new Function("gfx", "objpool", src); + stream.objpool = objpool; }, endDrawing: function() { @@ -2026,9 +2056,7 @@ var CanvasGraphics = (function() { this.clip(); this.endPath(); } - - this.interpret(new Parser(new Lexer(form), false), - this.xref, form.dict.get("Resources")); + this.execute(form, this.xref, form.dict.get("Resources")); this.restore(); }, From e7d6b47099ebccb589bf5d109e7a83799c77dfa3 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 20:36:45 -0700 Subject: [PATCH 02/10] return ready-to-run closure from compile that captures its objpool --- pdf.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pdf.js b/pdf.js index bea42ad2e..20299909e 100644 --- a/pdf.js +++ b/pdf.js @@ -1684,7 +1684,7 @@ var CanvasGraphics = (function() { execute: function(stream, xref, resources) { if (!stream.execute) - this.compile(stream, xref, resources); + stream.execute = this.compile(stream, xref, resources); var savedXref = this.xref, savedRes = this.res, savedXobjs = this.xobjs; this.xref = xref; @@ -1692,7 +1692,7 @@ var CanvasGraphics = (function() { this.xobjs = this.res.get("XObject") || new Dict(); this.xobjs = this.xref.fetchIfRef(this.xobjs); - stream.execute(this, stream.objpool); + stream.execute(this); this.xobjs = savedXobjs; this.res = savedRes; @@ -1724,7 +1724,7 @@ var CanvasGraphics = (function() { assertWellFormed(fn, "Unknown command '" + cmd + "'"); // TODO figure out how to type-check vararg functions - src += "gfx."; + src += "this."; src += fn; src += "("; src += args.map(emitArg).join(","); @@ -1739,8 +1739,8 @@ var CanvasGraphics = (function() { src += "}"; - stream.execute = new Function("gfx", "objpool", src); - stream.objpool = objpool; + var fn = new Function("objpool", src); + return function (gfx) { fn.call(gfx, objpool); }; }, endDrawing: function() { From e8ce0b361d524aa3ee5b91f8b0dccd10f85d51f9 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 22:54:49 -0700 Subject: [PATCH 03/10] eagerly compile XForm objects --- pdf.js | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pdf.js b/pdf.js index 20299909e..48e20c455 100644 --- a/pdf.js +++ b/pdf.js @@ -1689,8 +1689,7 @@ var CanvasGraphics = (function() { var savedXref = this.xref, savedRes = this.res, savedXobjs = this.xobjs; this.xref = xref; this.res = resources || new Dict(); - this.xobjs = this.res.get("XObject") || new Dict(); - this.xobjs = this.xref.fetchIfRef(this.xobjs); + this.xobjs = xref.fetchIfRef(this.res.get("XObject")) || new Dict(); stream.execute(this); @@ -1700,6 +1699,8 @@ var CanvasGraphics = (function() { }, compile: function(stream, xref, resources) { + var xobjs = xref.fetchIfRef(resources.get("XObject")) || new Dict(); + var parser = new Parser(new Lexer(stream), false); var objpool = []; @@ -1724,6 +1725,22 @@ var CanvasGraphics = (function() { assertWellFormed(fn, "Unknown command '" + cmd + "'"); // TODO figure out how to type-check vararg functions + if (cmd == "Do") { // eagerly compile XForm objects + var name = args[0].name; + var xobj = xobjs.get(name); + if (xobj) { + xobj = xref.fetchIfRef(xobj); + assertWellFormed(IsStream(xobj), "XObject should be a stream"); + + var type = xobj.dict.get("Subtype"); + assertWellFormed(IsName(type), "XObject should have a Name subtype"); + + if ("Form" == type.name) { + this.compile(xobj, xref, xobj.dict.get("Resources")); + } + } + } + src += "this."; src += fn; src += "("; From d1b9e4054a322a5a91cddbae30e9e1ab6937ba7c Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 23:16:53 -0700 Subject: [PATCH 04/10] cache results of compilation --- pdf.js | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pdf.js b/pdf.js index 48e20c455..d65151e46 100644 --- a/pdf.js +++ b/pdf.js @@ -1395,7 +1395,9 @@ var Page = (function() { gfx.beginDrawing({ x: mediaBox[0], y: mediaBox[1], width: mediaBox[2] - mediaBox[0], height: mediaBox[3] - mediaBox[1] }); - gfx.execute(contents, xref, resources); + if (!this.code) + this.code = gfx.compile(contents, xref, resources); + gfx.execute(this.code, xref, resources); gfx.endDrawing(); } }; @@ -1682,16 +1684,13 @@ var CanvasGraphics = (function() { this.ctx.translate(0, -mediaBox.height); }, - execute: function(stream, xref, resources) { - if (!stream.execute) - stream.execute = this.compile(stream, xref, resources); - + execute: function(code, xref, resources) { var savedXref = this.xref, savedRes = this.res, savedXobjs = this.xobjs; this.xref = xref; this.res = resources || new Dict(); this.xobjs = xref.fetchIfRef(this.res.get("XObject")) || new Dict(); - stream.execute(this); + code(this); this.xobjs = savedXobjs; this.res = savedRes; @@ -1699,6 +1698,7 @@ var CanvasGraphics = (function() { }, compile: function(stream, xref, resources) { + console.log("compiling"); var xobjs = xref.fetchIfRef(resources.get("XObject")) || new Dict(); var parser = new Parser(new Lexer(stream), false); @@ -1725,7 +1725,7 @@ var CanvasGraphics = (function() { assertWellFormed(fn, "Unknown command '" + cmd + "'"); // TODO figure out how to type-check vararg functions - if (cmd == "Do") { // eagerly compile XForm objects + if (cmd == "Do" && !args[0].code) { // eagerly compile XForm objects var name = args[0].name; var xobj = xobjs.get(name); if (xobj) { @@ -1736,7 +1736,7 @@ var CanvasGraphics = (function() { assertWellFormed(IsName(type), "XObject should have a Name subtype"); if ("Form" == type.name) { - this.compile(xobj, xref, xobj.dict.get("Resources")); + args[0].code = this.compile(xobj, xref, xobj.dict.get("Resources")); } } } @@ -2050,9 +2050,9 @@ var CanvasGraphics = (function() { var type = xobj.dict.get("Subtype"); assertWellFormed(IsName(type), "XObject should have a Name subtype"); if ("Image" == type.name) { - this.paintImageXObject(xobj, false); + this.paintImageXObject(obj, xobj, false); } else if ("Form" == type.name) { - this.paintFormXObject(xobj); + this.paintFormXObject(obj, xobj); } else if ("PS" == type.name) { warn("(deprecated) PostScript XObjects are not supported"); } else { @@ -2060,25 +2060,26 @@ var CanvasGraphics = (function() { } }, - paintFormXObject: function(form) { + paintFormXObject: function(ref, stream) { this.save(); - var matrix = form.dict.get("Matrix"); + var matrix = stream.dict.get("Matrix"); if (matrix && IsArray(matrix) && 6 == matrix.length) this.transform.apply(this, matrix); - var bbox = form.dict.get("BBox"); + var bbox = stream.dict.get("BBox"); if (bbox && IsArray(bbox) && 4 == bbox.length) { this.rectangle.apply(this, bbox); this.clip(); this.endPath(); } - this.execute(form, this.xref, form.dict.get("Resources")); + + this.execute(ref.code, this.xref, stream.dict.get("Resources")); this.restore(); }, - paintImageXObject: function(image, inline) { + paintImageXObject: function(ref, image, inline) { this.save(); if (image.getParams) { // JPX/JPEG2000 streams directly contain bits per component From 662fab04ca6552f9fcc35c167b25ccf0f0ee3515 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 23:22:19 -0700 Subject: [PATCH 05/10] pdf is using a dumb name, content makes much more sense than Contents --- pdf.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pdf.js b/pdf.js index d65151e46..23f249781 100644 --- a/pdf.js +++ b/pdf.js @@ -1373,8 +1373,8 @@ var Page = (function() { } constructor.prototype = { - get contents() { - return shadow(this, "contents", this.pageDict.get("Contents")); + get content() { + return shadow(this, "content", this.pageDict.get("Contents")); }, get resources() { return shadow(this, "resources", this.pageDict.get("Resources")); @@ -1387,16 +1387,16 @@ var Page = (function() { }, display: function(gfx) { var xref = this.xref; - var contents = xref.fetchIfRef(this.contents); + var content = xref.fetchIfRef(this.content); var resources = xref.fetchIfRef(this.resources); var mediaBox = xref.fetchIfRef(this.mediaBox); - assertWellFormed(IsStream(contents) && IsDict(resources), - "invalid page contents or resources"); + assertWellFormed(IsStream(content) && IsDict(resources), + "invalid page content or resources"); gfx.beginDrawing({ x: mediaBox[0], y: mediaBox[1], width: mediaBox[2] - mediaBox[0], height: mediaBox[3] - mediaBox[1] }); if (!this.code) - this.code = gfx.compile(contents, xref, resources); + this.code = gfx.compile(content, xref, resources); gfx.execute(this.code, xref, resources); gfx.endDrawing(); } From d94b3006a3b5e7bedac3e27e2c9139d2b41a3597 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 23:34:11 -0700 Subject: [PATCH 06/10] eagerly translate all fonts (a no-op currently) --- pdf.js | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pdf.js b/pdf.js index 23f249781..5aec4e76a 100644 --- a/pdf.js +++ b/pdf.js @@ -1677,6 +1677,10 @@ var CanvasGraphics = (function() { const EO_CLIP = {}; constructor.prototype = { + translateFont: function(fontDict) { + return fontDict; + }, + beginDrawing: function(mediaBox) { var cw = this.ctx.canvas.width, ch = this.ctx.canvas.height; this.ctx.save(); @@ -1698,7 +1702,6 @@ var CanvasGraphics = (function() { }, compile: function(stream, xref, resources) { - console.log("compiling"); var xobjs = xref.fetchIfRef(resources.get("XObject")) || new Dict(); var parser = new Parser(new Lexer(stream), false); @@ -1739,6 +1742,15 @@ var CanvasGraphics = (function() { args[0].code = this.compile(xobj, xref, xobj.dict.get("Resources")); } } + } else if (cmd == "Tf") { // eagerly collect all fonts + var fontRes = resources.get("Font"); + if (fontRes) { + fontRes = xref.fetchIfRef(fontRes); + var font = xref.fetchIfRef(fontRes.get(args[0].name)); + assertWellFormed(IsDict(font)); + if (!font.translated) + font.translated = this.translateFont(font); + } } src += "this."; From cf4bca7813da34114218f51369d874e6327abcfe Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 23:41:26 -0700 Subject: [PATCH 07/10] completed async font loading framework --- pdf.js | 25 ++++++++++++++++++++----- test.html | 7 +++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/pdf.js b/pdf.js index 5aec4e76a..633437e7e 100644 --- a/pdf.js +++ b/pdf.js @@ -1385,6 +1385,14 @@ var Page = (function() { ? obj : null)); }, + compile: function(gfx, fonts) { + if (!this.code) { + var xref = this.xref; + var content = xref.fetchIfRef(this.content); + var resources = xref.fetchIfRef(this.resources); + this.code = gfx.compile(content, xref, resources, fonts); + } + }, display: function(gfx) { var xref = this.xref; var content = xref.fetchIfRef(this.content); @@ -1395,8 +1403,6 @@ var Page = (function() { gfx.beginDrawing({ x: mediaBox[0], y: mediaBox[1], width: mediaBox[2] - mediaBox[0], height: mediaBox[3] - mediaBox[1] }); - if (!this.code) - this.code = gfx.compile(content, xref, resources); gfx.execute(this.code, xref, resources); gfx.endDrawing(); } @@ -1701,7 +1707,7 @@ var CanvasGraphics = (function() { this.xref = savedXref; }, - compile: function(stream, xref, resources) { + compile: function(stream, xref, resources, fonts) { var xobjs = xref.fetchIfRef(resources.get("XObject")) || new Dict(); var parser = new Parser(new Lexer(stream), false); @@ -1739,7 +1745,10 @@ var CanvasGraphics = (function() { assertWellFormed(IsName(type), "XObject should have a Name subtype"); if ("Form" == type.name) { - args[0].code = this.compile(xobj, xref, xobj.dict.get("Resources")); + args[0].code = this.compile(xobj, + xref, + xobj.dict.get("Resources"), + fonts); } } } else if (cmd == "Tf") { // eagerly collect all fonts @@ -1748,8 +1757,14 @@ var CanvasGraphics = (function() { fontRes = xref.fetchIfRef(fontRes); var font = xref.fetchIfRef(fontRes.get(args[0].name)); assertWellFormed(IsDict(font)); - if (!font.translated) + if (!font.translated) { font.translated = this.translateFont(font); + if (fonts && font.translated) { + // keep track of each font we translated so the caller can + // load them asynchronously before calling display on a page + fonts.push(font.translated); + } + } } } diff --git a/test.html b/test.html index f78f22ce2..e59d0577e 100644 --- a/test.html +++ b/test.html @@ -95,6 +95,13 @@ function displayPage(num) { ctx.restore(); var gfx = new CanvasGraphics(ctx); + + // page.compile will collect all fonts for us, once we have loaded them + // we can trigger the actual page rendering with page.display + var fonts = []; + page.compile(gfx, fonts); + + // This should be called when font loading is complete page.display(gfx); var t2 = Date.now(); From 02df7f8e5888f9b7c78b55157cd596423462c484 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Tue, 14 Jun 2011 23:44:59 -0700 Subject: [PATCH 08/10] clarify API a bit and hand in xref and resources to ease translation --- pdf.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdf.js b/pdf.js index 633437e7e..6e6935f13 100644 --- a/pdf.js +++ b/pdf.js @@ -1683,8 +1683,8 @@ var CanvasGraphics = (function() { const EO_CLIP = {}; constructor.prototype = { - translateFont: function(fontDict) { - return fontDict; + translateFont: function(fontDict, xref, resources) { + return "translated"; }, beginDrawing: function(mediaBox) { @@ -1758,7 +1758,7 @@ var CanvasGraphics = (function() { var font = xref.fetchIfRef(fontRes.get(args[0].name)); assertWellFormed(IsDict(font)); if (!font.translated) { - font.translated = this.translateFont(font); + font.translated = this.translateFont(font, xref, resources); if (fonts && font.translated) { // keep track of each font we translated so the caller can // load them asynchronously before calling display on a page From 595f00f82a269b96a3e15f604b9c24389c11f5e7 Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Wed, 15 Jun 2011 00:20:26 -0700 Subject: [PATCH 09/10] measure load/compile/render times --- test.html | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test.html b/test.html index e59d0577e..9cadf2e1f 100644 --- a/test.html +++ b/test.html @@ -101,12 +101,14 @@ function displayPage(num) { var fonts = []; page.compile(gfx, fonts); + var t2 = Date.now(); + // This should be called when font loading is complete page.display(gfx); - var t2 = Date.now(); + var t3 = Date.now(); - infoDisplay.innerHTML = "Time to render: "+ (t1 - t0) + "/" + (t2 - t1) + " ms"; + infoDisplay.innerHTML = "Time to load/compile/render: "+ (t1 - t0) + "/" + (t2 - t1) + "/" + (t3 - t2) + " ms"; } function nextPage() { From 815544ab814eb1c659cab1e9cdc082ae1b6ce37e Mon Sep 17 00:00:00 2001 From: Andreas Gal Date: Wed, 15 Jun 2011 00:37:15 -0700 Subject: [PATCH 10/10] fixes suggested by @brendaneich --- pdf.js | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/pdf.js b/pdf.js index 6e6935f13..3468c7b88 100644 --- a/pdf.js +++ b/pdf.js @@ -590,7 +590,7 @@ function IsString(v) { } function IsNull(v) { - return v == null; + return v === null; } function IsName(v) { @@ -617,27 +617,6 @@ function IsRef(v) { return v instanceof Ref; } -function IsFunction(v) { - var fnDict; - if (typeof v != "object") - return false; - else if (IsDict(v)) - fnDict = v; - else if (IsStream(v)) - fnDict = v.dict; - else - return false; - return fnDict.has("FunctionType"); -} - -function IsFunctionDict(v) { - return IsFunction(v) && IsDict(v); -} - -function IsFunctionStream(v) { - return IsFunction(v) && IsStream(v); -} - var EOF = {}; function IsEOF(v) { @@ -841,10 +820,12 @@ var Lexer = (function() { ch = stream.getChar(); if (ch == '>') { break; - } else if (!ch) { + } + if (!ch) { warn("Unterminated hex string"); break; - } else if (specialChars[ch.charCodeAt(0)] != 1) { + } + if (specialChars[ch.charCodeAt(0)] != 1) { var x, x2; if (((x = ToHexDigit(ch)) == -1) || ((x2 = ToHexDigit(stream.getChar())) == -1)) { @@ -1722,7 +1703,7 @@ var CanvasGraphics = (function() { return arg; } - var src = "{\n"; + var src = ""; var args = []; var map = this.map; @@ -1781,9 +1762,7 @@ var CanvasGraphics = (function() { } } - src += "}"; - - var fn = new Function("objpool", src); + var fn = Function("objpool", src); return function (gfx) { fn.call(gfx, objpool); }; },