parse the page tree and extract pages and their content
This commit is contained in:
parent
7c604ae280
commit
cebd567fa1
79
pdf.js
79
pdf.js
@ -963,6 +963,9 @@ var Dict = (function() {
|
|||||||
get: function(key) {
|
get: function(key) {
|
||||||
return this.map[key];
|
return this.map[key];
|
||||||
},
|
},
|
||||||
|
// Reports whether `key` is present in this dictionary's backing map.
// Returns the boolean result of the `in` operator applied to this.map.
has: function(key) {
|
||||||
|
// NOTE(review): `in` also matches inherited properties; how this.map is
// created is not visible in this hunk -- confirm that names such as
// "toString" cannot produce false positives.
return key in this.map;
|
||||||
|
},
|
||||||
set: function(key, value) {
|
set: function(key, value) {
|
||||||
this.map[key] = value;
|
this.map[key] = value;
|
||||||
}
|
}
|
||||||
@ -1011,8 +1014,8 @@ function IsCmd(v, cmd) {
|
|||||||
return v instanceof Cmd && (!cmd || v.cmd == cmd);
|
return v instanceof Cmd && (!cmd || v.cmd == cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
function IsDict(v) {
|
function IsDict(v, type) {
|
||||||
return v instanceof Dict;
|
return v instanceof Dict && (!type || v.get("Type").name == type);
|
||||||
}
|
}
|
||||||
|
|
||||||
function IsArray(v) {
|
function IsArray(v) {
|
||||||
@ -1495,8 +1498,8 @@ var Parser = (function() {
|
|||||||
},
|
},
|
||||||
makeFilter: function(stream, name, params) {
|
makeFilter: function(stream, name, params) {
|
||||||
print(name);
|
print(name);
|
||||||
for (i in params.map)
|
if (params)
|
||||||
print(i + ": " + params.map[i]);
|
error("filter params not supported yet");
|
||||||
// TODO
|
// TODO
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
@ -1707,7 +1710,7 @@ var XRef = (function() {
|
|||||||
if (e.gen != gen)
|
if (e.gen != gen)
|
||||||
throw("inconsistent generation in XRef");
|
throw("inconsistent generation in XRef");
|
||||||
var stream = this.stream.makeSubStream(e.offset);
|
var stream = this.stream.makeSubStream(e.offset);
|
||||||
var parser = new Parser(new Lexer(stream));
|
var parser = new Parser(new Lexer(stream), true);
|
||||||
var obj1 = parser.getObj();
|
var obj1 = parser.getObj();
|
||||||
var obj2 = parser.getObj();
|
var obj2 = parser.getObj();
|
||||||
var obj3 = parser.getObj();
|
var obj3 = parser.getObj();
|
||||||
@ -1737,6 +1740,27 @@ var XRef = (function() {
|
|||||||
return constructor;
|
return constructor;
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
// Page: wraps one page dictionary together with the xref table needed to
// resolve indirect references found inside it.
var Page = (function() {
|
||||||
|
// xref: object providing fetch(ref) for resolving references
// pageNumber: index assigned by the caller when the page is created
// pageDict: the dictionary describing this page
function constructor(xref, pageNumber, pageDict) {
|
||||||
|
this.xref = xref;
|
||||||
|
this.pageNumber = pageNumber;
|
||||||
|
this.pageDict = pageDict;
|
||||||
|
}
|
||||||
|
|
||||||
|
constructor.prototype = {
|
||||||
|
// Returns the page's "Contents" entry, resolved through the xref when it
// is stored as a reference. Anything other than an array or a stream is
// reported through error().
get contents() {
|
||||||
|
var obj = this.pageDict.get("Contents");
|
||||||
|
// the entry may be indirect; resolve it before validating
if (IsRef(obj))
|
||||||
|
obj = this.xref.fetch(obj);
|
||||||
|
if (!(IsArray(obj) || IsStream(obj)))
|
||||||
|
// presumably error() does not return -- TODO confirm; otherwise the
// invalid object is still returned below
error("invalid page contents object");
|
||||||
|
// NOTE(review): this looks like the "shadow the prototype getter"
// idiom used elsewhere in the file. With a getter-only accessor on the
// prototype, plain assignment may not create an own property, so the
// value might be recomputed on every access -- confirm.
return this.contents = obj;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return constructor;
|
||||||
|
})();
|
||||||
|
|
||||||
var Catalog = (function() {
|
var Catalog = (function() {
|
||||||
function constructor(xref) {
|
function constructor(xref) {
|
||||||
this.xref = xref;
|
this.xref = xref;
|
||||||
@ -1747,7 +1771,7 @@ var Catalog = (function() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
constructor.prototype = {
|
constructor.prototype = {
|
||||||
get pagesDict() {
|
get toplevelPagesDict() {
|
||||||
var obj = this.catDict.get("Pages");
|
var obj = this.catDict.get("Pages");
|
||||||
if (!IsRef(obj))
|
if (!IsRef(obj))
|
||||||
error("invalid top-level pages reference");
|
error("invalid top-level pages reference");
|
||||||
@ -1755,14 +1779,41 @@ var Catalog = (function() {
|
|||||||
if (!IsDict(obj))
|
if (!IsDict(obj))
|
||||||
error("invalid top-level pages dictionary");
|
error("invalid top-level pages dictionary");
|
||||||
// shadow the prototype getter
|
// shadow the prototype getter
|
||||||
return this.pagesDict = obj;
|
return this.toplevelPagesDict = obj;
|
||||||
},
|
},
|
||||||
get numPages() {
|
get numPages() {
|
||||||
obj = this.pagesDict.get("Count");
|
obj = this.toplevelPagesDict.get("Count");
|
||||||
if (!IsInt(obj))
|
if (!IsInt(obj))
|
||||||
error("page count in top level pages object is not an integer");
|
error("page count in top level pages object is not an integer");
|
||||||
// shadow the prototype getter
|
// shadow the prototype getter
|
||||||
return this.numPages = obj;
|
return this.numPages = obj;
|
||||||
|
},
|
||||||
|
// Walks the "Kids" array of `pagesDict` depth-first and appends a Page to
// this.pageCache for every leaf page found, in traversal order.
// this.pageCache must already be an array when this is called.
traverseKids: function(pagesDict) {
|
||||||
|
var pageCache = this.pageCache;
|
||||||
|
var kids = pagesDict.get("Kids");
|
||||||
|
if (!IsArray(kids))
|
||||||
|
error("page dictionary kids object is not an array");
|
||||||
|
for (var i = 0; i < kids.length; ++i) {
|
||||||
|
var kid = kids[i];
|
||||||
|
// every kid is required to be an indirect reference
if (!IsRef(kid))
|
||||||
|
error("page dictionary kid is not a reference");
|
||||||
|
var obj = this.xref.fetch(kid);
|
||||||
|
// A leaf is either explicitly typed "Page" or a dictionary with no
// "Kids" entry of its own.
// NOTE(review): IsDict(obj, "Page") appears to read
// obj.get("Type").name; for a dictionary without a "Type" entry that
// lookup would fail before the `!obj.has("Kids")` fallback is
// evaluated -- confirm.
if (IsDict(obj, "Page") || (IsDict(obj) && !obj.has("Kids"))) {
|
||||||
|
// the page number is the page's position in the cache
pageCache.push(new Page(this.xref, pageCache.length, obj));
|
||||||
|
} else if (IsDict(obj)) { // must be a child page dictionary
|
||||||
|
this.traverseKids(obj);
|
||||||
|
} else {
|
||||||
|
error("page dictionary kid reference points to wrong type of object");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Returns the entry at index `n` of the page cache, building the cache on
// first use by traversing the tree rooted at this.toplevelPagesDict.
// `n` is used directly as an array index into the traversal-ordered cache.
getPage: function(n) {
|
||||||
|
var pageCache = this.pageCache;
|
||||||
|
if (!pageCache) {
|
||||||
|
// create the cache before traversing: traverseKids pushes into
// this.pageCache
pageCache = this.pageCache = [];
|
||||||
|
this.traverseKids(this.toplevelPagesDict);
|
||||||
|
}
|
||||||
|
return this.pageCache[n];
|
||||||
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1871,9 +1922,12 @@ var PDFDoc = (function() {
|
|||||||
// overwrite the prototype getter
|
// overwrite the prototype getter
|
||||||
return this.numPages = num;
|
return this.numPages = num;
|
||||||
},
|
},
|
||||||
getPage: function(page) {
|
getPage: function(n) {
|
||||||
print(this.numPages);
|
var linearization = this.linearization;
|
||||||
// TODO
|
if (linearization) {
|
||||||
|
error("linearized page access not implemented");
|
||||||
|
}
|
||||||
|
return this.catalog.getPage(n);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2665,7 +2719,8 @@ function runParseTests() {
|
|||||||
//var data = snarf("simple_graphics.pdf", "binary");
|
//var data = snarf("simple_graphics.pdf", "binary");
|
||||||
var data = snarf("/tmp/paper.pdf", "binary");
|
var data = snarf("/tmp/paper.pdf", "binary");
|
||||||
var pdf = new PDFDoc(new Stream(data));
|
var pdf = new PDFDoc(new Stream(data));
|
||||||
pdf.getPage(1);
|
var page = pdf.getPage(1);
|
||||||
|
var contents = page.contents;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("arguments" in this) {
|
if ("arguments" in this) {
|
||||||
|
Loading…
Reference in New Issue
Block a user