parse the page tree and extract pages and their content
This commit is contained in:
parent
7c604ae280
commit
cebd567fa1
79
pdf.js
79
pdf.js
@ -963,6 +963,9 @@ var Dict = (function() {
|
||||
get: function(key) {
|
||||
return this.map[key];
|
||||
},
|
||||
has: function(key) {
|
||||
return key in this.map;
|
||||
},
|
||||
set: function(key, value) {
|
||||
this.map[key] = value;
|
||||
}
|
||||
@ -1011,8 +1014,8 @@ function IsCmd(v, cmd) {
|
||||
return v instanceof Cmd && (!cmd || v.cmd == cmd);
|
||||
}
|
||||
|
||||
function IsDict(v) {
|
||||
return v instanceof Dict;
|
||||
function IsDict(v, type) {
|
||||
return v instanceof Dict && (!type || v.get("Type").name == type);
|
||||
}
|
||||
|
||||
function IsArray(v) {
|
||||
@ -1495,8 +1498,8 @@ var Parser = (function() {
|
||||
},
|
||||
makeFilter: function(stream, name, params) {
|
||||
print(name);
|
||||
for (i in params.map)
|
||||
print(i + ": " + params.map[i]);
|
||||
if (params)
|
||||
error("filter params not supported yet");
|
||||
// TODO
|
||||
return stream;
|
||||
}
|
||||
@ -1707,7 +1710,7 @@ var XRef = (function() {
|
||||
if (e.gen != gen)
|
||||
throw("inconsistent generation in XRef");
|
||||
var stream = this.stream.makeSubStream(e.offset);
|
||||
var parser = new Parser(new Lexer(stream));
|
||||
var parser = new Parser(new Lexer(stream), true);
|
||||
var obj1 = parser.getObj();
|
||||
var obj2 = parser.getObj();
|
||||
var obj3 = parser.getObj();
|
||||
@ -1737,6 +1740,27 @@ var XRef = (function() {
|
||||
return constructor;
|
||||
})();
|
||||
|
||||
var Page = (function() {
|
||||
function constructor(xref, pageNumber, pageDict) {
|
||||
this.xref = xref;
|
||||
this.pageNumber = pageNumber;
|
||||
this.pageDict = pageDict;
|
||||
}
|
||||
|
||||
constructor.prototype = {
|
||||
get contents() {
|
||||
var obj = this.pageDict.get("Contents");
|
||||
if (IsRef(obj))
|
||||
obj = this.xref.fetch(obj);
|
||||
if (!(IsArray(obj) || IsStream(obj)))
|
||||
error("invalid page contents object");
|
||||
return this.contents = obj;
|
||||
}
|
||||
};
|
||||
|
||||
return constructor;
|
||||
})();
|
||||
|
||||
var Catalog = (function() {
|
||||
function constructor(xref) {
|
||||
this.xref = xref;
|
||||
@ -1747,7 +1771,7 @@ var Catalog = (function() {
|
||||
}
|
||||
|
||||
constructor.prototype = {
|
||||
get pagesDict() {
|
||||
get toplevelPagesDict() {
|
||||
var obj = this.catDict.get("Pages");
|
||||
if (!IsRef(obj))
|
||||
error("invalid top-level pages reference");
|
||||
@ -1755,14 +1779,41 @@ var Catalog = (function() {
|
||||
if (!IsDict(obj))
|
||||
error("invalid top-level pages dictionary");
|
||||
// shadow the prototype getter
|
||||
return this.pagesDict = obj;
|
||||
return this.toplevelPagesDict = obj;
|
||||
},
|
||||
get numPages() {
|
||||
obj = this.pagesDict.get("Count");
|
||||
obj = this.toplevelPagesDict.get("Count");
|
||||
if (!IsInt(obj))
|
||||
error("page count in top level pages object is not an integer");
|
||||
// shadow the prototype getter
|
||||
return this.numPages = obj;
|
||||
},
|
||||
traverseKids: function(pagesDict) {
|
||||
var pageCache = this.pageCache;
|
||||
var kids = pagesDict.get("Kids");
|
||||
if (!IsArray(kids))
|
||||
error("page dictionary kids object is not an array");
|
||||
for (var i = 0; i < kids.length; ++i) {
|
||||
var kid = kids[i];
|
||||
if (!IsRef(kid))
|
||||
error("page dictionary kid is not a reference");
|
||||
var obj = this.xref.fetch(kid);
|
||||
if (IsDict(obj, "Page") || (IsDict(obj) && !obj.has("Kids"))) {
|
||||
pageCache.push(new Page(this.xref, pageCache.length, obj));
|
||||
} else if (IsDict(obj)) { // must be a child page dictionary
|
||||
this.traverseKids(obj);
|
||||
} else {
|
||||
error("page dictionary kid reference points to wrong type of object");
|
||||
}
|
||||
}
|
||||
},
|
||||
getPage: function(n) {
|
||||
var pageCache = this.pageCache;
|
||||
if (!pageCache) {
|
||||
pageCache = this.pageCache = [];
|
||||
this.traverseKids(this.toplevelPagesDict);
|
||||
}
|
||||
return this.pageCache[n];
|
||||
}
|
||||
};
|
||||
|
||||
@ -1871,9 +1922,12 @@ var PDFDoc = (function() {
|
||||
// overwrite the prototype getter
|
||||
return this.numPages = num;
|
||||
},
|
||||
getPage: function(page) {
|
||||
print(this.numPages);
|
||||
// TODO
|
||||
getPage: function(n) {
|
||||
var linearization = this.linearization;
|
||||
if (linearization) {
|
||||
error("linearized page access not implemented");
|
||||
}
|
||||
return this.catalog.getPage(n);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2665,7 +2719,8 @@ function runParseTests() {
|
||||
//var data = snarf("simple_graphics.pdf", "binary");
|
||||
var data = snarf("/tmp/paper.pdf", "binary");
|
||||
var pdf = new PDFDoc(new Stream(data));
|
||||
pdf.getPage(1);
|
||||
var page = pdf.getPage(1);
|
||||
var contents = page.contents;
|
||||
}
|
||||
|
||||
if ("arguments" in this) {
|
||||
|
Loading…
Reference in New Issue
Block a user