parse the page tree and extract pages and their content
This commit is contained in:
		
							parent
							
								
									7c604ae280
								
							
						
					
					
						commit
						cebd567fa1
					
				
							
								
								
									
										79
									
								
								pdf.js
									
									
									
									
									
								
							
							
						
						
									
										79
									
								
								pdf.js
									
									
									
									
									
								
							| @ -963,6 +963,9 @@ var Dict = (function() { | |||||||
|         get: function(key) { |         get: function(key) { | ||||||
|             return this.map[key]; |             return this.map[key]; | ||||||
|         }, |         }, | ||||||
|  |         has: function(key) { | ||||||
|  |             return key in this.map; | ||||||
|  |         }, | ||||||
|         set: function(key, value) { |         set: function(key, value) { | ||||||
|             this.map[key] = value; |             this.map[key] = value; | ||||||
|         } |         } | ||||||
| @ -1011,8 +1014,8 @@ function IsCmd(v, cmd) { | |||||||
|     return v instanceof Cmd && (!cmd || v.cmd == cmd); |     return v instanceof Cmd && (!cmd || v.cmd == cmd); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function IsDict(v) { | function IsDict(v, type) { | ||||||
|     return v instanceof Dict; |     return v instanceof Dict && (!type || v.get("Type").name == type); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function IsArray(v) { | function IsArray(v) { | ||||||
| @ -1495,8 +1498,8 @@ var Parser = (function() { | |||||||
|         }, |         }, | ||||||
|         makeFilter: function(stream, name, params) { |         makeFilter: function(stream, name, params) { | ||||||
|             print(name); |             print(name); | ||||||
|             for (i in params.map) |             if (params) | ||||||
|                 print(i + ": " + params.map[i]); |                 error("filter params not supported yet"); | ||||||
|             // TODO
 |             // TODO
 | ||||||
|             return stream; |             return stream; | ||||||
|         } |         } | ||||||
| @ -1707,7 +1710,7 @@ var XRef = (function() { | |||||||
|                 if (e.gen != gen) |                 if (e.gen != gen) | ||||||
|                     throw("inconsistent generation in XRef"); |                     throw("inconsistent generation in XRef"); | ||||||
|                 var stream = this.stream.makeSubStream(e.offset); |                 var stream = this.stream.makeSubStream(e.offset); | ||||||
|                 var parser = new Parser(new Lexer(stream)); |                 var parser = new Parser(new Lexer(stream), true); | ||||||
|                 var obj1 = parser.getObj(); |                 var obj1 = parser.getObj(); | ||||||
|                 var obj2 = parser.getObj(); |                 var obj2 = parser.getObj(); | ||||||
|                 var obj3 = parser.getObj(); |                 var obj3 = parser.getObj(); | ||||||
| @ -1737,6 +1740,27 @@ var XRef = (function() { | |||||||
|     return constructor; |     return constructor; | ||||||
| })(); | })(); | ||||||
| 
 | 
 | ||||||
|  | var Page = (function() { | ||||||
|  |     function constructor(xref, pageNumber, pageDict) { | ||||||
|  |         this.xref = xref; | ||||||
|  |         this.pageNumber = pageNumber; | ||||||
|  |         this.pageDict = pageDict; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constructor.prototype = { | ||||||
|  |         get contents() { | ||||||
|  |             var obj = this.pageDict.get("Contents"); | ||||||
|  |             if (IsRef(obj)) | ||||||
|  |                 obj = this.xref.fetch(obj); | ||||||
|  |             if (!(IsArray(obj) || IsStream(obj))) | ||||||
|  |                 error("invalid page contents object"); | ||||||
|  |             return this.contents = obj; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     return constructor; | ||||||
|  | })(); | ||||||
|  | 
 | ||||||
| var Catalog = (function() { | var Catalog = (function() { | ||||||
|     function constructor(xref) { |     function constructor(xref) { | ||||||
|         this.xref = xref; |         this.xref = xref; | ||||||
| @ -1747,7 +1771,7 @@ var Catalog = (function() { | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     constructor.prototype = { |     constructor.prototype = { | ||||||
|         get pagesDict() { |         get toplevelPagesDict() { | ||||||
|             var obj = this.catDict.get("Pages"); |             var obj = this.catDict.get("Pages"); | ||||||
|             if (!IsRef(obj)) |             if (!IsRef(obj)) | ||||||
|                 error("invalid top-level pages reference"); |                 error("invalid top-level pages reference"); | ||||||
| @ -1755,14 +1779,41 @@ var Catalog = (function() { | |||||||
|             if (!IsDict(obj)) |             if (!IsDict(obj)) | ||||||
|                 error("invalid top-level pages dictionary"); |                 error("invalid top-level pages dictionary"); | ||||||
|             // shadow the prototype getter
 |             // shadow the prototype getter
 | ||||||
|             return this.pagesDict = obj; |             return this.toplevelPagesDict = obj; | ||||||
|         }, |         }, | ||||||
|         get numPages() { |         get numPages() { | ||||||
|             obj = this.pagesDict.get("Count"); |             obj = this.toplevelPagesDict.get("Count"); | ||||||
|             if (!IsInt(obj)) |             if (!IsInt(obj)) | ||||||
|                 error("page count in top level pages object is not an integer"); |                 error("page count in top level pages object is not an integer"); | ||||||
|             // shadow the prototype getter
 |             // shadow the prototype getter
 | ||||||
|             return this.numPages = obj; |             return this.numPages = obj; | ||||||
|  |         }, | ||||||
|  |         traverseKids: function(pagesDict) { | ||||||
|  |             var pageCache = this.pageCache; | ||||||
|  |             var kids = pagesDict.get("Kids"); | ||||||
|  |             if (!IsArray(kids)) | ||||||
|  |                 error("page dictionary kids object is not an array"); | ||||||
|  |             for (var i = 0; i < kids.length; ++i) { | ||||||
|  |                 var kid = kids[i]; | ||||||
|  |                 if (!IsRef(kid)) | ||||||
|  |                     error("page dictionary kid is not a reference"); | ||||||
|  |                 var obj = this.xref.fetch(kid); | ||||||
|  |                 if (IsDict(obj, "Page") || (IsDict(obj) && !obj.has("Kids"))) { | ||||||
|  |                     pageCache.push(new Page(this.xref, pageCache.length, obj)); | ||||||
|  |                 } else if (IsDict(obj)) { // must be a child page dictionary
 | ||||||
|  |                     this.traverseKids(obj); | ||||||
|  |                 } else { | ||||||
|  |                     error("page dictionary kid reference points to wrong type of object"); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         getPage: function(n) { | ||||||
|  |             var pageCache = this.pageCache; | ||||||
|  |             if (!pageCache) { | ||||||
|  |                 pageCache = this.pageCache = []; | ||||||
|  |                 this.traverseKids(this.toplevelPagesDict); | ||||||
|  |             } | ||||||
|  |             return this.pageCache[n]; | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
| @ -1871,9 +1922,12 @@ var PDFDoc = (function() { | |||||||
|             // overwrite the prototype getter
 |             // overwrite the prototype getter
 | ||||||
|             return this.numPages = num; |             return this.numPages = num; | ||||||
|         }, |         }, | ||||||
|         getPage: function(page) { |         getPage: function(n) { | ||||||
|             print(this.numPages); |             var linearization = this.linearization; | ||||||
|             // TODO
 |             if (linearization) { | ||||||
|  |                 error("linearized page access not implemented"); | ||||||
|  |             } | ||||||
|  |             return this.catalog.getPage(n); | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
| @ -2665,7 +2719,8 @@ function runParseTests() { | |||||||
|     //var data = snarf("simple_graphics.pdf", "binary");
 |     //var data = snarf("simple_graphics.pdf", "binary");
 | ||||||
|     var data = snarf("/tmp/paper.pdf", "binary"); |     var data = snarf("/tmp/paper.pdf", "binary"); | ||||||
|     var pdf = new PDFDoc(new Stream(data)); |     var pdf = new PDFDoc(new Stream(data)); | ||||||
|     pdf.getPage(1); |     var page = pdf.getPage(1); | ||||||
|  |     var contents = page.contents; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| if ("arguments" in this) { | if ("arguments" in this) { | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user