Attempt to handle corrupt PDF documents that inline Page dictionaries in a Kids array (issue 9540)
According to the specification (see https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf#G6.1942297), the entries of a Kids array should be indirect references to objects, not inlined dictionaries.
This commit is contained in:
		
							parent
							
								
									6662985a20
								
							
						
					
					
						commit
						d431ae069d
					
				| @ -501,7 +501,8 @@ var Catalog = (function CatalogClosure() { | ||||
|           } | ||||
| 
 | ||||
|           count = currentNode.get('Count'); | ||||
|           // Cache the Kids count, since it can reduce redundant lookups in long
 | ||||
|           if (Number.isInteger(count) && count >= 0) { | ||||
|             // Cache the Kids count, since it can reduce redundant lookups in
 | ||||
|             // documents where all nodes are found at *one* level of the tree.
 | ||||
|             var objId = currentNode.objId; | ||||
|             if (objId && !pageKidsCountCache.has(objId)) { | ||||
| @ -512,9 +513,23 @@ var Catalog = (function CatalogClosure() { | ||||
|               currentPageIndex += count; | ||||
|               continue; | ||||
|             } | ||||
|           } | ||||
| 
 | ||||
|           var kids = currentNode.get('Kids'); | ||||
|           if (!Array.isArray(kids)) { | ||||
|             // Prevent errors in corrupt PDF documents that violate the
 | ||||
|             // specification by *inlining* Page dicts directly in the Kids
 | ||||
|             // array, rather than using indirect objects (fixes issue9540.pdf).
 | ||||
|             if (isName(currentNode.get('Type'), 'Page') || | ||||
|                 (!currentNode.has('Type') && currentNode.has('Contents'))) { | ||||
|               if (currentPageIndex === pageIndex) { | ||||
|                 capability.resolve([currentNode, null]); | ||||
|                 return; | ||||
|               } | ||||
|               currentPageIndex++; | ||||
|               continue; | ||||
|             } | ||||
| 
 | ||||
|             capability.reject(new FormatError( | ||||
|               'page dictionary kids object is not an array')); | ||||
|             return; | ||||
| @ -574,11 +589,14 @@ var Catalog = (function CatalogClosure() { | ||||
|             if (!isRef(kid)) { | ||||
|               throw new FormatError('kid must be a Ref.'); | ||||
|             } | ||||
|             if (kid.num === kidRef.num) { | ||||
|             if (isRefsEqual(kid, kidRef)) { | ||||
|               found = true; | ||||
|               break; | ||||
|             } | ||||
|             kidPromises.push(xref.fetchAsync(kid).then(function (kid) { | ||||
|               if (!isDict(kid)) { | ||||
|                 throw new FormatError('kid node must be a Dict.'); | ||||
|               } | ||||
|               if (kid.has('Count')) { | ||||
|                 var count = kid.get('Count'); | ||||
|                 total += count; | ||||
|  | ||||
							
								
								
									
										1
									
								
								test/pdfs/issue9540.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/issue9540.pdf.link
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| https://github.com/mozilla/pdf.js/files/1793688/Problem.pdf | ||||
| @ -875,6 +875,13 @@ | ||||
|        "lastPage": 1, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "issue9540", | ||||
|        "file": "pdfs/issue9540.pdf", | ||||
|        "md5": "7de7979270c9136bdd737428185fbbed", | ||||
|        "rounds": 1, | ||||
|        "link": true, | ||||
|        "type": "eq" | ||||
|     }, | ||||
|     {  "id": "txt2pdf", | ||||
|        "file": "pdfs/txt2pdf.pdf", | ||||
|        "md5": "02cefa0f5e8d96313bb05163b2f88c8c", | ||||
|  | ||||
| @ -354,6 +354,9 @@ class PDFLinkService { | ||||
|    * @param {Object} pageRef - reference to the page. | ||||
|    */ | ||||
|   cachePageRef(pageNum, pageRef) { | ||||
|     if (!pageRef) { | ||||
|       return; | ||||
|     } | ||||
|     let refStr = pageRef.num + ' ' + pageRef.gen + ' R'; | ||||
|     this._pagesRefCache[refStr] = pageNum; | ||||
|   } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user