Merge pull request #14400 from Snuffleupagus/getPageDict-async
[api-minor] Convert `Catalog.getPageDict` to an asynchronous method
This commit is contained in:
		
						commit
						e42d54e1b5
					
				| @ -34,7 +34,6 @@ import { | |||||||
|   XRefEntryException, |   XRefEntryException, | ||||||
| } from "./core_utils.js"; | } from "./core_utils.js"; | ||||||
| import { | import { | ||||||
|   createPromiseCapability, |  | ||||||
|   createValidAbsoluteUrl, |   createValidAbsoluteUrl, | ||||||
|   DocumentActionEventType, |   DocumentActionEventType, | ||||||
|   FormatError, |   FormatError, | ||||||
| @ -1091,8 +1090,7 @@ class Catalog { | |||||||
|     }); |     }); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   getPageDict(pageIndex) { |   async getPageDict(pageIndex) { | ||||||
|     const capability = createPromiseCapability(); |  | ||||||
|     const nodesToVisit = [this.toplevelPagesDict]; |     const nodesToVisit = [this.toplevelPagesDict]; | ||||||
|     const visitedNodes = new RefSet(); |     const visitedNodes = new RefSet(); | ||||||
| 
 | 
 | ||||||
| @ -1104,130 +1102,105 @@ class Catalog { | |||||||
|       pageKidsCountCache = this.pageKidsCountCache; |       pageKidsCountCache = this.pageKidsCountCache; | ||||||
|     let currentPageIndex = 0; |     let currentPageIndex = 0; | ||||||
| 
 | 
 | ||||||
|     function next() { |     while (nodesToVisit.length) { | ||||||
|       while (nodesToVisit.length) { |       const currentNode = nodesToVisit.pop(); | ||||||
|         const currentNode = nodesToVisit.pop(); |  | ||||||
| 
 | 
 | ||||||
|         if (currentNode instanceof Ref) { |       if (currentNode instanceof Ref) { | ||||||
|           const count = pageKidsCountCache.get(currentNode); |         const count = pageKidsCountCache.get(currentNode); | ||||||
|           // Skip nodes where the page can't be.
 |         // Skip nodes where the page can't be.
 | ||||||
|           if (count >= 0 && currentPageIndex + count <= pageIndex) { |         if (count >= 0 && currentPageIndex + count <= pageIndex) { | ||||||
|             currentPageIndex += count; |           currentPageIndex += count; | ||||||
|             continue; |           continue; | ||||||
|  |         } | ||||||
|  |         // Prevent circular references in the /Pages tree.
 | ||||||
|  |         if (visitedNodes.has(currentNode)) { | ||||||
|  |           throw new FormatError("Pages tree contains circular reference."); | ||||||
|  |         } | ||||||
|  |         visitedNodes.put(currentNode); | ||||||
|  | 
 | ||||||
|  |         const obj = await xref.fetchAsync(currentNode); | ||||||
|  |         if (obj instanceof Dict) { | ||||||
|  |           let type = obj.getRaw("Type"); | ||||||
|  |           if (type instanceof Ref) { | ||||||
|  |             type = await xref.fetchAsync(type); | ||||||
|           } |           } | ||||||
|           // Prevent circular references in the /Pages tree.
 |           if (isName(type, "Page") || !obj.has("Kids")) { | ||||||
|           if (visitedNodes.has(currentNode)) { |             // Cache the Page reference, since it can *greatly* improve
 | ||||||
|             capability.reject( |             // performance by reducing redundant lookups in long documents
 | ||||||
|               new FormatError("Pages tree contains circular reference.") |             // where all nodes are found at *one* level of the tree.
 | ||||||
|             ); |             if (currentNode && !pageKidsCountCache.has(currentNode)) { | ||||||
|             return; |               pageKidsCountCache.put(currentNode, 1); | ||||||
|           } |  | ||||||
|           visitedNodes.put(currentNode); |  | ||||||
| 
 |  | ||||||
|           xref.fetchAsync(currentNode).then(function (obj) { |  | ||||||
|             if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) { |  | ||||||
|               // Cache the Page reference, since it can *greatly* improve
 |  | ||||||
|               // performance by reducing redundant lookups in long documents
 |  | ||||||
|               // where all nodes are found at *one* level of the tree.
 |  | ||||||
|               if (currentNode && !pageKidsCountCache.has(currentNode)) { |  | ||||||
|                 pageKidsCountCache.put(currentNode, 1); |  | ||||||
|               } |  | ||||||
| 
 |  | ||||||
|               if (pageIndex === currentPageIndex) { |  | ||||||
|                 capability.resolve([obj, currentNode]); |  | ||||||
|               } else { |  | ||||||
|                 currentPageIndex++; |  | ||||||
|                 next(); |  | ||||||
|               } |  | ||||||
|               return; |  | ||||||
|             } |             } | ||||||
|             nodesToVisit.push(obj); |  | ||||||
|             next(); |  | ||||||
|           }, capability.reject); |  | ||||||
|           return; |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
|         // Must be a child page dictionary.
 |  | ||||||
|         if (!(currentNode instanceof Dict)) { |  | ||||||
|           capability.reject( |  | ||||||
|             new FormatError( |  | ||||||
|               "Page dictionary kid reference points to wrong type of object." |  | ||||||
|             ) |  | ||||||
|           ); |  | ||||||
|           return; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         let count; |  | ||||||
|         try { |  | ||||||
|           count = currentNode.get("Count"); |  | ||||||
|         } catch (ex) { |  | ||||||
|           if (ex instanceof MissingDataException) { |  | ||||||
|             throw ex; |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|         if (Number.isInteger(count) && count >= 0) { |  | ||||||
|           // Cache the Kids count, since it can reduce redundant lookups in
 |  | ||||||
|           // documents where all nodes are found at *one* level of the tree.
 |  | ||||||
|           const objId = currentNode.objId; |  | ||||||
|           if (objId && !pageKidsCountCache.has(objId)) { |  | ||||||
|             pageKidsCountCache.put(objId, count); |  | ||||||
|           } |  | ||||||
|           // Skip nodes where the page can't be.
 |  | ||||||
|           if (currentPageIndex + count <= pageIndex) { |  | ||||||
|             currentPageIndex += count; |  | ||||||
|             continue; |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         let kids; |  | ||||||
|         try { |  | ||||||
|           kids = currentNode.get("Kids"); |  | ||||||
|         } catch (ex) { |  | ||||||
|           if (ex instanceof MissingDataException) { |  | ||||||
|             throw ex; |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|         if (!Array.isArray(kids)) { |  | ||||||
|           // Prevent errors in corrupt PDF documents that violate the
 |  | ||||||
|           // specification by *inlining* Page dicts directly in the Kids
 |  | ||||||
|           // array, rather than using indirect objects (fixes issue9540.pdf).
 |  | ||||||
|           let type; |  | ||||||
|           try { |  | ||||||
|             type = currentNode.get("Type"); |  | ||||||
|           } catch (ex) { |  | ||||||
|             if (ex instanceof MissingDataException) { |  | ||||||
|               throw ex; |  | ||||||
|             } |  | ||||||
|           } |  | ||||||
|           if ( |  | ||||||
|             isName(type, "Page") || |  | ||||||
|             (!currentNode.has("Type") && currentNode.has("Contents")) |  | ||||||
|           ) { |  | ||||||
|             if (currentPageIndex === pageIndex) { |             if (currentPageIndex === pageIndex) { | ||||||
|               capability.resolve([currentNode, null]); |               return [obj, currentNode]; | ||||||
|               return; |  | ||||||
|             } |             } | ||||||
|             currentPageIndex++; |             currentPageIndex++; | ||||||
|             continue; |             continue; | ||||||
|           } |           } | ||||||
|  |         } | ||||||
|  |         nodesToVisit.push(obj); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
| 
 | 
 | ||||||
|           capability.reject( |       // Must be a child page dictionary.
 | ||||||
|             new FormatError("Page dictionary kids object is not an array.") |       if (!(currentNode instanceof Dict)) { | ||||||
|           ); |         throw new FormatError( | ||||||
|           return; |           "Page dictionary kid reference points to wrong type of object." | ||||||
|  |         ); | ||||||
|  |       } | ||||||
|  |       const { objId } = currentNode; | ||||||
|  | 
 | ||||||
|  |       let count = currentNode.getRaw("Count"); | ||||||
|  |       if (count instanceof Ref) { | ||||||
|  |         count = await xref.fetchAsync(count); | ||||||
|  |       } | ||||||
|  |       if (Number.isInteger(count) && count >= 0) { | ||||||
|  |         // Cache the Kids count, since it can reduce redundant lookups in
 | ||||||
|  |         // documents where all nodes are found at *one* level of the tree.
 | ||||||
|  |         if (objId && !pageKidsCountCache.has(objId)) { | ||||||
|  |           pageKidsCountCache.put(objId, count); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Always check all `Kids` nodes, to avoid getting stuck in an empty
 |         // Skip nodes where the page can't be.
 | ||||||
|         // node further down in the tree (see issue5644.pdf, issue8088.pdf),
 |         if (currentPageIndex + count <= pageIndex) { | ||||||
|         // and to ensure that we actually find the correct `Page` dict.
 |           currentPageIndex += count; | ||||||
|         for (let last = kids.length - 1; last >= 0; last--) { |           continue; | ||||||
|           nodesToVisit.push(kids[last]); |  | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|       capability.reject(new Error(`Page index ${pageIndex} not found.`)); | 
 | ||||||
|  |       let kids = currentNode.getRaw("Kids"); | ||||||
|  |       if (kids instanceof Ref) { | ||||||
|  |         kids = await xref.fetchAsync(kids); | ||||||
|  |       } | ||||||
|  |       if (!Array.isArray(kids)) { | ||||||
|  |         // Prevent errors in corrupt PDF documents that violate the
 | ||||||
|  |         // specification by *inlining* Page dicts directly in the Kids
 | ||||||
|  |         // array, rather than using indirect objects (fixes issue9540.pdf).
 | ||||||
|  |         let type = currentNode.getRaw("Type"); | ||||||
|  |         if (type instanceof Ref) { | ||||||
|  |           type = await xref.fetchAsync(type); | ||||||
|  |         } | ||||||
|  |         if (isName(type, "Page") || !currentNode.has("Kids")) { | ||||||
|  |           if (currentPageIndex === pageIndex) { | ||||||
|  |             return [currentNode, null]; | ||||||
|  |           } | ||||||
|  |           currentPageIndex++; | ||||||
|  |           continue; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         throw new FormatError("Page dictionary kids object is not an array."); | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       // Always check all `Kids` nodes, to avoid getting stuck in an empty
 | ||||||
|  |       // node further down in the tree (see issue5644.pdf, issue8088.pdf),
 | ||||||
|  |       // and to ensure that we actually find the correct `Page` dict.
 | ||||||
|  |       for (let last = kids.length - 1; last >= 0; last--) { | ||||||
|  |         nodesToVisit.push(kids[last]); | ||||||
|  |       } | ||||||
|     } |     } | ||||||
|     next(); | 
 | ||||||
|     return capability.promise; |     throw new Error(`Page index ${pageIndex} not found.`); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   /** |   /** | ||||||
| @ -1319,7 +1292,20 @@ class Catalog { | |||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       if (isDict(obj, "Page") || !obj.has("Kids")) { |       let type; | ||||||
|  |       try { | ||||||
|  |         type = obj.get("Type"); | ||||||
|  |       } catch (ex) { | ||||||
|  |         if (ex instanceof MissingDataException) { | ||||||
|  |           throw ex; | ||||||
|  |         } | ||||||
|  |         if (ex instanceof XRefEntryException && !recoveryMode) { | ||||||
|  |           throw ex; | ||||||
|  |         } | ||||||
|  |         addPageError(ex); | ||||||
|  |         break; | ||||||
|  |       } | ||||||
|  |       if (isName(type, "Page") || !obj.has("Kids")) { | ||||||
|         addPageDict(obj, kidObj instanceof Ref ? kidObj : null); |         addPageDict(obj, kidObj instanceof Ref ? kidObj : null); | ||||||
|       } else { |       } else { | ||||||
|         queue.push({ currentNode: obj, posInKids: 0 }); |         queue.push({ currentNode: obj, posInKids: 0 }); | ||||||
|  | |||||||
| @ -622,9 +622,7 @@ describe("api", function () { | |||||||
|         expect(false).toEqual(true); |         expect(false).toEqual(true); | ||||||
|       } catch (reason) { |       } catch (reason) { | ||||||
|         expect(reason instanceof UnknownErrorException).toEqual(true); |         expect(reason instanceof UnknownErrorException).toEqual(true); | ||||||
|         expect(reason.message).toEqual( |         expect(reason.message).toEqual("Illegal character: 41"); | ||||||
|           "Page dictionary kids object is not an array." |  | ||||||
|         ); |  | ||||||
|       } |       } | ||||||
|       try { |       try { | ||||||
|         await pdfDocument2.getPage(1); |         await pdfDocument2.getPage(1); | ||||||
| @ -633,9 +631,7 @@ describe("api", function () { | |||||||
|         expect(false).toEqual(true); |         expect(false).toEqual(true); | ||||||
|       } catch (reason) { |       } catch (reason) { | ||||||
|         expect(reason instanceof UnknownErrorException).toEqual(true); |         expect(reason instanceof UnknownErrorException).toEqual(true); | ||||||
|         expect(reason.message).toEqual( |         expect(reason.message).toEqual("End of file inside array."); | ||||||
|           "Page dictionary kids object is not an array." |  | ||||||
|         ); |  | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|       await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); |       await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); | ||||||
|  | |||||||
| @ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions"; | |||||||
| const PagesCountLimit = { | const PagesCountLimit = { | ||||||
|   FORCE_SCROLL_MODE_PAGE: 15000, |   FORCE_SCROLL_MODE_PAGE: 15000, | ||||||
|   FORCE_LAZY_PAGE_INIT: 7500, |   FORCE_LAZY_PAGE_INIT: 7500, | ||||||
|   PAUSE_EAGER_PAGE_INIT: 500, |   PAUSE_EAGER_PAGE_INIT: 250, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /** | /** | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user