Merge pull request #14400 from Snuffleupagus/getPageDict-async
[api-minor] Convert `Catalog.getPageDict` to an asynchronous method
This commit is contained in:
		
						commit
						e42d54e1b5
					
				| @ -34,7 +34,6 @@ import { | ||||
|   XRefEntryException, | ||||
| } from "./core_utils.js"; | ||||
| import { | ||||
|   createPromiseCapability, | ||||
|   createValidAbsoluteUrl, | ||||
|   DocumentActionEventType, | ||||
|   FormatError, | ||||
| @ -1091,8 +1090,7 @@ class Catalog { | ||||
|     }); | ||||
|   } | ||||
| 
 | ||||
|   getPageDict(pageIndex) { | ||||
|     const capability = createPromiseCapability(); | ||||
|   async getPageDict(pageIndex) { | ||||
|     const nodesToVisit = [this.toplevelPagesDict]; | ||||
|     const visitedNodes = new RefSet(); | ||||
| 
 | ||||
| @ -1104,130 +1102,105 @@ class Catalog { | ||||
|       pageKidsCountCache = this.pageKidsCountCache; | ||||
|     let currentPageIndex = 0; | ||||
| 
 | ||||
|     function next() { | ||||
|       while (nodesToVisit.length) { | ||||
|         const currentNode = nodesToVisit.pop(); | ||||
|     while (nodesToVisit.length) { | ||||
|       const currentNode = nodesToVisit.pop(); | ||||
| 
 | ||||
|         if (currentNode instanceof Ref) { | ||||
|           const count = pageKidsCountCache.get(currentNode); | ||||
|           // Skip nodes where the page can't be.
 | ||||
|           if (count >= 0 && currentPageIndex + count <= pageIndex) { | ||||
|             currentPageIndex += count; | ||||
|             continue; | ||||
|       if (currentNode instanceof Ref) { | ||||
|         const count = pageKidsCountCache.get(currentNode); | ||||
|         // Skip nodes where the page can't be.
 | ||||
|         if (count >= 0 && currentPageIndex + count <= pageIndex) { | ||||
|           currentPageIndex += count; | ||||
|           continue; | ||||
|         } | ||||
|         // Prevent circular references in the /Pages tree.
 | ||||
|         if (visitedNodes.has(currentNode)) { | ||||
|           throw new FormatError("Pages tree contains circular reference."); | ||||
|         } | ||||
|         visitedNodes.put(currentNode); | ||||
| 
 | ||||
|         const obj = await xref.fetchAsync(currentNode); | ||||
|         if (obj instanceof Dict) { | ||||
|           let type = obj.getRaw("Type"); | ||||
|           if (type instanceof Ref) { | ||||
|             type = await xref.fetchAsync(type); | ||||
|           } | ||||
|           // Prevent circular references in the /Pages tree.
 | ||||
|           if (visitedNodes.has(currentNode)) { | ||||
|             capability.reject( | ||||
|               new FormatError("Pages tree contains circular reference.") | ||||
|             ); | ||||
|             return; | ||||
|           } | ||||
|           visitedNodes.put(currentNode); | ||||
| 
 | ||||
|           xref.fetchAsync(currentNode).then(function (obj) { | ||||
|             if (isDict(obj, "Page") || (isDict(obj) && !obj.has("Kids"))) { | ||||
|               // Cache the Page reference, since it can *greatly* improve
 | ||||
|               // performance by reducing redundant lookups in long documents
 | ||||
|               // where all nodes are found at *one* level of the tree.
 | ||||
|               if (currentNode && !pageKidsCountCache.has(currentNode)) { | ||||
|                 pageKidsCountCache.put(currentNode, 1); | ||||
|               } | ||||
| 
 | ||||
|               if (pageIndex === currentPageIndex) { | ||||
|                 capability.resolve([obj, currentNode]); | ||||
|               } else { | ||||
|                 currentPageIndex++; | ||||
|                 next(); | ||||
|               } | ||||
|               return; | ||||
|           if (isName(type, "Page") || !obj.has("Kids")) { | ||||
|             // Cache the Page reference, since it can *greatly* improve
 | ||||
|             // performance by reducing redundant lookups in long documents
 | ||||
|             // where all nodes are found at *one* level of the tree.
 | ||||
|             if (currentNode && !pageKidsCountCache.has(currentNode)) { | ||||
|               pageKidsCountCache.put(currentNode, 1); | ||||
|             } | ||||
|             nodesToVisit.push(obj); | ||||
|             next(); | ||||
|           }, capability.reject); | ||||
|           return; | ||||
|         } | ||||
| 
 | ||||
|         // Must be a child page dictionary.
 | ||||
|         if (!(currentNode instanceof Dict)) { | ||||
|           capability.reject( | ||||
|             new FormatError( | ||||
|               "Page dictionary kid reference points to wrong type of object." | ||||
|             ) | ||||
|           ); | ||||
|           return; | ||||
|         } | ||||
| 
 | ||||
|         let count; | ||||
|         try { | ||||
|           count = currentNode.get("Count"); | ||||
|         } catch (ex) { | ||||
|           if (ex instanceof MissingDataException) { | ||||
|             throw ex; | ||||
|           } | ||||
|         } | ||||
|         if (Number.isInteger(count) && count >= 0) { | ||||
|           // Cache the Kids count, since it can reduce redundant lookups in
 | ||||
|           // documents where all nodes are found at *one* level of the tree.
 | ||||
|           const objId = currentNode.objId; | ||||
|           if (objId && !pageKidsCountCache.has(objId)) { | ||||
|             pageKidsCountCache.put(objId, count); | ||||
|           } | ||||
|           // Skip nodes where the page can't be.
 | ||||
|           if (currentPageIndex + count <= pageIndex) { | ||||
|             currentPageIndex += count; | ||||
|             continue; | ||||
|           } | ||||
|         } | ||||
| 
 | ||||
|         let kids; | ||||
|         try { | ||||
|           kids = currentNode.get("Kids"); | ||||
|         } catch (ex) { | ||||
|           if (ex instanceof MissingDataException) { | ||||
|             throw ex; | ||||
|           } | ||||
|         } | ||||
|         if (!Array.isArray(kids)) { | ||||
|           // Prevent errors in corrupt PDF documents that violate the
 | ||||
|           // specification by *inlining* Page dicts directly in the Kids
 | ||||
|           // array, rather than using indirect objects (fixes issue9540.pdf).
 | ||||
|           let type; | ||||
|           try { | ||||
|             type = currentNode.get("Type"); | ||||
|           } catch (ex) { | ||||
|             if (ex instanceof MissingDataException) { | ||||
|               throw ex; | ||||
|             } | ||||
|           } | ||||
|           if ( | ||||
|             isName(type, "Page") || | ||||
|             (!currentNode.has("Type") && currentNode.has("Contents")) | ||||
|           ) { | ||||
|             if (currentPageIndex === pageIndex) { | ||||
|               capability.resolve([currentNode, null]); | ||||
|               return; | ||||
|               return [obj, currentNode]; | ||||
|             } | ||||
|             currentPageIndex++; | ||||
|             continue; | ||||
|           } | ||||
|         } | ||||
|         nodesToVisit.push(obj); | ||||
|         continue; | ||||
|       } | ||||
| 
 | ||||
|           capability.reject( | ||||
|             new FormatError("Page dictionary kids object is not an array.") | ||||
|           ); | ||||
|           return; | ||||
|       // Must be a child page dictionary.
 | ||||
|       if (!(currentNode instanceof Dict)) { | ||||
|         throw new FormatError( | ||||
|           "Page dictionary kid reference points to wrong type of object." | ||||
|         ); | ||||
|       } | ||||
|       const { objId } = currentNode; | ||||
| 
 | ||||
|       let count = currentNode.getRaw("Count"); | ||||
|       if (count instanceof Ref) { | ||||
|         count = await xref.fetchAsync(count); | ||||
|       } | ||||
|       if (Number.isInteger(count) && count >= 0) { | ||||
|         // Cache the Kids count, since it can reduce redundant lookups in
 | ||||
|         // documents where all nodes are found at *one* level of the tree.
 | ||||
|         if (objId && !pageKidsCountCache.has(objId)) { | ||||
|           pageKidsCountCache.put(objId, count); | ||||
|         } | ||||
| 
 | ||||
|         // Always check all `Kids` nodes, to avoid getting stuck in an empty
 | ||||
|         // node further down in the tree (see issue5644.pdf, issue8088.pdf),
 | ||||
|         // and to ensure that we actually find the correct `Page` dict.
 | ||||
|         for (let last = kids.length - 1; last >= 0; last--) { | ||||
|           nodesToVisit.push(kids[last]); | ||||
|         // Skip nodes where the page can't be.
 | ||||
|         if (currentPageIndex + count <= pageIndex) { | ||||
|           currentPageIndex += count; | ||||
|           continue; | ||||
|         } | ||||
|       } | ||||
|       capability.reject(new Error(`Page index ${pageIndex} not found.`)); | ||||
| 
 | ||||
|       let kids = currentNode.getRaw("Kids"); | ||||
|       if (kids instanceof Ref) { | ||||
|         kids = await xref.fetchAsync(kids); | ||||
|       } | ||||
|       if (!Array.isArray(kids)) { | ||||
|         // Prevent errors in corrupt PDF documents that violate the
 | ||||
|         // specification by *inlining* Page dicts directly in the Kids
 | ||||
|         // array, rather than using indirect objects (fixes issue9540.pdf).
 | ||||
|         let type = currentNode.getRaw("Type"); | ||||
|         if (type instanceof Ref) { | ||||
|           type = await xref.fetchAsync(type); | ||||
|         } | ||||
|         if (isName(type, "Page") || !currentNode.has("Kids")) { | ||||
|           if (currentPageIndex === pageIndex) { | ||||
|             return [currentNode, null]; | ||||
|           } | ||||
|           currentPageIndex++; | ||||
|           continue; | ||||
|         } | ||||
| 
 | ||||
|         throw new FormatError("Page dictionary kids object is not an array."); | ||||
|       } | ||||
| 
 | ||||
|       // Always check all `Kids` nodes, to avoid getting stuck in an empty
 | ||||
|       // node further down in the tree (see issue5644.pdf, issue8088.pdf),
 | ||||
|       // and to ensure that we actually find the correct `Page` dict.
 | ||||
|       for (let last = kids.length - 1; last >= 0; last--) { | ||||
|         nodesToVisit.push(kids[last]); | ||||
|       } | ||||
|     } | ||||
|     next(); | ||||
|     return capability.promise; | ||||
| 
 | ||||
|     throw new Error(`Page index ${pageIndex} not found.`); | ||||
|   } | ||||
| 
 | ||||
|   /** | ||||
| @ -1319,7 +1292,20 @@ class Catalog { | ||||
|         break; | ||||
|       } | ||||
| 
 | ||||
|       if (isDict(obj, "Page") || !obj.has("Kids")) { | ||||
|       let type; | ||||
|       try { | ||||
|         type = obj.get("Type"); | ||||
|       } catch (ex) { | ||||
|         if (ex instanceof MissingDataException) { | ||||
|           throw ex; | ||||
|         } | ||||
|         if (ex instanceof XRefEntryException && !recoveryMode) { | ||||
|           throw ex; | ||||
|         } | ||||
|         addPageError(ex); | ||||
|         break; | ||||
|       } | ||||
|       if (isName(type, "Page") || !obj.has("Kids")) { | ||||
|         addPageDict(obj, kidObj instanceof Ref ? kidObj : null); | ||||
|       } else { | ||||
|         queue.push({ currentNode: obj, posInKids: 0 }); | ||||
|  | ||||
| @ -622,9 +622,7 @@ describe("api", function () { | ||||
|         expect(false).toEqual(true); | ||||
|       } catch (reason) { | ||||
|         expect(reason instanceof UnknownErrorException).toEqual(true); | ||||
|         expect(reason.message).toEqual( | ||||
|           "Page dictionary kids object is not an array." | ||||
|         ); | ||||
|         expect(reason.message).toEqual("Illegal character: 41"); | ||||
|       } | ||||
|       try { | ||||
|         await pdfDocument2.getPage(1); | ||||
| @ -633,9 +631,7 @@ describe("api", function () { | ||||
|         expect(false).toEqual(true); | ||||
|       } catch (reason) { | ||||
|         expect(reason instanceof UnknownErrorException).toEqual(true); | ||||
|         expect(reason.message).toEqual( | ||||
|           "Page dictionary kids object is not an array." | ||||
|         ); | ||||
|         expect(reason.message).toEqual("End of file inside array."); | ||||
|       } | ||||
| 
 | ||||
|       await Promise.all([loadingTask1.destroy(), loadingTask2.destroy()]); | ||||
|  | ||||
| @ -76,7 +76,7 @@ const ENABLE_PERMISSIONS_CLASS = "enablePermissions"; | ||||
| const PagesCountLimit = { | ||||
|   FORCE_SCROLL_MODE_PAGE: 15000, | ||||
|   FORCE_LAZY_PAGE_INIT: 7500, | ||||
|   PAUSE_EAGER_PAGE_INIT: 500, | ||||
|   PAUSE_EAGER_PAGE_INIT: 250, | ||||
| }; | ||||
| 
 | ||||
| /** | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user