Add support for the /Catalog Base-URI when resolving URLs (issue 14802)
As far as I can tell, this is the very first time that we've seen a PDF document with a Base-URI specified in the /Catalog; please refer to the specification: https://web.archive.org/web/20220309040754if_/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#G11.2097122

To simplify the overall implementation, this new parameter is accessed via the existing `BasePdfManager.docBaseUrl`-getter and will thus override any user-specified `docBaseUrl` API-parameter.
This commit is contained in:
		
							parent
							
								
									32ae0e4867
								
							
						
					
					
						commit
						5bc7339c1b
					
				| @ -72,9 +72,12 @@ class AnnotationFactory { | |||||||
|   static create(xref, ref, pdfManager, idFactory, collectFields) { |   static create(xref, ref, pdfManager, idFactory, collectFields) { | ||||||
|     return Promise.all([ |     return Promise.all([ | ||||||
|       pdfManager.ensureCatalog("acroForm"), |       pdfManager.ensureCatalog("acroForm"), | ||||||
|  |       // Only necessary to prevent the `pdfManager.docBaseUrl`-getter, used
 | ||||||
|  |       // with certain Annotations, from throwing and thus breaking parsing:
 | ||||||
|  |       pdfManager.ensureCatalog("baseUrl"), | ||||||
|       pdfManager.ensureDoc("xfaDatasets"), |       pdfManager.ensureDoc("xfaDatasets"), | ||||||
|       collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1, |       collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1, | ||||||
|     ]).then(([acroForm, xfaDatasets, pageIndex]) => |     ]).then(([acroForm, baseUrl, xfaDatasets, pageIndex]) => | ||||||
|       pdfManager.ensure(this, "_create", [ |       pdfManager.ensure(this, "_create", [ | ||||||
|         xref, |         xref, | ||||||
|         ref, |         ref, | ||||||
|  | |||||||
| @ -1387,6 +1387,22 @@ class Catalog { | |||||||
|     return next(pageRef); |     return next(pageRef); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  |   get baseUrl() { | ||||||
|  |     const uri = this._catDict.get("URI"); | ||||||
|  |     if (uri instanceof Dict) { | ||||||
|  |       const base = uri.get("Base"); | ||||||
|  |       if (typeof base === "string") { | ||||||
|  |         const absoluteUrl = createValidAbsoluteUrl(base, null, { | ||||||
|  |           tryConvertEncoding: true, | ||||||
|  |         }); | ||||||
|  |         if (absoluteUrl) { | ||||||
|  |           return shadow(this, "baseUrl", absoluteUrl.href); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     return shadow(this, "baseUrl", null); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|   /** |   /** | ||||||
|    * @typedef {Object} ParseDestDictionaryParameters |    * @typedef {Object} ParseDestDictionaryParameters | ||||||
|    * @property {Dict} destDict - The dictionary containing the destination. |    * @property {Dict} destDict - The dictionary containing the destination. | ||||||
| @ -1464,8 +1480,6 @@ class Catalog { | |||||||
|             // Some bad PDFs do not put parentheses around relative URLs.
 |             // Some bad PDFs do not put parentheses around relative URLs.
 | ||||||
|             url = "/" + url.name; |             url = "/" + url.name; | ||||||
|           } |           } | ||||||
|           // TODO: pdf spec mentions urls can be relative to a Base
 |  | ||||||
|           // entry in the dictionary.
 |  | ||||||
|           break; |           break; | ||||||
| 
 | 
 | ||||||
|         case "GoTo": |         case "GoTo": | ||||||
|  | |||||||
| @ -13,7 +13,12 @@ | |||||||
|  * limitations under the License. |  * limitations under the License. | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js"; | import { | ||||||
|  |   createValidAbsoluteUrl, | ||||||
|  |   shadow, | ||||||
|  |   unreachable, | ||||||
|  |   warn, | ||||||
|  | } from "../shared/util.js"; | ||||||
| import { ChunkedStreamManager } from "./chunked_stream.js"; | import { ChunkedStreamManager } from "./chunked_stream.js"; | ||||||
| import { MissingDataException } from "./core_utils.js"; | import { MissingDataException } from "./core_utils.js"; | ||||||
| import { PDFDocument } from "./document.js"; | import { PDFDocument } from "./document.js"; | ||||||
| @ -46,7 +51,8 @@ class BasePdfManager { | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   get docBaseUrl() { |   get docBaseUrl() { | ||||||
|     return this._docBaseUrl; |     const catalog = this.pdfDocument.catalog; | ||||||
|  |     return shadow(this, "docBaseUrl", catalog.baseUrl || this._docBaseUrl); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   onLoadedStream() { |   onLoadedStream() { | ||||||
|  | |||||||
							
								
								
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -238,6 +238,7 @@ | |||||||
| !pdfjsbad1586.pdf | !pdfjsbad1586.pdf | ||||||
| !standard_fonts.pdf | !standard_fonts.pdf | ||||||
| !freeculture.pdf | !freeculture.pdf | ||||||
|  | !issue14802.pdf | ||||||
| !issue6006.pdf | !issue6006.pdf | ||||||
| !pdfkit_compressed.pdf | !pdfkit_compressed.pdf | ||||||
| !TAMReview.pdf | !TAMReview.pdf | ||||||
|  | |||||||
							
								
								
									
										89
									
								
								test/pdfs/issue14802.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								test/pdfs/issue14802.pdf
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,89 @@ | |||||||
|  | %PDF-1.7 | ||||||
|  | %âãÏÓ | ||||||
|  | 1 0 obj  | ||||||
|  | << | ||||||
|  | /Pages 2 0 R | ||||||
|  | /URI  | ||||||
|  | << | ||||||
|  | /Base (http://example.com/) | ||||||
|  | >> | ||||||
|  | /Type /Catalog | ||||||
|  | >> | ||||||
|  | endobj  | ||||||
|  | 2 0 obj  | ||||||
|  | << | ||||||
|  | /Kids [3 0 R] | ||||||
|  | /Type /Pages | ||||||
|  | /Count 1 | ||||||
|  | >> | ||||||
|  | endobj  | ||||||
|  | 3 0 obj  | ||||||
|  | << | ||||||
|  | /Parent 2 0 R | ||||||
|  | /Annots [4 0 R] | ||||||
|  | /Resources  | ||||||
|  | << | ||||||
|  | /Font  | ||||||
|  | << | ||||||
|  | /F1 5 0 R | ||||||
|  | >> | ||||||
|  | >> | ||||||
|  | /MediaBox [0 0 260 50] | ||||||
|  | /Type /Page | ||||||
|  | /Contents 6 0 R | ||||||
|  | >> | ||||||
|  | endobj  | ||||||
|  | 4 0 obj  | ||||||
|  | << | ||||||
|  | /Border [0 0 1] | ||||||
|  | /Subtype /Link | ||||||
|  | /C [0 0 1] | ||||||
|  | /A  | ||||||
|  | << | ||||||
|  | /URI (./relative_link.txt) | ||||||
|  | /Type /Action | ||||||
|  | /S /URI | ||||||
|  | >> | ||||||
|  | /Type /Annot | ||||||
|  | /Rect [5 10 250 40] | ||||||
|  | >> | ||||||
|  | endobj  | ||||||
|  | 5 0 obj  | ||||||
|  | << | ||||||
|  | /BaseFont /Times-Roman | ||||||
|  | /Subtype /Type1 | ||||||
|  | /Type /Font | ||||||
|  | /Encoding /WinAnsiEncoding | ||||||
|  | >> | ||||||
|  | endobj  | ||||||
|  | 6 0 obj  | ||||||
|  | << | ||||||
|  | /Length 81 | ||||||
|  | >> | ||||||
|  | stream | ||||||
|  | 1 0 0 rg | ||||||
|  | BT | ||||||
|  | 10 20 TD | ||||||
|  | /F1 14 Tf | ||||||
|  | (A relative link, with a /Catalog Base-URI) Tj | ||||||
|  | ET | ||||||
|  | 
 | ||||||
|  | endstream  | ||||||
|  | endobj xref | ||||||
|  | 0 7 | ||||||
|  | 0000000000 65535 f  | ||||||
|  | 0000000015 00000 n  | ||||||
|  | 0000000106 00000 n  | ||||||
|  | 0000000165 00000 n  | ||||||
|  | 0000000310 00000 n  | ||||||
|  | 0000000467 00000 n  | ||||||
|  | 0000000568 00000 n  | ||||||
|  | trailer | ||||||
|  | 
 | ||||||
|  | << | ||||||
|  | /Root 1 0 R | ||||||
|  | /Size 7 | ||||||
|  | >> | ||||||
|  | startxref | ||||||
|  | 701 | ||||||
|  | %%EOF | ||||||
| @ -2950,6 +2950,14 @@ | |||||||
|       "link": true, |       "link": true, | ||||||
|       "type": "eq" |       "type": "eq" | ||||||
|     }, |     }, | ||||||
|  |     {  "id": "issue14802", | ||||||
|  |        "file": "pdfs/issue14802.pdf", | ||||||
|  |        "md5": "c1e774945fee539c7fcfec00b36dd4e6", | ||||||
|  |        "rounds": 1, | ||||||
|  |        "type": "eq", | ||||||
|  |        "annotations": true, | ||||||
|  |        "about": "LinkAnnotation with a relative link, and a /Catalog Base-URI." | ||||||
|  |     }, | ||||||
|     {  "id": "issue1127-text", |     {  "id": "issue1127-text", | ||||||
|        "file": "pdfs/issue1127.pdf", |        "file": "pdfs/issue1127.pdf", | ||||||
|        "md5": "4fb2be5ffefeafda4ba977de2a1bb4d8", |        "md5": "4fb2be5ffefeafda4ba977de2a1bb4d8", | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user