Add support for the /Catalog Base-URI when resolving URLs (issue 14802)

As far as I can tell, this is actually the very first time that we've seen a PDF document with a Base-URI specified in the /Catalog; please refer to the specification:
https://web.archive.org/web/20220309040754if_/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf#G11.2097122

To simplify the overall implementation, this new parameter is accessed via the existing `BasePdfManager.docBaseUrl`-getter and will thus override any user-specified `docBaseUrl` API-parameter.
This commit is contained in:
Jonas Jenwald 2022-04-19 16:53:44 +02:00
parent 32ae0e4867
commit 5bc7339c1b
6 changed files with 126 additions and 5 deletions

View File

@ -72,9 +72,12 @@ class AnnotationFactory {
static create(xref, ref, pdfManager, idFactory, collectFields) {
return Promise.all([
pdfManager.ensureCatalog("acroForm"),
// Only necessary to prevent the `pdfManager.docBaseUrl`-getter, used
// with certain Annotations, from throwing and thus breaking parsing:
pdfManager.ensureCatalog("baseUrl"),
pdfManager.ensureDoc("xfaDatasets"),
collectFields ? this._getPageIndex(xref, ref, pdfManager) : -1,
]).then(([acroForm, xfaDatasets, pageIndex]) =>
]).then(([acroForm, baseUrl, xfaDatasets, pageIndex]) =>
pdfManager.ensure(this, "_create", [
xref,
ref,

View File

@ -1387,6 +1387,22 @@ class Catalog {
return next(pageRef);
}
get baseUrl() {
const uri = this._catDict.get("URI");
if (uri instanceof Dict) {
const base = uri.get("Base");
if (typeof base === "string") {
const absoluteUrl = createValidAbsoluteUrl(base, null, {
tryConvertEncoding: true,
});
if (absoluteUrl) {
return shadow(this, "baseUrl", absoluteUrl.href);
}
}
}
return shadow(this, "baseUrl", null);
}
/**
* @typedef {Object} ParseDestDictionaryParameters
* @property {Dict} destDict - The dictionary containing the destination.
@ -1464,8 +1480,6 @@ class Catalog {
// Some bad PDFs do not put parentheses around relative URLs.
url = "/" + url.name;
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
break;
case "GoTo":

View File

@ -13,7 +13,12 @@
* limitations under the License.
*/
import { createValidAbsoluteUrl, unreachable, warn } from "../shared/util.js";
import {
createValidAbsoluteUrl,
shadow,
unreachable,
warn,
} from "../shared/util.js";
import { ChunkedStreamManager } from "./chunked_stream.js";
import { MissingDataException } from "./core_utils.js";
import { PDFDocument } from "./document.js";
@ -46,7 +51,8 @@ class BasePdfManager {
}
get docBaseUrl() {
return this._docBaseUrl;
const catalog = this.pdfDocument.catalog;
return shadow(this, "docBaseUrl", catalog.baseUrl || this._docBaseUrl);
}
onLoadedStream() {

View File

@ -238,6 +238,7 @@
!pdfjsbad1586.pdf
!standard_fonts.pdf
!freeculture.pdf
!issue14802.pdf
!issue6006.pdf
!pdfkit_compressed.pdf
!TAMReview.pdf

89
test/pdfs/issue14802.pdf Normal file
View File

@ -0,0 +1,89 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Pages 2 0 R
/URI
<<
/Base (http://example.com/)
>>
/Type /Catalog
>>
endobj
2 0 obj
<<
/Kids [3 0 R]
/Type /Pages
/Count 1
>>
endobj
3 0 obj
<<
/Parent 2 0 R
/Annots [4 0 R]
/Resources
<<
/Font
<<
/F1 5 0 R
>>
>>
/MediaBox [0 0 260 50]
/Type /Page
/Contents 6 0 R
>>
endobj
4 0 obj
<<
/Border [0 0 1]
/Subtype /Link
/C [0 0 1]
/A
<<
/URI (./relative_link.txt)
/Type /Action
/S /URI
>>
/Type /Annot
/Rect [5 10 250 40]
>>
endobj
5 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Type /Font
/Encoding /WinAnsiEncoding
>>
endobj
6 0 obj
<<
/Length 81
>>
stream
1 0 0 rg
BT
10 20 TD
/F1 14 Tf
(A relative link, with a /Catalog Base-URI) Tj
ET
endstream
endobj xref
0 7
0000000000 65535 f
0000000015 00000 n
0000000106 00000 n
0000000165 00000 n
0000000310 00000 n
0000000467 00000 n
0000000568 00000 n
trailer
<<
/Root 1 0 R
/Size 7
>>
startxref
701
%%EOF

View File

@ -2950,6 +2950,14 @@
"link": true,
"type": "eq"
},
{ "id": "issue14802",
"file": "pdfs/issue14802.pdf",
"md5": "c1e774945fee539c7fcfec00b36dd4e6",
"rounds": 1,
"type": "eq",
"annotations": true,
"about": "LinkAnnotation with a relative link, and a /Catalog Base-URI."
},
{ "id": "issue1127-text",
"file": "pdfs/issue1127.pdf",
"md5": "4fb2be5ffefeafda4ba977de2a1bb4d8",