Merge pull request #14081 from Snuffleupagus/createValidAbsoluteUrl-options

[api-minor] Move the `addDefaultProtocolToUrl`/`tryConvertUrlEncoding` functionality into the `createValidAbsoluteUrl` function
This commit is contained in:
Tim van der Meij 2021-09-26 15:00:06 +02:00 committed by GitHub
commit 93ed4bfa11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 42 additions and 48 deletions

View File

@ -13,14 +13,6 @@
* limitations under the License. * limitations under the License.
*/ */
import {
addDefaultProtocolToUrl,
collectActions,
MissingDataException,
recoverJsURL,
toRomanNumerals,
tryConvertUrlEncoding,
} from "./core_utils.js";
import { import {
clearPrimitiveCaches, clearPrimitiveCaches,
Dict, Dict,
@ -29,9 +21,16 @@ import {
isRef, isRef,
isRefsEqual, isRefsEqual,
isStream, isStream,
Name,
RefSet, RefSet,
RefSetCache, RefSetCache,
} from "./primitives.js"; } from "./primitives.js";
import {
collectActions,
MissingDataException,
recoverJsURL,
toRomanNumerals,
} from "./core_utils.js";
import { import {
createPromiseCapability, createPromiseCapability,
createValidAbsoluteUrl, createValidAbsoluteUrl,
@ -1331,11 +1330,9 @@ class Catalog {
switch (actionName) { switch (actionName) {
case "URI": case "URI":
url = action.get("URI"); url = action.get("URI");
if (isName(url)) { if (url instanceof Name) {
// Some bad PDFs do not put parentheses around relative URLs. // Some bad PDFs do not put parentheses around relative URLs.
url = "/" + url.name; url = "/" + url.name;
} else if (isString(url)) {
url = addDefaultProtocolToUrl(url);
} }
// TODO: pdf spec mentions urls can be relative to a Base // TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary. // entry in the dictionary.
@ -1426,8 +1423,10 @@ class Catalog {
} }
if (isString(url)) { if (isString(url)) {
url = tryConvertUrlEncoding(url); const absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl, {
const absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl); addDefaultProtocol: true,
tryConvertEncoding: true,
});
if (absoluteUrl) { if (absoluteUrl) {
resultObj.url = absoluteUrl.href; resultObj.url = absoluteUrl.href;
} }

View File

@ -18,7 +18,6 @@ import {
BaseException, BaseException,
objectSize, objectSize,
stringToPDFString, stringToPDFString,
stringToUTF8String,
warn, warn,
} from "../shared/util.js"; } from "../shared/util.js";
import { Dict, isName, isRef, isStream, RefSet } from "./primitives.js"; import { Dict, isName, isRef, isStream, RefSet } from "./primitives.js";
@ -452,21 +451,6 @@ function validateCSSFont(cssFontInfo) {
return true; return true;
} }
// Let URLs beginning with 'www.' default to using the 'http://' protocol.
//
// The input is returned unchanged unless it both starts with "www." and
// contains at least two dots overall. The second condition avoids mistaking
// a *relative* URL that points to a file named e.g. "www.pdf" for a host name.
//
// @param {string} url - The URL string to inspect.
// @returns {string} Either `http://` + `url`, or `url` itself.
function addDefaultProtocolToUrl(url) {
  if (!url.startsWith("www.")) {
    return url;
  }
  // Number of "." characters in the string; "www.pdf" has one, whereas a
  // host such as "www.example.com" has two or more.
  const dotCount = url.split(".").length - 1;
  return dotCount >= 2 ? `http://${url}` : url;
}
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see Bugzilla 1122280.
//
// Best-effort conversion: yields whatever `stringToUTF8String` returns for
// the given string, and falls back to the untouched input if that call throws.
//
// @param {string} url - The URL string to convert.
// @returns {string} The converted string, or `url` itself on failure.
function tryConvertUrlEncoding(url) {
  let converted = url;
  try {
    converted = stringToUTF8String(url);
  } catch (ex) {
    // The conversion failed; keep the original string.
  }
  return converted;
}
function recoverJsURL(str) { function recoverJsURL(str) {
// Attempt to recover valid URLs from `JS` entries with certain // Attempt to recover valid URLs from `JS` entries with certain
// white-listed formats: // white-listed formats:
@ -496,7 +480,6 @@ function recoverJsURL(str) {
} }
export { export {
addDefaultProtocolToUrl,
collectActions, collectActions,
encodeToXmlString, encodeToXmlString,
escapePDFName, escapePDFName,
@ -513,7 +496,6 @@ export {
readUint32, readUint32,
recoverJsURL, recoverJsURL,
toRomanNumerals, toRomanNumerals,
tryConvertUrlEncoding,
validateCSSFont, validateCSSFont,
XRefEntryException, XRefEntryException,
XRefParseException, XRefParseException,

View File

@ -26,10 +26,6 @@ import {
$toStyle, $toStyle,
XFAObject, XFAObject,
} from "./xfa_object.js"; } from "./xfa_object.js";
import {
addDefaultProtocolToUrl,
tryConvertUrlEncoding,
} from "../core_utils.js";
import { createValidAbsoluteUrl, warn } from "../../shared/util.js"; import { createValidAbsoluteUrl, warn } from "../../shared/util.js";
import { getMeasurement, stripQuotes } from "./utils.js"; import { getMeasurement, stripQuotes } from "./utils.js";
import { selectFont } from "./fonts.js"; import { selectFont } from "./fonts.js";
@ -638,15 +634,11 @@ function setFontFamily(xfaFont, node, fontFinder, style) {
} }
function fixURL(str) { function fixURL(str) {
if (typeof str === "string") { const absoluteUrl = createValidAbsoluteUrl(str, /* baseUrl = */ null, {
let url = addDefaultProtocolToUrl(str); addDefaultProtocol: true,
url = tryConvertUrlEncoding(url); tryConvertEncoding: true,
const absoluteUrl = createValidAbsoluteUrl(url); });
if (absoluteUrl) { return absoluteUrl ? absoluteUrl.href : null;
return absoluteUrl.href;
}
}
return null;
} }
export { export {

View File

@ -448,14 +448,35 @@ function _isValidProtocol(url) {
* Attempts to create a valid absolute URL. * Attempts to create a valid absolute URL.
* *
* @param {URL|string} url - An absolute, or relative, URL. * @param {URL|string} url - An absolute, or relative, URL.
* @param {URL|string} baseUrl - An absolute URL. * @param {URL|string} [baseUrl] - An absolute URL.
* @param {Object} [options]
* @returns Either a valid {URL}, or `null` otherwise. * @returns Either a valid {URL}, or `null` otherwise.
*/ */
function createValidAbsoluteUrl(url, baseUrl) { function createValidAbsoluteUrl(url, baseUrl = null, options = null) {
if (!url) { if (!url) {
return null; return null;
} }
try { try {
if (options && typeof url === "string") {
// Let URLs beginning with "www." default to using the "http://" protocol.
if (options.addDefaultProtocol && url.startsWith("www.")) {
const dots = url.match(/\./g);
// Avoid accidentally matching a *relative* URL pointing to a file named
// e.g. "www.pdf" or similar.
if (dots && dots.length >= 2) {
url = `http://${url}`;
}
}
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see bug 1122280.
if (options.tryConvertEncoding) {
try {
url = stringToUTF8String(url);
} catch (ex) {}
}
}
const absoluteUrl = baseUrl ? new URL(url, baseUrl) : new URL(url); const absoluteUrl = baseUrl ? new URL(url, baseUrl) : new URL(url);
if (_isValidProtocol(absoluteUrl)) { if (_isValidProtocol(absoluteUrl)) {
return absoluteUrl; return absoluteUrl;

View File

@ -796,7 +796,7 @@ describe("annotation", function () {
); );
expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.annotationType).toEqual(AnnotationType.LINK);
expect(data.url).toEqual("http://www.hmrc.gov.uk/"); expect(data.url).toEqual("http://www.hmrc.gov.uk/");
expect(data.unsafeUrl).toEqual("http://www.hmrc.gov.uk"); expect(data.unsafeUrl).toEqual("www.hmrc.gov.uk");
expect(data.dest).toBeUndefined(); expect(data.dest).toBeUndefined();
} }
); );
@ -843,7 +843,7 @@ describe("annotation", function () {
).href ).href
); );
expect(data.unsafeUrl).toEqual( expect(data.unsafeUrl).toEqual(
stringToUTF8String("http://www.example.com/\xC3\xBC\xC3\xB6\xC3\xA4") "http://www.example.com/\xC3\xBC\xC3\xB6\xC3\xA4"
); );
expect(data.dest).toBeUndefined(); expect(data.dest).toBeUndefined();
} }