Merge pull request #14056 from Snuffleupagus/bug-1731240

Correctly validate URLs in XFA documents (bug 1731240)
This commit is contained in:
Jonas Jenwald 2021-09-21 21:37:19 +02:00 committed by GitHub
commit 6381158855
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 21 deletions

View File

@ -13,6 +13,13 @@
* limitations under the License.
*/
import {
addDefaultProtocolToUrl,
collectActions,
MissingDataException,
toRomanNumerals,
tryConvertUrlEncoding,
} from "./core_utils.js";
import {
clearPrimitiveCaches,
Dict,
@ -24,11 +31,6 @@ import {
RefSet,
RefSetCache,
} from "./primitives.js";
import {
collectActions,
MissingDataException,
toRomanNumerals,
} from "./core_utils.js";
import {
createPromiseCapability,
createValidAbsoluteUrl,
@ -1283,21 +1285,6 @@ class Catalog {
* @param {ParseDestDictionaryParameters} params
*/
static parseDestDictionary(params) {
/**
 * Gives URLs that begin with 'www.' a default 'http://' protocol.
 *
 * @param {string} url - The URL string to inspect.
 * @returns {string} `url` prefixed with 'http://' when it starts with 'www.',
 *   otherwise `url` unchanged.
 */
function addDefaultProtocolToUrl(url) {
  const needsProtocol = url.startsWith("www.");
  return needsProtocol ? `http://${url}` : url;
}
/**
 * Best-effort re-encoding of a URL string.
 *
 * According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
 * in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see Bugzilla 1122280.
 *
 * @param {string} url - The URL string as read from the document.
 * @returns {string} The result of `stringToUTF8String(url)`, or `url` itself
 *   if that conversion throws.
 */
function tryConvertUrlEncoding(url) {
  let converted = url;
  try {
    converted = stringToUTF8String(url);
  } catch (ex) {
    // Conversion failed; keep the URL exactly as it was given.
  }
  return converted;
}
const destDict = params.destDict;
if (!isDict(destDict)) {
warn("parseDestDictionary: `destDict` must be a dictionary.");

View File

@ -18,6 +18,7 @@ import {
BaseException,
objectSize,
stringToPDFString,
stringToUTF8String,
warn,
} from "../shared/util.js";
import { Dict, isName, isRef, isStream, RefSet } from "./primitives.js";
@ -451,7 +452,23 @@ function validateCSSFont(cssFontInfo) {
return true;
}
/**
 * Let URLs beginning with 'www.' default to using the 'http://' protocol.
 *
 * @param {string} url - The URL string to inspect.
 * @returns {string} The URL with 'http://' prepended when it begins with
 *   'www.'; any other string is returned as-is.
 */
function addDefaultProtocolToUrl(url) {
  if (url.startsWith("www.")) {
    return `http://${url}`;
  }
  return url;
}
/**
 * Attempts to convert the encoding of a URL string, without ever failing.
 *
 * According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
 * in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see Bugzilla 1122280.
 *
 * @param {string} url - The URL string to convert.
 * @returns {string} Whatever `stringToUTF8String` returns for `url`; if that
 *   call throws, the unmodified `url` is returned instead.
 */
function tryConvertUrlEncoding(url) {
  try {
    const decodedUrl = stringToUTF8String(url);
    return decodedUrl;
  } catch (ex) {
    // The string could not be converted; fall back to the input.
    return url;
  }
}
export {
addDefaultProtocolToUrl,
collectActions,
encodeToXmlString,
escapePDFName,
@ -467,6 +484,7 @@ export {
readUint16,
readUint32,
toRomanNumerals,
tryConvertUrlEncoding,
validateCSSFont,
XRefEntryException,
XRefParseException,

View File

@ -29,8 +29,13 @@ import {
XmlObject,
} from "./xfa_object.js";
import { $buildXFAObject, NamespaceIds } from "./namespaces.js";
import {
addDefaultProtocolToUrl,
tryConvertUrlEncoding,
} from "../core_utils.js";
import { fixTextIndent, measureToString, setFontFamily } from "./html_utils.js";
import { getMeasurement, HTMLResult, stripQuotes } from "./utils.js";
import { createValidAbsoluteUrl } from "../../shared/util.js";
const XHTML_NS_ID = NamespaceIds.xhtml.id;
@ -321,7 +326,16 @@ class XhtmlObject extends XmlObject {
/**
 * XHTML `<a>` (anchor) element found in XFA rich-text content.
 *
 * The `href` attribute comes from the document and is therefore validated
 * before being stored; anything that does not yield a valid absolute URL
 * results in an empty `href`.
 */
class A extends XhtmlObject {
  /**
   * @param {Object} attributes - The parsed attributes of the element; only
   *   `attributes.href` is read here, and only when it is a string.
   */
  constructor(attributes) {
    super(attributes, "a");

    // Start from an empty link, so that a missing, non-string, or rejected
    // `href` never exposes the raw attribute value.
    let href = "";
    if (typeof attributes.href === "string") {
      // Add 'http://' to 'www.'-prefixed URLs, then attempt the encoding
      // conversion, before validating the result.
      let url = addDefaultProtocolToUrl(attributes.href);
      url = tryConvertUrlEncoding(url);

      // A falsy result means that the URL was rejected.
      const absoluteUrl = createValidAbsoluteUrl(url);
      if (absoluteUrl) {
        href = absoluteUrl.href;
      }
    }
    this.href = href;
  }
}

View File

@ -522,4 +522,70 @@ describe("XFAFactory", function () {
expect(field1).not.toEqual(null);
expect(field1.attributes.value).toEqual("123");
});
it("should parse URLs correctly", function () {
  // Builds a minimal XFA document containing a single XHTML link, where
  // `href` is used both as the link target and as the link text.
  const getXml = href => `
<?xml version="1.0"?>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/">
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3">
<subform name="root" mergeMode="matchTemplate">
<pageSet>
<pageArea>
<contentArea x="0pt" w="456pt" h="789pt"/>
<medium stock="default" short="456pt" long="789pt"/>
<draw name="url" y="5.928mm" x="128.388mm" w="71.237mm" h="9.528mm">
<value>
<exData contentType="text/html">
<body xmlns="http://www.w3.org/1999/xhtml">
<a href="${href}">${href}</a>
</body>
</exData>
</value>
</draw>
</pageArea>
</pageSet>
</subform>
</template>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
<xfa:data>
</xfa:data>
</xfa:datasets>
</xdp:xdp>
`;

  // Each entry pairs the raw `href` placed in the document with the `href`
  // attribute that the generated anchor node is expected to have.
  const testCases = [
    // A valid, and complete, URL.
    {
      input: "https://www.example.com/",
      expectedHref: "https://www.example.com/",
    },
    // A valid, but incomplete, URL.
    {
      input: "www.example.com/",
      expectedHref: "http://www.example.com/",
    },
    // A valid email-address.
    {
      input: "mailto:test@example.com",
      expectedHref: "mailto:test@example.com",
    },
    // Not a valid URL.
    {
      input: "qwerty/",
      expectedHref: "",
    },
  ];

  for (const { input, expectedHref } of testCases) {
    const factory = new XFAFactory({ "xdp:xdp": getXml(input) });
    expect(factory.numberPages).toEqual(1);

    const pages = factory.getPages();
    const anchor = searchHtmlNode(pages, "name", "a");
    // The link text is always the raw input; only the `href` attribute
    // differs between the cases.
    expect(anchor.value).toEqual(input);
    expect(anchor.attributes.href).toEqual(expectedHref);
  }
});
});