Refactor LinkAnnotation slightly to add data.url/data.dest at the end

This patch also makes sure that all URLs are converted to the correct encoding.
This commit is contained in:
Jonas Jenwald 2016-03-03 13:07:22 +01:00
parent 4a601ffc28
commit b63ef7a8b6
3 changed files with 60 additions and 49 deletions

View File

@ -37,6 +37,7 @@ var AnnotationFlag = sharedUtil.AnnotationFlag;
var AnnotationType = sharedUtil.AnnotationType;
var OPS = sharedUtil.OPS;
var Util = sharedUtil.Util;
var isString = sharedUtil.isString;
var isArray = sharedUtil.isArray;
var isInt = sharedUtil.isInt;
var isValidUrl = sharedUtil.isValidUrl;
@ -705,68 +706,78 @@ var LinkAnnotation = (function LinkAnnotationClosure() {
var data = this.data;
data.annotationType = AnnotationType.LINK;
var action = dict.get('A');
var action = dict.get('A'), url, dest;
if (action && isDict(action)) {
var linkType = action.get('S').name;
if (linkType === 'URI') {
var url = action.get('URI');
if (isName(url)) {
// Some bad PDFs do not put parentheses around relative URLs.
url = '/' + url.name;
} else if (url) {
url = addDefaultProtocolToUrl(url);
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
if (!isValidUrl(url, false)) {
url = '';
}
// According to ISO 32000-1:2008, section 12.6.4.7,
// URI should to be encoded in 7-bit ASCII.
// Some bad PDFs may have URIs in UTF-8 encoding, see Bugzilla 1122280.
try {
data.url = stringToUTF8String(url);
} catch (e) {
// Fall back to a simple copy.
data.url = url;
}
} else if (linkType === 'GoTo') {
data.dest = action.get('D');
} else if (linkType === 'GoToR') {
var urlDict = action.get('F');
if (isDict(urlDict)) {
// We assume that the 'url' is a Filspec dictionary
// and fetch the url without checking any further
url = urlDict.get('F') || '';
}
switch (linkType) {
case 'URI':
url = action.get('URI');
if (isName(url)) {
// Some bad PDFs do not put parentheses around relative URLs.
url = '/' + url.name;
} else if (url) {
url = addDefaultProtocolToUrl(url);
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
break;
// TODO: pdf reference says that GoToR
// can also have 'NewWindow' attribute
if (!isValidUrl(url, false)) {
url = '';
}
data.url = url;
data.dest = action.get('D');
} else if (linkType === 'Named') {
data.action = action.get('N').name;
} else {
warn('unrecognized link type: ' + linkType);
case 'GoTo':
dest = action.get('D');
break;
case 'GoToR':
var urlDict = action.get('F');
if (isDict(urlDict)) {
// We assume that the 'url' is a Filspec dictionary
// and fetch the url without checking any further
url = urlDict.get('F') || '';
}
// TODO: pdf reference says that GoToR
// can also have 'NewWindow' attribute
dest = action.get('D');
break;
case 'Named':
data.action = action.get('N').name;
break;
default:
warn('unrecognized link type: ' + linkType);
}
} else if (dict.has('Dest')) {
// simple destination link
var dest = dict.get('Dest');
} else if (dict.has('Dest')) { // Simple destination link.
dest = dict.get('Dest');
}
if (url) {
if (isValidUrl(url, /* allowRelative = */ false)) {
data.url = tryConvertUrlEncoding(url);
}
}
if (dest) {
data.dest = isName(dest) ? dest.name : dest;
}
}
/**
 * Lets URLs beginning with 'www.' default to using the 'http://' protocol.
 *
 * @param {*} url - Candidate URL read from a link action. Presumably this is
 *   usually a string, but the value comes straight out of a dictionary
 *   lookup, so other types are tolerated and passed through untouched.
 * @returns {*} 'http://' + url when `url` is a string starting with 'www.';
 *   otherwise `url` itself, unchanged.
 */
function addDefaultProtocolToUrl(url) {
  // Check the type before calling `indexOf`: a truthy non-string value has
  // no such method and would throw instead of being returned as-is.
  if (typeof url === 'string' && url.indexOf('www.') === 0) {
    return ('http://' + url);
  }
  return url;
}
/**
 * Best-effort conversion of a URL to the correct string encoding.
 *
 * According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
 * in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280.
 *
 * @param {string} url - The URL as read from the PDF.
 * @returns {string} The value produced by `stringToUTF8String(url)`, or the
 *   original `url` if that call throws.
 */
function tryConvertUrlEncoding(url) {
  var converted = url;
  try {
    converted = stringToUTF8String(url);
  } catch (ex) {
    // Conversion failed; deliberately keep the URL exactly as it was given.
  }
  return converted;
}
Util.inherit(LinkAnnotation, Annotation, {});
return LinkAnnotation;

View File

@ -284,7 +284,7 @@ var LinkAnnotationElement = (function LinkAnnotationElementClosure() {
if (this.data.action) {
this._bindNamedAction(link, this.data.action);
} else {
this._bindLink(link, ('dest' in this.data) ? this.data.dest : null);
this._bindLink(link, (this.data.dest || null));
}
}

View File

@ -312,7 +312,7 @@ function isSameOrigin(baseUrl, otherUrl) {
// Validates if URL is safe and allowed, e.g. to avoid XSS.
function isValidUrl(url, allowRelative) {
if (!url) {
if (!url || typeof url !== 'string') {
return false;
}
// RFC 3986 (http://tools.ietf.org/html/rfc3986#section-3.1)