From 42f07c62624361204fd1892e2a635c08b5d9c9f5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 30 Sep 2016 16:32:22 +0200 Subject: [PATCH] [api-minor] Use the `new URL` constructor when validating URLs in annotations and the outline, as a complement to only checking the protocol, and add a bit more validation to `Catalog_parseDestDictionary` Note that this will automatically reject any relative URL. To make the API more useful to consumers, URLs that are rejected will be available via the `unsafeUrl` property in the data object returned by `PDFPageProxy_getAnnotations`. The patch also adds a bit more validation of the data for `Named` actions. --- src/core/obj.js | 40 +++++++++++++++++++++------ test/unit/annotation_layer_spec.js | 44 +++++++++++++++++++++++++++--- test/unit/api_spec.js | 2 +- 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 254f1d444..bbbfe232c 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -594,7 +594,7 @@ var Catalog = (function CatalogClosure() { Catalog.parseDestDictionary = function Catalog_parseDestDictionary(params) { // Lets URLs beginning with 'www.' default to using the 'http://' protocol. function addDefaultProtocolToUrl(url) { - if (isString(url) && url.indexOf('www.') === 0) { + if (url.indexOf('www.') === 0) { return ('http://' + url); } return url; @@ -610,10 +610,18 @@ var Catalog = (function CatalogClosure() { } var destDict = params.destDict; + if (!isDict(destDict)) { + warn('Catalog_parseDestDictionary: "destDict" must be a dictionary.'); + return; + } var resultObj = params.resultObj; + if (typeof resultObj !== 'object') { + warn('Catalog_parseDestDictionary: "resultObj" must be an object.'); + return; + } var action = destDict.get('A'), url, dest; - if (action && isDict(action)) { + if (isDict(action)) { var linkType = action.get('S').name; switch (linkType) { case 'URI': @@ -621,7 +629,7 @@ var Catalog = (function CatalogClosure() { if (isName(url)) { // Some bad PDFs do not put parentheses around relative URLs. url = '/' + url.name; - } else if (url) { + } else if (isString(url)) { url = addDefaultProtocolToUrl(url); } // TODO: pdf spec mentions urls can be relative to a Base @@ -669,24 +677,40 @@ var Catalog = (function CatalogClosure() { break; case 'Named': - resultObj.action = action.get('N').name; + var namedAction = action.get('N'); + if (isName(namedAction)) { + resultObj.action = namedAction.name; + } break; default: warn('Catalog_parseDestDictionary: Unrecognized link type "' + linkType + '".'); + break; } } else if (destDict.has('Dest')) { // Simple destination link. dest = destDict.get('Dest'); } - if (url) { - if (isValidUrl(url, /* allowRelative = */ false)) { - resultObj.url = tryConvertUrlEncoding(url); + if (isString(url)) { + url = tryConvertUrlEncoding(url); + var absoluteUrl; + try { + absoluteUrl = new URL(url).href; + } catch (ex) { /* `new URL()` will throw on incorrect data. */ } + + if (isValidUrl(absoluteUrl, /* allowRelative = */ false)) { + resultObj.url = absoluteUrl; } + resultObj.unsafeUrl = url; } if (dest) { - resultObj.dest = isName(dest) ? dest.name : dest; + if (isName(dest)) { + dest = dest.name; + } + if (isString(dest) || isArray(dest)) { + resultObj.dest = dest; + } } }; diff --git a/test/unit/annotation_layer_spec.js b/test/unit/annotation_layer_spec.js index 89b02fe73..276f5bf69 100644 --- a/test/unit/annotation_layer_spec.js +++ b/test/unit/annotation_layer_spec.js @@ -275,6 +275,8 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toEqual('http://www.ctan.org/tex-archive/info/lshort'); + expect(data.unsafeUrl).toEqual( + 'http://www.ctan.org/tex-archive/info/lshort'); expect(data.dest).toBeUndefined(); }); @@ -299,7 +301,8 @@ describe('Annotation layer', function() { var data = annotation.data; expect(data.annotationType).toEqual(AnnotationType.LINK); - expect(data.url).toEqual('http://www.hmrc.gov.uk'); + expect(data.url).toEqual('http://www.hmrc.gov.uk/'); + expect(data.unsafeUrl).toEqual('http://www.hmrc.gov.uk'); expect(data.dest).toBeUndefined(); }); @@ -331,6 +334,8 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toEqual( + new URL(stringToUTF8String('http://www.example.com/üöä')).href); + expect(data.unsafeUrl).toEqual( stringToUTF8String('http://www.example.com/üöä')); expect(data.dest).toBeUndefined(); }); @@ -356,6 +361,7 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toBeUndefined(); + expect(data.unsafeUrl).toBeUndefined(); expect(data.dest).toEqual('page.157'); }); @@ -382,7 +388,8 @@ describe('Annotation layer', function() { var data = annotation.data; expect(data.annotationType).toEqual(AnnotationType.LINK); - expect(data.url).toBeUndefined(); // ../../0013/001346/134685E.pdf#4.3 + expect(data.url).toBeUndefined(); + expect(data.unsafeUrl).toEqual('../../0013/001346/134685E.pdf#4.3'); expect(data.dest).toBeUndefined(); expect(data.newWindow).toEqual(true); }); @@ -410,6 +417,8 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toEqual('http://www.example.com/test.pdf#nameddest=15'); + expect(data.unsafeUrl).toEqual( + 'http://www.example.com/test.pdf#nameddest=15'); expect(data.dest).toBeUndefined(); expect(data.newWindow).toBeFalsy(); }); @@ -436,8 +445,10 @@ describe('Annotation layer', function() { var data = annotation.data; expect(data.annotationType).toEqual(AnnotationType.LINK); - expect(data.url).toEqual('http://www.example.com/test.pdf#' + - '[14,{"name":"XYZ"},null,298.043,null]'); + expect(data.url).toEqual(new URL('http://www.example.com/test.pdf#' + + '[14,{"name":"XYZ"},null,298.043,null]').href); + expect(data.unsafeUrl).toEqual('http://www.example.com/test.pdf#' + + '[14,{"name":"XYZ"},null,298.043,null]'); expect(data.dest).toBeUndefined(); expect(data.newWindow).toBeFalsy(); }); @@ -463,6 +474,7 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toBeUndefined(); + expect(data.unsafeUrl).toBeUndefined(); expect(data.action).toEqual('GoToPage'); }); @@ -482,8 +494,32 @@ describe('Annotation layer', function() { expect(data.annotationType).toEqual(AnnotationType.LINK); expect(data.url).toBeUndefined(); + expect(data.unsafeUrl).toBeUndefined(); expect(data.dest).toEqual('LI0'); }); + + it('should correctly parse a simple Dest, with explicit destination array', + function() { + var annotationDict = new Dict(); + annotationDict.set('Type', Name.get('Annot')); + annotationDict.set('Subtype', Name.get('Link')); + annotationDict.set('Dest', [new Ref(17, 0), Name.get('XYZ'), + 0, 841.89, null]); + + var annotationRef = new Ref(10, 0); + var xref = new XRefMock([ + { ref: annotationRef, data: annotationDict, } + ]); + + var annotation = annotationFactory.create(xref, annotationRef); + var data = annotation.data; + expect(data.annotationType).toEqual(AnnotationType.LINK); + + expect(data.url).toBeUndefined(); + expect(data.unsafeUrl).toBeUndefined(); + expect(data.dest).toEqual([{ num: 17, gen: 0, }, { name: 'XYZ' }, + 0, 841.89, null]); + }); }); describe('TextWidgetAnnotation', function() { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index f599f5ab2..e593a3725 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -643,7 +643,7 @@ describe('api', function() { var outlineItemTwo = outline[2]; expect(typeof outlineItemTwo.title).toEqual('string'); expect(outlineItemTwo.dest).toEqual(null); - expect(outlineItemTwo.url).toEqual('http://google.com'); + expect(outlineItemTwo.url).toEqual('http://google.com/'); expect(outlineItemTwo.newWindow).toBeUndefined(); var outlineItemOne = outline[1];