From 3c0b8073be4880d50cca05bdd4776157083246ac Mon Sep 17 00:00:00 2001
From: Rob Wu <gwnRob@gmail.com>
Date: Wed, 22 Jan 2014 12:27:44 +0100
Subject: [PATCH 1/2] Try to recover from bad URI value

See https://github.com/mozilla/pdf.js/issues/4159

BAD (http://cms.di.unipi.it/files/bbec7791fac20e98127c77531e4031912392156c/testo.pdf):
<< /S /URI /URI /v#2findex.php#2fFile:Logo.png >>

GOOD (http://www.ioi2012.org/wp-content/uploads/2011/12/practice.pdf):
<< /S /URI /URI (http://127.0.0.1/v/index.php/File:Logo.png) >>

The URL should be wrapped in parentheses, but sometimes it isn't.
Consequently, the value is parsed as a Name (because of the leading "/";
each "#2f" in a Name is a hex escape for "/"), and the resulting object is
`{name: "v/index.php/File:Logo.png" }`. This is not a string, so
`url.indexOf` throws an error here.
---
 src/shared/annotation.js | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/shared/annotation.js b/src/shared/annotation.js
index ea504e8aa..f54732ea0 100644
--- a/src/shared/annotation.js
+++ b/src/shared/annotation.js
@@ -640,7 +640,13 @@ var LinkAnnotation = (function LinkAnnotationClosure() {
     if (action) {
       var linkType = action.get('S').name;
       if (linkType === 'URI') {
-        var url = addDefaultProtocolToUrl(action.get('URI'));
+        var url = action.get('URI');
+        if (isName(url)) {
+          // Some bad PDFs do not put parentheses around relative URLs.
+          url = '/' + url.name;
+        } else {
+          url = addDefaultProtocolToUrl(url);
+        }
         // TODO: pdf spec mentions urls can be relative to a Base
         // entry in the dictionary.
         if (!isValidUrl(url, false)) {

From b35ced8c9e8ca8c0004ab8187e0c1ab771b08be7 Mon Sep 17 00:00:00 2001
From: Rob Wu <gwnRob@gmail.com>
Date: Wed, 22 Jan 2014 12:39:42 +0100
Subject: [PATCH 2/2] RFC 3986-compliant isValidUrl (protocol parsing)

To avoid misinterpreting relative URLs like "/File:wikipedia.png" as
non-relative URLs.
---
 src/shared/util.js | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/shared/util.js b/src/shared/util.js
index 51fd42bee..312445565 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -223,7 +223,7 @@ var UnsupportedManager = PDFJS.UnsupportedManager =
 function combineUrl(baseUrl, url) {
   if (!url)
     return baseUrl;
-  if (url.indexOf(':') >= 0)
+  if (/^[a-z][a-z0-9+\-.]*:/i.test(url))
     return url;
   if (url.charAt(0) == '/') {
     // absolute path
@@ -247,11 +247,13 @@ function isValidUrl(url, allowRelative) {
   if (!url) {
     return false;
   }
-  var colon = url.indexOf(':');
-  if (colon < 0) {
+  // RFC 3986 (http://tools.ietf.org/html/rfc3986#section-3.1)
+  // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+  var protocol = /^[a-z][a-z0-9+\-.]*(?=:)/i.exec(url);
+  if (!protocol) {
     return allowRelative;
   }
-  var protocol = url.substr(0, colon);
+  protocol = protocol[0].toLowerCase();
   switch (protocol) {
     case 'http':
     case 'https':