From eb8f4e83434b20644aea30bff9755b29199c29b5 Mon Sep 17 00:00:00 2001
From: mduan <duan.mack@gmail.com>
Date: Tue, 8 Jan 2013 15:28:08 -0800
Subject: [PATCH] Handle some illegal characters in hex string

Do not throw exception when hex strings are in the wrong format

Currently pdf.js is throwing an exception for the following hex string:

`<7 0 2 15 5 2 2 2 4 3 2 4>`

The issue is that the 15 is not a valid hex character so pdf.js ends up
throwing an exception.

This diff changes the parser to process the above hex string as follows:

`70 21 55 22 24 32` (Note: the final 4 of the hex string is ignored)

This replicates the behaviour of MuPDF and no longer throws an exception.
---
 src/parser.js            | 42 +++++++++++++++++++++++-----------------
 test/unit/parser_spec.js | 11 +++++++++++
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/src/parser.js b/src/parser.js
index 6987bbf03..6f738b80e 100644
--- a/src/parser.js
+++ b/src/parser.js
@@ -460,28 +460,34 @@ var Lexer = (function LexerClosure() {
     getHexString: function Lexer_getHexString(ch) {
       var str = '';
       var stream = this.stream;
-      for (;;) {
+      var isFirstHex = true;
+      var firstDigit;
+      var secondDigit;
+      while (true) {
         ch = stream.getChar();
-        if (ch == '>') {
-          break;
-        }
         if (!ch) {
           warn('Unterminated hex string');
           break;
-        }
-        if (specialChars[ch.charCodeAt(0)] != 1) {
-          var x, x2;
-          if ((x = toHexDigit(ch)) == -1)
-            error('Illegal character in hex string: ' + ch);
-
-          ch = stream.getChar();
-          while (specialChars[ch.charCodeAt(0)] == 1)
-            ch = stream.getChar();
-
-          if ((x2 = toHexDigit(ch)) == -1)
-            error('Illegal character in hex string: ' + ch);
-
-          str += String.fromCharCode((x << 4) | x2);
+        } else if (ch === '>') {
+          break;
+        } else if (specialChars[ch.charCodeAt(0)] === 1) {
+          continue;
+        } else {
+          if (isFirstHex) {
+            firstDigit = toHexDigit(ch);
+            if (firstDigit === -1) {
+              warn("Ignoring invalid character '" + ch + "' in hex string");
+              continue;
+            }
+          } else {
+            secondDigit = toHexDigit(ch);
+            if (secondDigit === -1) {
+              warn("Ignoring invalid character '" + ch + "' in hex string");
+              continue;
+            }
+            str += String.fromCharCode((firstDigit << 4) | secondDigit);
+          }
+          isFirstHex = !isFirstHex;
         }
       }
       return str;
diff --git a/test/unit/parser_spec.js b/test/unit/parser_spec.js
index 69a6be954..a9dcc2e74 100644
--- a/test/unit/parser_spec.js
+++ b/test/unit/parser_spec.js
@@ -12,6 +12,17 @@ describe('parser', function() {
 
       expect(result).toEqual(11.234);
     });
+
+    it('should not throw exception on bad input', function() {
+      // '7 0 2 15 5 2 2 2 4 3 2 4' (whitespace skipped, 13 hex digits)
+      // should be parsed as
+      // '70 21 55 22 24 32' (the unpaired trailing '4' is dropped)
+      var input = new StringStream('7 0 2 15 5 2 2 2 4 3 2 4>');
+      var lexer = new Lexer(input);
+      var result = lexer.getHexString('<');
+
+      expect(result).toEqual('p!U"$2');
+    });
   });
 });