Merge pull request #4213 from nnethercote/strings

Create less garbage while parsing
2014-01-30 04:41:53 -08:00 · 2014-01-30 04:41:53 -08:00 · acb33b3e7d
commit acb33b3e7d
parent b4eceac826 164d7a6e15
1 changed files with 58 additions and 32 deletions
--- a/src/core/parser.js
+++ b/src/core/parser.js
@ -335,6 +335,13 @@ var Lexer = (function LexerClosure() {
    this.stream = stream;
    this.nextChar();

+    // While lexing, we build up many strings one char at a time. Using += for
+    // this can result in lots of garbage strings. It's better to build an
+    // array of single-char strings and then join() them together at the end.
+    // And reusing a single array (i.e. |this.strBuf|) over and over for this
+    // purpose uses less memory than using a new array for each string.
+    this.strBuf = [];
+
    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
@ -389,33 +396,50 @@ var Lexer = (function LexerClosure() {
    getNumber: function Lexer_getNumber() {
      var floating = false;
      var ch = this.currentChar;
-      var str = String.fromCharCode(ch);
+      var allDigits = ch >= 0x30 && ch <= 0x39;
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
+      strBuf.push(String.fromCharCode(ch));
      while ((ch = this.nextChar()) >= 0) {
-        if (ch === 0x2E && !floating) { // '.'
-          str += '.';
+        if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
+          strBuf.push(String.fromCharCode(ch));
+        } else if (ch === 0x2E && !floating) { // '.'
+          strBuf.push('.');
          floating = true;
+          allDigits = false;
        } else if (ch === 0x2D) { // '-'
          // ignore minus signs in the middle of numbers to match
          // Adobe's behavior
          warn('Badly formated number');
-        } else if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
-          str += String.fromCharCode(ch);
+          allDigits = false;
        } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
          floating = true;
+          allDigits = false;
        } else {
          // the last character doesn't belong to us
          break;
        }
      }
-      var value = parseFloat(str);
-      if (isNaN(value))
-        error('Invalid floating point number: ' + value);
+      var value;
+      if (allDigits) {
+        value = 0;
+        var charCodeOfZero = 48;    // '0'
+        for (var i = 0, ii = strBuf.length; i < ii; i++) {
+          value = value * 10 + (strBuf[i].charCodeAt(0) - charCodeOfZero);
+        }
+      } else {
+        value = parseFloat(strBuf.join(''));
+        if (isNaN(value)) {
+          error('Invalid floating point number: ' + value);
+        }
+      }
      return value;
    },
    getString: function Lexer_getString() {
      var numParen = 1;
      var done = false;
-      var str = '';
+      var strBuf = this.strBuf;
+      strBuf.length = 0;

      var ch = this.nextChar();
      while (true) {
@ -427,14 +451,14 @@ var Lexer = (function LexerClosure() {
            break;
          case 0x28: // '('
            ++numParen;
-            str += '(';
+            strBuf.push('(');
            break;
          case 0x29: // ')'
            if (--numParen === 0) {
              this.nextChar(); // consume strings ')'
              done = true;
            } else {
-              str += ')';
+              strBuf.push(')');
            }
            break;
          case 0x5C: // '\\'
@ -445,24 +469,24 @@ var Lexer = (function LexerClosure() {
                done = true;
                break;
              case 0x6E: // 'n'
-                str += '\n';
+                strBuf.push('\n');
                break;
              case 0x72: // 'r'
-                str += '\r';
+                strBuf.push('\r');
                break;
              case 0x74: // 't'
-                str += '\t';
+                strBuf.push('\t');
                break;
              case 0x62: // 'b'
-                str += '\b';
+                strBuf.push('\b');
                break;
              case 0x66: // 'f'
-                str += '\f';
+                strBuf.push('\f');
                break;
              case 0x5C: // '\'
              case 0x28: // '('
              case 0x29: // ')'
-                str += String.fromCharCode(ch);
+                strBuf.push(String.fromCharCode(ch));
                break;
              case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
              case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
@ -478,17 +502,17 @@ var Lexer = (function LexerClosure() {
                  }
                }

-                str += String.fromCharCode(x);
+                strBuf.push(String.fromCharCode(x));
                break;
              case 0x0A: case 0x0D: // LF, CR
                break;
              default:
-                str += String.fromCharCode(ch);
+                strBuf.push(String.fromCharCode(ch));
                break;
            }
            break;
          default:
-            str += String.fromCharCode(ch);
+            strBuf.push(String.fromCharCode(ch));
            break;
        }
        if (done) {
@ -498,10 +522,12 @@ var Lexer = (function LexerClosure() {
          ch = this.nextChar();
        }
      }
-      return str;
+      return strBuf.join('');
    },
    getName: function Lexer_getName() {
-      var str = '', ch;
+      var ch;
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
      while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
        if (ch === 0x23) { // '#'
          ch = this.nextChar();
@ -510,23 +536,23 @@ var Lexer = (function LexerClosure() {
            var x2 = toHexDigit(this.nextChar());
            if (x2 == -1)
              error('Illegal digit in hex char in name: ' + x2);
-            str += String.fromCharCode((x << 4) | x2);
+            strBuf.push(String.fromCharCode((x << 4) | x2));
          } else {
-            str += '#';
-            str += String.fromCharCode(ch);
+            strBuf.push('#', String.fromCharCode(ch));
          }
        } else {
-          str += String.fromCharCode(ch);
+          strBuf.push(String.fromCharCode(ch));
        }
      }
-      if (str.length > 128) {
+      if (strBuf.length > 128) {
        error('Warning: name token is longer than allowed by the spec: ' +
-              str.length);
+              strBuf.length);
      }
-      return new Name(str);
+      return new Name(strBuf.join(''));
    },
    getHexString: function Lexer_getHexString() {
-      var str = '';
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
      var ch = this.currentChar;
      var isFirstHex = true;
      var firstDigit;
@ -556,13 +582,13 @@ var Lexer = (function LexerClosure() {
              ch = this.nextChar();
              continue;
            }
-            str += String.fromCharCode((firstDigit << 4) | secondDigit);
+            strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
          }
          isFirstHex = !isFirstHex;
          ch = this.nextChar();
        }
      }
-      return str;
+      return strBuf.join('');
    },
    getObj: function Lexer_getObj() {
      // skip whitespace and comments