Merge pull request #1148 from arturadib/readXRefTable

Rewrite of readXRefTable [obj.js]
2012-02-01 04:40:22 -08:00 · 2012-02-01 04:40:22 -08:00 · ff01faa86f
commit ff01faa86f
parent 13f207424c 775290d698
1 changed file with 86 additions and 69 deletions
--- a/src/obj.js
+++ b/src/obj.js
@@ -287,74 +287,69 @@ var XRef = (function XRefClosure() {
  XRef.prototype = {
    readXRefTable: function readXRefTable(parser) {
      // Example of cross-reference table:
      // xref
      // 0 1                    <-- subsection header (first obj #, obj count)
      // 0000000000 65535 f     <-- actual object (offset, generation #, f/n)
      // 23 2                   <-- subsection header ... and so on ...
      // 0000025518 00002 n
      // 0000025635 00000 n
      // trailer
      // ...
      // Outer loop is over subsection headers
      var obj;
-      while (true) {
+      while (!isCmd(obj = parser.getObj(), 'trailer')) {
-        if (isCmd(obj = parser.getObj(), 'trailer'))
+        var first = obj,
-          break;
+            count = parser.getObj();
-        if (!isInt(obj))
+
-          error('Invalid XRef table');
+        if (!isInt(first) || !isInt(count))
-        var first = obj;
+          error('Invalid XRef table: wrong types in subsection header');
-        if (!isInt(obj = parser.getObj()))
+
-          error('Invalid XRef table');
+        // Inner loop is over objects themselves
-        var n = obj;
+        for (var i = 0; i < count; i++) {
        if (first < 0 || n < 0 || (first + n) != ((first + n) | 0))
          error('Invalid XRef table: ' + first + ', ' + n);
        for (var i = first; i < first + n; ++i) {
          var entry = {};
-          if (!isInt(obj = parser.getObj()))
+          entry.offset = parser.getObj();
-            error('Invalid XRef table: ' + first + ', ' + n);
+          entry.gen = parser.getObj();
-          entry.offset = obj;
+          var type = parser.getObj();
-          if (!isInt(obj = parser.getObj()))
+
-            error('Invalid XRef table: ' + first + ', ' + n);
+          if (isCmd(type, 'f'))
          entry.gen = obj;
          obj = parser.getObj();
          if (isCmd(obj, 'n')) {
            entry.uncompressed = true;
          } else if (isCmd(obj, 'f')) {
            entry.free = true;
-          } else {
+          else if (isCmd(type, 'n'))
-            error('Invalid XRef table: ' + first + ', ' + n);
+            entry.uncompressed = true;
-          }
+
-          if (!this.entries[i]) {
+          // Validate entry obj
-            // In some buggy PDF files the xref table claims to start at 1
+          if (!isInt(entry.offset) || !isInt(entry.gen) ||
-            // instead of 0.
+              !(entry.free || entry.uncompressed)) {
-            if (i == 1 && first == 1 &&
+            error('Invalid entry in XRef subsection: ' + first + ', ' + count);
                entry.offset == 0 && entry.gen == 65535 && entry.free) {
              i = first = 0;
            }
            this.entries[i] = entry;
          }
          if (!this.entries[i + first])
            this.entries[i + first] = entry;
        }
      }
-      // read the trailer dictionary
+      // Sanity check: as per spec, first object must have these properties
-      var dict;
+      if (this.entries[0] &&
-      if (!isDict(dict = parser.getObj()))
+          !(this.entries[0].gen === 65535 && this.entries[0].free))
-        error('Invalid XRef table');
+        error('Invalid XRef table: unexpected first object');
-      // get the 'Prev' pointer
+      // Sanity check
-      var prev;
+      if (!isCmd(obj, 'trailer'))
-      obj = dict.get('Prev');
+        error('Invalid XRef table: could not find trailer dictionary');
      if (isInt(obj)) {
        prev = obj;
      } else if (isRef(obj)) {
        // certain buggy PDF generators generate "/Prev NNN 0 R" instead
        // of "/Prev NNN"
        prev = obj.num;
      }
      if (prev) {
        this.readXRef(prev);
      }
-      // check for 'XRefStm' key
+      // Read trailer dictionary, e.g.
-      if (isInt(obj = dict.get('XRefStm'))) {
+      // trailer
-        var pos = obj;
+      //    << /Size 22
-        // ignore previously loaded xref streams (possible infinite recursion)
+      //      /Root 2 0 R
-        if (!(pos in this.xrefstms)) {
+      //      /Info 1 0 R
-          this.xrefstms[pos] = 1;
+      //      /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
-          this.readXRef(pos);
+      //    >>
-        }
+      // The parser goes through the entire stream << ... >> and provides
-      }
+      // a getter interface for the key-value table
      var dict = parser.getObj();
      if (!isDict(dict))
        error('Invalid XRef table: could not parse trailer dictionary');
      return dict;
    },
@@ -407,9 +402,6 @@ var XRef = (function XRefClosure() {
        }
        range.splice(0, 2);
      }
      var prev = streamParameters.get('Prev');
      if (isInt(prev))
        this.readXRef(prev);
      return streamParameters;
    },
    indexObjects: function indexObjects() {
@@ -529,22 +521,47 @@ var XRef = (function XRefClosure() {
      try {
        var parser = new Parser(new Lexer(stream), true);
        var obj = parser.getObj();
        var dict;
-        // parse an old-style xref table
+        // Get dictionary
-        if (isCmd(obj, 'xref'))
+        if (isCmd(obj, 'xref')) {
-          return this.readXRefTable(parser);
+          // Parse end-of-file XRef
          dict = this.readXRefTable(parser);
-        // parse an xref stream
+          // Recursively get other XRefs 'XRefStm', if any
          obj = dict.get('XRefStm');
          if (isInt(obj)) {
            var pos = obj;
            // ignore previously loaded xref streams
            // (possible infinite recursion)
            if (!(pos in this.xrefstms)) {
              this.xrefstms[pos] = 1;
              this.readXRef(pos);
            }
          }
        } else if (isInt(obj)) {
          // Parse in-stream XRef
          if (!isInt(parser.getObj()) ||
              !isCmd(parser.getObj(), 'obj') ||
              !isStream(obj = parser.getObj())) {
            error('Invalid XRef stream');
          }
-          return this.readXRefStream(obj);
+          dict = this.readXRefStream(obj);
        }
        // Recursively get previous dictionary, if any
        obj = dict.get('Prev');
        if (isInt(obj))
          this.readXRef(obj);
        else if (isRef(obj)) {
          // The spec says Prev must not be a reference, i.e. "/Prev NNN"
          // This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
          this.readXRef(obj.num);
        }
        return dict;
      } catch (e) {
-        log('Reading of the xref table/stream failed: ' + e);
+        log('(while reading XRef): ' + e);
      }
      warn('Indexing all PDF objects');