Merge pull request #1148 from arturadib/readXRefTable

Rewrite of readXRefTable [obj.js]
This commit is contained in:
notmasteryet 2012-02-01 04:40:22 -08:00
commit ff01faa86f

View File

@ -287,74 +287,69 @@ var XRef = (function XRefClosure() {
XRef.prototype = { XRef.prototype = {
readXRefTable: function readXRefTable(parser) { readXRefTable: function readXRefTable(parser) {
// Example of cross-reference table:
// xref
// 0 1 <-- subsection header (first obj #, obj count)
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
// 23 2 <-- subsection header ... and so on ...
// 0000025518 00002 n
// 0000025635 00000 n
// trailer
// ...
// Outer loop is over subsection headers
var obj; var obj;
while (true) { while (!isCmd(obj = parser.getObj(), 'trailer')) {
if (isCmd(obj = parser.getObj(), 'trailer')) var first = obj,
break; count = parser.getObj();
if (!isInt(obj))
error('Invalid XRef table'); if (!isInt(first) || !isInt(count))
var first = obj; error('Invalid XRef table: wrong types in subsection header');
if (!isInt(obj = parser.getObj()))
error('Invalid XRef table'); // Inner loop is over objects themselves
var n = obj; for (var i = 0; i < count; i++) {
if (first < 0 || n < 0 || (first + n) != ((first + n) | 0))
error('Invalid XRef table: ' + first + ', ' + n);
for (var i = first; i < first + n; ++i) {
var entry = {}; var entry = {};
if (!isInt(obj = parser.getObj())) entry.offset = parser.getObj();
error('Invalid XRef table: ' + first + ', ' + n); entry.gen = parser.getObj();
entry.offset = obj; var type = parser.getObj();
if (!isInt(obj = parser.getObj()))
error('Invalid XRef table: ' + first + ', ' + n); if (isCmd(type, 'f'))
entry.gen = obj;
obj = parser.getObj();
if (isCmd(obj, 'n')) {
entry.uncompressed = true;
} else if (isCmd(obj, 'f')) {
entry.free = true; entry.free = true;
} else { else if (isCmd(type, 'n'))
error('Invalid XRef table: ' + first + ', ' + n); entry.uncompressed = true;
}
if (!this.entries[i]) { // Validate entry obj
// In some buggy PDF files the xref table claims to start at 1 if (!isInt(entry.offset) || !isInt(entry.gen) ||
// instead of 0. !(entry.free || entry.uncompressed)) {
if (i == 1 && first == 1 && error('Invalid entry in XRef subsection: ' + first + ', ' + count);
entry.offset == 0 && entry.gen == 65535 && entry.free) {
i = first = 0;
}
this.entries[i] = entry;
} }
if (!this.entries[i + first])
this.entries[i + first] = entry;
} }
} }
// read the trailer dictionary // Sanity check: as per spec, first object must have these properties
var dict; if (this.entries[0] &&
if (!isDict(dict = parser.getObj())) !(this.entries[0].gen === 65535 && this.entries[0].free))
error('Invalid XRef table'); error('Invalid XRef table: unexpected first object');
// get the 'Prev' pointer // Sanity check
var prev; if (!isCmd(obj, 'trailer'))
obj = dict.get('Prev'); error('Invalid XRef table: could not find trailer dictionary');
if (isInt(obj)) {
prev = obj;
} else if (isRef(obj)) {
// certain buggy PDF generators generate "/Prev NNN 0 R" instead
// of "/Prev NNN"
prev = obj.num;
}
if (prev) {
this.readXRef(prev);
}
// check for 'XRefStm' key // Read trailer dictionary, e.g.
if (isInt(obj = dict.get('XRefStm'))) { // trailer
var pos = obj; // << /Size 22
// ignore previously loaded xref streams (possible infinite recursion) // /Root 20R
if (!(pos in this.xrefstms)) { // /Info 10R
this.xrefstms[pos] = 1; // /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
this.readXRef(pos); // >>
} // The parser goes through the entire stream << ... >> and provides
} // a getter interface for the key-value table
var dict = parser.getObj();
if (!isDict(dict))
error('Invalid XRef table: could not parse trailer dictionary');
return dict; return dict;
}, },
@ -407,9 +402,6 @@ var XRef = (function XRefClosure() {
} }
range.splice(0, 2); range.splice(0, 2);
} }
var prev = streamParameters.get('Prev');
if (isInt(prev))
this.readXRef(prev);
return streamParameters; return streamParameters;
}, },
indexObjects: function indexObjects() { indexObjects: function indexObjects() {
@ -529,22 +521,47 @@ var XRef = (function XRefClosure() {
try { try {
var parser = new Parser(new Lexer(stream), true); var parser = new Parser(new Lexer(stream), true);
var obj = parser.getObj(); var obj = parser.getObj();
var dict;
// parse an old-style xref table // Get dictionary
if (isCmd(obj, 'xref')) if (isCmd(obj, 'xref')) {
return this.readXRefTable(parser); // Parse end-of-file XRef
dict = this.readXRefTable(parser);
// parse an xref stream // Recursively get other XRefs 'XRefStm', if any
obj = dict.get('XRefStm');
if (isInt(obj)) { if (isInt(obj)) {
var pos = obj;
// ignore previously loaded xref streams
// (possible infinite recursion)
if (!(pos in this.xrefstms)) {
this.xrefstms[pos] = 1;
this.readXRef(pos);
}
}
} else if (isInt(obj)) {
// Parse in-stream XRef
if (!isInt(parser.getObj()) || if (!isInt(parser.getObj()) ||
!isCmd(parser.getObj(), 'obj') || !isCmd(parser.getObj(), 'obj') ||
!isStream(obj = parser.getObj())) { !isStream(obj = parser.getObj())) {
error('Invalid XRef stream'); error('Invalid XRef stream');
} }
return this.readXRefStream(obj); dict = this.readXRefStream(obj);
} }
// Recursively get previous dictionary, if any
obj = dict.get('Prev');
if (isInt(obj))
this.readXRef(obj);
else if (isRef(obj)) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.readXRef(obj.num);
}
return dict;
} catch (e) { } catch (e) {
log('Reading of the xref table/stream failed: ' + e); log('(while reading XRef): ' + e);
} }
warn('Indexing all PDF objects'); warn('Indexing all PDF objects');