2011-10-26 10:18:22 +09:00
|
|
|
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
|
2012-09-01 07:48:21 +09:00
|
|
|
/* Copyright 2012 Mozilla Foundation
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2014-04-13 23:02:56 +09:00
|
|
|
/* globals assert, bytesToString, CipherTransformFactory, error, info,
|
2013-02-03 07:49:19 +09:00
|
|
|
InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef,
|
2014-01-16 06:28:31 +09:00
|
|
|
isStream, Lexer, Page, Parser, Promise, shadow,
|
2014-04-13 23:02:56 +09:00
|
|
|
stringToPDFString, stringToUTF8String, warn, isString,
|
2013-06-05 09:57:52 +09:00
|
|
|
Promise, MissingDataException, XRefParseException, Stream,
|
2014-05-01 22:27:31 +09:00
|
|
|
ChunkedStream, createPromiseCapability */
|
2011-10-26 10:18:22 +09:00
|
|
|
|
|
|
|
'use strict';
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var Name = (function NameClosure() {
  /**
   * Wraps a name string in an object so it can be told apart from other
   * values by type.
   * @param {string} name - The name text, stored as-is on `this.name`.
   */
  function Name(name) {
    this.name = name;
  }

  Name.prototype = {};

  // Prototype-less map: with a plain `{}` a lookup such as
  // nameCache['constructor'] or nameCache['toString'] would find the
  // (truthy) member inherited from Object.prototype and Name.get() would
  // hand that back instead of a Name instance.
  var nameCache = Object.create(null);

  /**
   * Returns the shared Name instance for `name`, creating and caching it on
   * first use, so equal names are represented by the same object.
   * @param {string} name
   * @returns {Name}
   */
  Name.get = function Name_get(name) {
    var nameValue = nameCache[name];
    return (nameValue ? nameValue : (nameCache[name] = new Name(name)));
  };

  return Name;
})();
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var Cmd = (function CmdClosure() {
  /**
   * Wraps a command string in an object so it can be told apart from other
   * values by type.
   * @param {string} cmd - The command text, stored as-is on `this.cmd`.
   */
  function Cmd(cmd) {
    this.cmd = cmd;
  }

  Cmd.prototype = {};

  // Prototype-less map: with a plain `{}` a lookup such as
  // cmdCache['constructor'] or cmdCache['valueOf'] would find the (truthy)
  // member inherited from Object.prototype and Cmd.get() would hand that
  // back instead of a Cmd instance.
  var cmdCache = Object.create(null);

  /**
   * Returns the shared Cmd instance for `cmd`, creating and caching it on
   * first use.
   * @param {string} cmd
   * @returns {Cmd}
   */
  Cmd.get = function Cmd_get(cmd) {
    var cmdValue = cmdCache[cmd];
    return (cmdValue ? cmdValue : (cmdCache[cmd] = new Cmd(cmd)));
  };

  return Cmd;
})();
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var Dict = (function DictClosure() {
  // A function value (closing over itself) that is stored on every Dict to
  // disable cloning of the Dict.
  var nonSerializable = function nonSerializableClosure() {
    return nonSerializable; // creating closure on some variable
  };

  var GETALL_DICTIONARY_TYPES_WHITELIST = {
    'Background': true,
    'ExtGState': true,
    'Halftone': true,
    'Layout': true,
    'Mask': true,
    'Pagination': true,
    'Printing': true
  };

  // Decides whether getAll() may descend into a nested dictionary.
  //
  // NOTE(review): `dict` is a Dict instance, whose entries live in
  // `dict.map` (see the constructor); `dict.Type` reads a plain property
  // that this closure never assigns, so it looks like every Dict is
  // currently treated as recursable and the whitelist is never consulted.
  // Switching to `dict.get('Type')` would change what getAll() returns to
  // callers, so the behaviour is left untouched here - confirm the intent
  // before changing it.
  function isRecursionAllowedFor(dict) {
    if (!isName(dict.Type)) {
      return true;
    }
    var dictType = dict.Type.name;
    return GETALL_DICTIONARY_TYPES_WHITELIST[dictType] === true;
  }

  // Returns the raw (not dereferenced) entry for the first of up to three
  // alternative keys that is present in `map`. A later key is only tried
  // when it was actually passed; a lookup that reaches key3 and finds no
  // entry yields null, all other misses yield undefined.
  function lookupRaw(map, key1, key2, key3) {
    var value;
    if (typeof (value = map[key1]) !== 'undefined' || key1 in map ||
        typeof key2 === 'undefined') {
      return value;
    }
    if (typeof (value = map[key2]) !== 'undefined' || key2 in map ||
        typeof key3 === 'undefined') {
      return value;
    }
    value = map[key3];
    // Only a missing entry falls back to null; falsy entries such as 0,
    // false or '' are real values and must be returned unchanged.
    return (typeof value === 'undefined' ? null : value);
  }

  /**
   * Key/value dictionary.
   * @param {Object} [xref] - Optional; when set, get()/getAsync() pass
   *   values through `xref.fetchIfRef` / `xref.fetchIfRefAsync`.
   */
  // xref is optional
  function Dict(xref) {
    // Map should only be used internally, use functions below to access.
    this.map = Object.create(null);
    this.xref = xref;
    this.objId = null;
    this.__nonSerializable__ = nonSerializable; // disable cloning of the Dict
  }

  Dict.prototype = {
    /** Replaces the xref used for dereferencing. */
    assignXref: function Dict_assignXref(newXref) {
      this.xref = newXref;
    },

    /**
     * Looks up `key1`, falling back to `key2` and then `key3` when given.
     * Automatically dereferences Ref objects when an xref is assigned.
     */
    // automatically dereferences Ref objects
    get: function Dict_get(key1, key2, key3) {
      var value = lookupRaw(this.map, key1, key2, key3);
      var xref = this.xref;
      return xref ? xref.fetchIfRef(value) : value;
    },

    // Same as get(), but returns a promise and uses fetchIfRefAsync().
    getAsync: function Dict_getAsync(key1, key2, key3) {
      var value = lookupRaw(this.map, key1, key2, key3);
      var xref = this.xref;
      if (xref) {
        return xref.fetchIfRefAsync(value);
      }
      return Promise.resolve(value);
    },

    // no dereferencing
    getRaw: function Dict_getRaw(key) {
      return this.map[key];
    },

    /**
     * Builds a new prototype-less object holding every entry, dereferenced
     * through get(). Nested Dicts for which recursion is allowed are
     * expanded the same way; the others are stored as their raw entry.
     */
    // creates new map and dereferences all Refs
    getAll: function Dict_getAll() {
      var all = Object.create(null);
      var queue = null; // allocated lazily, only when a nested Dict is found
      var key, obj;
      for (key in this.map) {
        obj = this.get(key);
        if (obj instanceof Dict) {
          if (isRecursionAllowedFor(obj)) {
            (queue || (queue = [])).push({target: all, key: key, obj: obj});
          } else {
            all[key] = this.getRaw(key);
          }
        } else {
          all[key] = obj;
        }
      }
      if (!queue) {
        return all;
      }

      // trying to take cyclic references into the account
      var processed = Object.create(null);
      while (queue.length > 0) {
        var item = queue.shift();
        var itemObj = item.obj;
        var objId = itemObj.objId;
        if (objId && objId in processed) {
          // Already expanded once: reuse that result instead of walking the
          // same dictionary again.
          item.target[item.key] = processed[objId];
          continue;
        }
        var dereferenced = Object.create(null);
        for (key in itemObj.map) {
          obj = itemObj.get(key);
          if (obj instanceof Dict) {
            if (isRecursionAllowedFor(obj)) {
              queue.push({target: dereferenced, key: key, obj: obj});
            } else {
              dereferenced[key] = itemObj.getRaw(key);
            }
          } else {
            dereferenced[key] = obj;
          }
        }
        if (objId) {
          processed[objId] = dereferenced;
        }
        item.target[item.key] = dereferenced;
      }
      return all;
    },

    /** Stores `value` under `key` without any conversion. */
    set: function Dict_set(key, value) {
      this.map[key] = value;
    },

    /** Returns true when `key` is present, whatever its value. */
    has: function Dict_has(key) {
      return key in this.map;
    },

    /** Calls `callback(key, value)` with the dereferenced value per entry. */
    forEach: function Dict_forEach(callback) {
      for (var key in this.map) {
        callback(key, this.get(key));
      }
    }
  };

  Dict.empty = new Dict(null);

  return Dict;
})();
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var Ref = (function RefClosure() {
  /**
   * Reference made of an object number and a generation number.
   * @param {number} num
   * @param {number} gen
   */
  function Ref(num, gen) {
    this.num = num;
    this.gen = gen;
  }

  Ref.prototype = {
    /**
     * Compact string form: "<num>R" when the generation is 0, otherwise
     * "<num>R<gen>".
     */
    toString: function Ref_toString() {
      // Hot code path, so the string is kept as short as possible; the
      // generation is nearly always zero and is then left out entirely.
      var prefix = this.num + 'R';
      return (this.gen !== 0 ? prefix + this.gen : prefix);
    }
  };

  return Ref;
})();
|
|
|
|
|
2014-03-21 04:28:22 +09:00
|
|
|
// The reference is identified by number and generation.
|
|
|
|
// This structure stores only one instance of the reference.
|
2011-12-07 07:18:40 +09:00
|
|
|
var RefSet = (function RefSetClosure() {
  /** Set of references, keyed by each reference's toString() value. */
  function RefSet() {
    // Prototype-less, like RefSetCache's store: has() uses the `in`
    // operator, which on a plain `{}` would also report keys inherited from
    // Object.prototype as members.
    this.dict = Object.create(null);
  }

  RefSet.prototype = {
    /** Returns true when `ref` was put() and not removed since. */
    has: function RefSet_has(ref) {
      return ref.toString() in this.dict;
    },

    /** Adds `ref` to the set; adding it again has no further effect. */
    put: function RefSet_put(ref) {
      this.dict[ref.toString()] = true;
    },

    /** Removes `ref` from the set; a no-op when it is not present. */
    remove: function RefSet_remove(ref) {
      delete this.dict[ref.toString()];
    }
  };

  return RefSet;
})();
|
|
|
|
|
2013-06-26 02:33:53 +09:00
|
|
|
var RefSetCache = (function RefSetCacheClosure() {
  /** Maps references (by their toString() key) to arbitrary values. */
  function RefSetCache() {
    this.dict = Object.create(null);
  }

  RefSetCache.prototype = {
    /** Returns the value stored for `ref`, or undefined when absent. */
    get: function RefSetCache_get(ref) {
      var cacheKey = ref.toString();
      return this.dict[cacheKey];
    },

    /** Tells whether any value (even a falsy one) is stored for `ref`. */
    has: function RefSetCache_has(ref) {
      var cacheKey = ref.toString();
      return cacheKey in this.dict;
    },

    /** Stores `obj` under `ref`, replacing any previous value. */
    put: function RefSetCache_put(ref, obj) {
      var cacheKey = ref.toString();
      this.dict[cacheKey] = obj;
    },

    /** Makes `ref` map to whatever is currently stored for `aliasRef`. */
    putAlias: function RefSetCache_putAlias(ref, aliasRef) {
      var cacheKey = ref.toString();
      this.dict[cacheKey] = this.get(aliasRef);
    },

    /** Invokes `fn` with each stored value, using `thisArg` as `this`. */
    forEach: function RefSetCache_forEach(fn, thisArg) {
      for (var cacheKey in this.dict) {
        fn.call(thisArg, this.dict[cacheKey]);
      }
    },

    /** Drops every entry by swapping in a fresh store. */
    clear: function RefSetCache_clear() {
      this.dict = Object.create(null);
    }
  };

  return RefSetCache;
})();
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var Catalog = (function CatalogClosure() {
  /**
   * Gives access to document-level data stored in the catalog dictionary
   * that `xref.getCatalogObj()` returns.
   * @param {Object} pdfManager - Kept and handed to each created Page.
   * @param {Object} xref - Used to fetch referenced objects.
   */
  function Catalog(pdfManager, xref) {
    this.pdfManager = pdfManager;
    this.xref = xref;
    this.catDict = xref.getCatalogObj();
    this.fontCache = new RefSetCache();
    assert(isDict(this.catDict),
      'catalog object is not a dictionary');

    // Page promises, indexed by page index (see getPage).
    this.pagePromises = [];
  }

  Catalog.prototype = {
    /**
     * The decoded metadata stream as a string; null when the catalog has no
     * 'Metadata' reference, undefined when the stream is not a
     * /Metadata /XML stream or cannot be decoded. The result is shadowed
     * onto the instance.
     */
    get metadata() {
      var streamRef = this.catDict.getRaw('Metadata');
      if (!isRef(streamRef)) {
        return shadow(this, 'metadata', null);
      }

      var encryptMetadata = (!this.xref.encrypt ? false :
                             this.xref.encrypt.encryptMetadata);

      // NOTE(review): the second argument presumably asks fetch() to skip
      // decryption when the metadata is not encrypted - confirm against
      // XRef.fetch.
      var stream = this.xref.fetch(streamRef, !encryptMetadata);
      var metadata;
      if (stream && isDict(stream.dict)) {
        var type = stream.dict.get('Type');
        var subtype = stream.dict.get('Subtype');

        if (isName(type) && isName(subtype) &&
            type.name === 'Metadata' && subtype.name === 'XML') {
          // XXX: This should examine the charset the XML document defines,
          // however since there are currently no real means to decode
          // arbitrary charsets, let's just hope that the author of the PDF
          // was reasonable enough to stick with the XML default charset,
          // which is UTF-8.
          try {
            metadata = stringToUTF8String(bytesToString(stream.getBytes()));
          } catch (e) {
            // Best effort: invalid metadata is skipped, not fatal.
            info('Skipping invalid metadata.');
          }
        }
      }

      return shadow(this, 'metadata', metadata);
    },

    /** The catalog's 'Pages' dictionary; asserts that it is a dictionary. */
    get toplevelPagesDict() {
      var pagesObj = this.catDict.get('Pages');
      assert(isDict(pagesObj), 'invalid top-level pages dictionary');
      // shadow the prototype getter
      return shadow(this, 'toplevelPagesDict', pagesObj);
    },

    /**
     * The outline tree from readDocumentOutline(), or null when it cannot
     * be read. MissingDataException is rethrown; any other failure is
     * reported with warn() and results in null.
     */
    get documentOutline() {
      var obj = null;
      try {
        obj = this.readDocumentOutline();
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        warn('Unable to read document outline');
      }
      return shadow(this, 'documentOutline', obj);
    },

    /**
     * Walks the 'Outlines' entries breadth-first via their 'First'/'Next'
     * references.
     * @returns {Array|null} The top-level outline items (each with dest,
     *   title, color, count, bold, italic and nested items), or null when
     *   there are none.
     */
    readDocumentOutline: function Catalog_readDocumentOutline() {
      var xref = this.xref;
      var obj = this.catDict.get('Outlines');
      var root = { items: [] };
      if (isDict(obj)) {
        obj = obj.getRaw('First');
        var processed = new RefSet();
        if (isRef(obj)) {
          var queue = [{obj: obj, parent: root}];
          // to avoid recursion keeping track of the items
          // in the processed dictionary
          processed.put(obj);
          while (queue.length > 0) {
            var i = queue.shift();
            var outlineDict = xref.fetchIfRef(i.obj);
            if (outlineDict === null) {
              continue;
            }
            if (!outlineDict.has('Title')) {
              error('Invalid outline item');
            }
            // Prefer the destination of the 'A' entry; fall back to 'Dest'.
            var dest = outlineDict.get('A');
            if (dest) {
              dest = dest.get('D');
            } else if (outlineDict.has('Dest')) {
              dest = outlineDict.getRaw('Dest');
              if (isName(dest)) {
                dest = dest.name;
              }
            }
            var title = outlineDict.get('Title');
            var outlineItem = {
              dest: dest,
              title: stringToPDFString(title),
              color: outlineDict.get('C') || [0, 0, 0],
              count: outlineDict.get('Count'),
              bold: !!(outlineDict.get('F') & 2),
              italic: !!(outlineDict.get('F') & 1),
              items: []
            };
            i.parent.items.push(outlineItem);
            // 'First' starts the children of this item...
            obj = outlineDict.getRaw('First');
            if (isRef(obj) && !processed.has(obj)) {
              queue.push({obj: obj, parent: outlineItem});
              processed.put(obj);
            }
            // ...while 'Next' is a sibling under the same parent.
            obj = outlineDict.getRaw('Next');
            if (isRef(obj) && !processed.has(obj)) {
              queue.push({obj: obj, parent: i.parent});
              processed.put(obj);
            }
          }
        }
      }
      return (root.items.length > 0 ? root.items : null);
    },

    /**
     * The 'Count' entry of the top-level pages dictionary; asserts that it
     * is an integer.
     */
    get numPages() {
      var obj = this.toplevelPagesDict.get('Count');
      assert(
        isInt(obj),
        'page count in top level pages object is not an integer'
      );
      // shadow the prototype getter; the property name has to match the
      // getter ('numPages'), otherwise nothing is shadowed and the getter
      // runs again on every access.
      return shadow(this, 'numPages', obj);
    },

    /**
     * Object mapping destination names to destinations, read from the
     * 'Dests' name tree under 'Names' or, failing that, from the catalog's
     * own 'Dests' dictionary.
     */
    get destinations() {
      // A destination stored as a dictionary keeps the actual value in 'D'.
      function fetchDestination(dest) {
        return isDict(dest) ? dest.get('D') : dest;
      }

      var xref = this.xref;
      var dests = {}, nameTreeRef, nameDictionaryRef;
      var obj = this.catDict.get('Names');
      if (obj && obj.has('Dests')) {
        nameTreeRef = obj.getRaw('Dests');
      } else if (this.catDict.has('Dests')) {
        nameDictionaryRef = this.catDict.get('Dests');
      }

      if (nameDictionaryRef) {
        // reading simple destination dictionary
        obj = nameDictionaryRef;
        obj.forEach(function catalogForEach(key, value) {
          if (!value) {
            return;
          }
          dests[key] = fetchDestination(value);
        });
      }
      if (nameTreeRef) {
        var nameTree = new NameTree(nameTreeRef, xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          dests[name] = fetchDestination(names[name]);
        }
      }
      return shadow(this, 'destinations', dests);
    },

    /**
     * Object mapping attachment names to the `serializable` form of their
     * FileSpec, built from the 'EmbeddedFiles' name tree; null when there
     * are no attachments.
     */
    get attachments() {
      var xref = this.xref;
      var attachments = null, nameTreeRef;
      var obj = this.catDict.get('Names');
      if (obj) {
        nameTreeRef = obj.getRaw('EmbeddedFiles');
      }

      if (nameTreeRef) {
        var nameTree = new NameTree(nameTreeRef, xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          var fs = new FileSpec(names[name], xref);
          // Created lazily so an empty name tree still yields null.
          if (!attachments) {
            attachments = {};
          }
          attachments[stringToPDFString(name)] = fs.serializable;
        }
      }
      return shadow(this, 'attachments', attachments);
    },

    /**
     * Array of JavaScript source strings from the 'JavaScript' name tree,
     * plus 'print(true);' when the 'OpenAction' entry is a named Print
     * action.
     */
    get javaScript() {
      var xref = this.xref;
      var obj = this.catDict.get('Names');

      var javaScript = [];
      if (obj && obj.has('JavaScript')) {
        var nameTree = new NameTree(obj.getRaw('JavaScript'), xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          // We don't really use the JavaScript right now. This code is
          // defensive so we don't cause errors on document load.
          var jsDict = names[name];
          if (!isDict(jsDict)) {
            continue;
          }
          var type = jsDict.get('S');
          if (!isName(type) || type.name !== 'JavaScript') {
            continue;
          }
          var js = jsDict.get('JS');
          if (!isString(js) && !isStream(js)) {
            continue;
          }
          if (isStream(js)) {
            js = bytesToString(js.getBytes());
          }
          javaScript.push(stringToPDFString(js));
        }
      }

      // Append OpenAction actions to javaScript array
      var openactionDict = this.catDict.get('OpenAction');
      if (isDict(openactionDict)) {
        var objType = openactionDict.get('Type');
        var actionType = openactionDict.get('S');
        var action = openactionDict.get('N');
        var isPrintAction = (isName(objType) && objType.name === 'Action' &&
          isName(actionType) && actionType.name === 'Named' &&
          isName(action) && action.name === 'Print');

        if (isPrintAction) {
          javaScript.push('print(true);');
        }
      }

      return shadow(this, 'javaScript', javaScript);
    },

    /**
     * Waits for every promise in the font cache, deletes the `translated`
     * property from each resolved font's `dict`, then clears the cache.
     * @returns {Promise} Settles once the cleanup has been done.
     */
    cleanup: function Catalog_cleanup() {
      var promises = [];
      this.fontCache.forEach(function (promise) {
        promises.push(promise);
      });
      return Promise.all(promises).then(function (translatedFonts) {
        for (var i = 0, ii = translatedFonts.length; i < ii; i++) {
          var font = translatedFonts[i].dict;
          delete font.translated;
        }
        this.fontCache.clear();
      }.bind(this));
    },

    /**
     * Returns a promise for the Page at `pageIndex`; the promise is created
     * on first request and reused afterwards.
     * @param {number} pageIndex
     * @returns {Promise}
     */
    getPage: function Catalog_getPage(pageIndex) {
      if (!(pageIndex in this.pagePromises)) {
        this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then(
          function (a) {
            var dict = a[0];
            var ref = a[1];
            return new Page(this.pdfManager, this.xref, pageIndex, dict, ref,
                            this.fontCache);
          }.bind(this)
        );
      }
      return this.pagePromises[pageIndex];
    },

    /**
     * Walks the page tree depth-first, starting at the catalog's raw
     * 'Pages' entry, until the leaf for `pageIndex` is reached.
     * @param {number} pageIndex
     * @returns {Promise} Resolves with `[pageDict, pageRef]`; rejects when
     *   a fetch fails or the index is not found.
     */
    getPageDict: function Catalog_getPageDict(pageIndex) {
      var capability = createPromiseCapability();
      var nodesToVisit = [this.catDict.getRaw('Pages')];
      var currentPageIndex = 0;
      var xref = this.xref;

      function next() {
        while (nodesToVisit.length) {
          var currentNode = nodesToVisit.pop();

          if (isRef(currentNode)) {
            // The walk is suspended here and resumed from the callback once
            // the referenced object has been fetched.
            // NOTE(review): an exception thrown inside this callback
            // (including from the nested next() call) rejects the promise
            // returned by then(), which nothing observes, so
            // capability.promise would stay pending in that case.
            xref.fetchAsync(currentNode).then(function (obj) {
              if ((isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids')))) {
                if (pageIndex === currentPageIndex) {
                  capability.resolve([obj, currentNode]);
                } else {
                  currentPageIndex++;
                  next();
                }
                return;
              }
              nodesToVisit.push(obj);
              next();
            }, capability.reject.bind(capability));
            return;
          }

          // must be a child page dictionary
          assert(
            isDict(currentNode),
            'page dictionary kid reference points to wrong type of object'
          );
          var count = currentNode.get('Count');
          // Skip nodes where the page can't be.
          if (currentPageIndex + count <= pageIndex) {
            currentPageIndex += count;
            continue;
          }

          var kids = currentNode.get('Kids');
          assert(isArray(kids), 'page dictionary kids object is not an array');
          if (count === kids.length) {
            // Nodes that don't have the page have been skipped and this is
            // the bottom of the tree which means the page requested must be
            // a descendant of this pages node. Ideally we would just resolve
            // the promise with the page ref here, but there is the case
            // where more pages nodes could link to a single page (see issue
            // 3666 pdf). To handle this push it back on the queue so if it
            // is a pages node it will be descended into.
            nodesToVisit = [kids[pageIndex - currentPageIndex]];
            currentPageIndex = pageIndex;
            continue;
          } else {
            // Pushed in reverse so that pop() visits the kids in order.
            for (var last = kids.length - 1; last >= 0; last--) {
              nodesToVisit.push(kids[last]);
            }
          }
        }
        capability.reject('Page index ' + pageIndex + ' not found.');
      }
      next();
      return capability.promise;
    },

    /**
     * Computes the index of the page that `ref` points to.
     * @param {Object} ref - Reference to a page.
     * @returns {Promise} Resolves with the number of pages before `ref`.
     */
    getPageIndex: function Catalog_getPageIndex(ref) {
      // The page tree nodes have the count of all the leaves below them. To
      // get how many pages are before we just have to walk up the tree and
      // keep adding the count of siblings to the left of the node.
      var xref = this.xref;

      // Resolves with [pagesInLeftSiblings, parentRef] for `kidRef`, or with
      // null once a node without a parent (or without kids) is reached.
      function pagesBeforeRef(kidRef) {
        var total = 0;
        var parentRef;
        return xref.fetchAsync(kidRef).then(function (node) {
          if (!node) {
            return null;
          }
          parentRef = node.getRaw('Parent');
          return node.getAsync('Parent');
        }).then(function (parent) {
          if (!parent) {
            return null;
          }
          return parent.getAsync('Kids');
        }).then(function (kids) {
          if (!kids) {
            return null;
          }
          var kidPromises = [];
          var found = false;
          for (var i = 0; i < kids.length; i++) {
            var kid = kids[i];
            assert(isRef(kid), 'kids must be a ref');
            // Only siblings to the left of kidRef are counted.
            if (kid.num === kidRef.num) {
              found = true;
              break;
            }
            kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
              if (kid.has('Count')) {
                var count = kid.get('Count');
                total += count;
              } else { // page leaf node
                total++;
              }
            }));
          }
          if (!found) {
            error('kid ref not found in parents kids');
          }
          return Promise.all(kidPromises).then(function () {
            return [total, parentRef];
          });
        });
      }

      var total = 0;
      // Climbs one level per call, accumulating the counts on the way up.
      function next(ref) {
        return pagesBeforeRef(ref).then(function (args) {
          if (!args) {
            return total;
          }
          var count = args[0];
          var parentRef = args[1];
          total += count;
          return next(parentRef);
        });
      }

      return next(ref);
    }
  };

  return Catalog;
})();
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
var XRef = (function XRefClosure() {
|
2013-02-07 08:19:29 +09:00
|
|
|
function XRef(stream, password) {
|
2011-10-25 08:55:23 +09:00
|
|
|
this.stream = stream;
|
|
|
|
this.entries = [];
|
|
|
|
this.xrefstms = {};
|
|
|
|
// prepare the XRef cache
|
|
|
|
this.cache = [];
|
2013-02-07 08:19:29 +09:00
|
|
|
this.password = password;
|
2014-06-16 23:52:04 +09:00
|
|
|
this.stats = {
|
|
|
|
streamTypes: [],
|
|
|
|
fontTypes: []
|
|
|
|
};
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
XRef.prototype = {
|
2013-02-07 08:19:29 +09:00
|
|
|
setStartXRef: function XRef_setStartXRef(startXRef) {
|
|
|
|
// Store the starting positions of xref tables as we process them
|
|
|
|
// so we can recover from missing data errors
|
|
|
|
this.startXRefQueue = [startXRef];
|
|
|
|
},
|
|
|
|
|
|
|
|
parse: function XRef_parse(recoveryMode) {
|
|
|
|
var trailerDict;
|
|
|
|
if (!recoveryMode) {
|
|
|
|
trailerDict = this.readXRef();
|
|
|
|
} else {
|
|
|
|
warn('Indexing all PDF objects');
|
|
|
|
trailerDict = this.indexObjects();
|
|
|
|
}
|
|
|
|
trailerDict.assignXref(this);
|
|
|
|
this.trailer = trailerDict;
|
|
|
|
var encrypt = trailerDict.get('Encrypt');
|
|
|
|
if (encrypt) {
|
|
|
|
var ids = trailerDict.get('ID');
|
|
|
|
var fileId = (ids && ids.length) ? ids[0] : '';
|
2014-03-21 04:28:22 +09:00
|
|
|
this.encrypt = new CipherTransformFactory(encrypt, fileId,
|
|
|
|
this.password);
|
2013-02-07 08:19:29 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
// get the root dictionary (catalog) object
|
|
|
|
if (!(this.root = trailerDict.get('Root'))) {
|
|
|
|
error('Invalid root reference');
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
|
|
|
processXRefTable: function XRef_processXRefTable(parser) {
|
|
|
|
if (!('tableState' in this)) {
|
|
|
|
// Stores state of the table as we process it so we can resume
|
|
|
|
// from middle of table in case of missing data error
|
|
|
|
this.tableState = {
|
|
|
|
entryNum: 0,
|
|
|
|
streamPos: parser.lexer.stream.pos,
|
|
|
|
parserBuf1: parser.buf1,
|
|
|
|
parserBuf2: parser.buf2
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
var obj = this.readXRefTable(parser);
|
|
|
|
|
|
|
|
// Sanity check
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isCmd(obj, 'trailer')) {
|
2013-02-07 08:19:29 +09:00
|
|
|
error('Invalid XRef table: could not find trailer dictionary');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
// Read trailer dictionary, e.g.
|
|
|
|
// trailer
|
|
|
|
// << /Size 22
|
|
|
|
// /Root 20R
|
|
|
|
// /Info 10R
|
|
|
|
// /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
|
|
|
|
// >>
|
|
|
|
// The parser goes through the entire stream << ... >> and provides
|
|
|
|
// a getter interface for the key-value table
|
|
|
|
var dict = parser.getObj();
|
2014-05-03 03:45:34 +09:00
|
|
|
|
|
|
|
// The pdflib PDF generator can generate a nested trailer dictionary
|
|
|
|
if (!isDict(dict) && dict.dict) {
|
|
|
|
dict = dict.dict;
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isDict(dict)) {
|
2013-02-07 08:19:29 +09:00
|
|
|
error('Invalid XRef table: could not parse trailer dictionary');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
delete this.tableState;
|
|
|
|
|
|
|
|
return dict;
|
|
|
|
},
|
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
readXRefTable: function XRef_readXRefTable(parser) {
|
2012-01-31 23:01:04 +09:00
|
|
|
// Example of cross-reference table:
|
|
|
|
// xref
|
|
|
|
// 0 1 <-- subsection header (first obj #, obj count)
|
|
|
|
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
|
|
|
|
// 23 2 <-- subsection header ... and so on ...
|
2012-02-01 00:57:32 +09:00
|
|
|
// 0000025518 00002 n
|
2012-01-31 23:01:04 +09:00
|
|
|
// 0000025635 00000 n
|
|
|
|
// trailer
|
|
|
|
// ...
|
2012-02-01 00:57:32 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
var stream = parser.lexer.stream;
|
|
|
|
var tableState = this.tableState;
|
|
|
|
stream.pos = tableState.streamPos;
|
|
|
|
parser.buf1 = tableState.parserBuf1;
|
|
|
|
parser.buf2 = tableState.parserBuf2;
|
|
|
|
|
2012-01-31 23:01:04 +09:00
|
|
|
// Outer loop is over subsection headers
|
2011-10-25 08:55:23 +09:00
|
|
|
var obj;
|
2012-01-31 23:01:04 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
while (true) {
|
|
|
|
if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
|
|
|
|
if (isCmd(obj = parser.getObj(), 'trailer')) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
tableState.firstEntryNum = obj;
|
|
|
|
tableState.entryCount = parser.getObj();
|
|
|
|
}
|
|
|
|
|
|
|
|
var first = tableState.firstEntryNum;
|
|
|
|
var count = tableState.entryCount;
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isInt(first) || !isInt(count)) {
|
2012-01-31 23:57:12 +09:00
|
|
|
error('Invalid XRef table: wrong types in subsection header');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2012-01-31 23:01:04 +09:00
|
|
|
// Inner loop is over objects themselves
|
2013-02-07 08:19:29 +09:00
|
|
|
for (var i = tableState.entryNum; i < count; i++) {
|
|
|
|
tableState.streamPos = stream.pos;
|
|
|
|
tableState.entryNum = i;
|
|
|
|
tableState.parserBuf1 = parser.buf1;
|
|
|
|
tableState.parserBuf2 = parser.buf2;
|
|
|
|
|
2011-10-25 08:55:23 +09:00
|
|
|
var entry = {};
|
2012-01-31 23:01:04 +09:00
|
|
|
entry.offset = parser.getObj();
|
|
|
|
entry.gen = parser.getObj();
|
|
|
|
var type = parser.getObj();
|
|
|
|
|
2014-03-21 04:28:22 +09:00
|
|
|
if (isCmd(type, 'f')) {
|
2011-10-25 08:55:23 +09:00
|
|
|
entry.free = true;
|
2014-03-21 04:28:22 +09:00
|
|
|
} else if (isCmd(type, 'n')) {
|
2012-01-31 23:01:04 +09:00
|
|
|
entry.uncompressed = true;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2012-01-31 23:01:04 +09:00
|
|
|
// Validate entry obj
|
2012-02-01 00:57:32 +09:00
|
|
|
if (!isInt(entry.offset) || !isInt(entry.gen) ||
|
|
|
|
!(entry.free || entry.uncompressed)) {
|
2013-02-07 08:19:29 +09:00
|
|
|
console.log(entry.offset, entry.gen, entry.free,
|
2014-03-21 04:28:22 +09:00
|
|
|
entry.uncompressed);
|
2012-02-01 00:49:06 +09:00
|
|
|
error('Invalid entry in XRef subsection: ' + first + ', ' + count);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!this.entries[i + first]) {
|
2012-01-31 23:57:12 +09:00
|
|
|
this.entries[i + first] = entry;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
|
|
|
|
tableState.entryNum = 0;
|
|
|
|
tableState.streamPos = stream.pos;
|
|
|
|
tableState.parserBuf1 = parser.buf1;
|
|
|
|
tableState.parserBuf2 = parser.buf2;
|
|
|
|
delete tableState.firstEntryNum;
|
|
|
|
delete tableState.entryCount;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
|
2013-06-23 02:06:37 +09:00
|
|
|
// Per issue 3248: hp scanners generate bad XRef
|
|
|
|
if (first === 1 && this.entries[1] && this.entries[1].free) {
|
|
|
|
// shifting the entries
|
|
|
|
this.entries.shift();
|
|
|
|
}
|
|
|
|
|
2012-04-24 12:14:58 +09:00
|
|
|
// Sanity check: as per spec, first object must be free
|
2014-02-27 21:46:12 +09:00
|
|
|
if (this.entries[0] && !this.entries[0].free) {
|
2012-02-01 00:49:06 +09:00
|
|
|
error('Invalid XRef table: unexpected first object');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
return obj;
|
|
|
|
},
|
2012-02-01 00:49:06 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
processXRefStream: function XRef_processXRefStream(stream) {
|
|
|
|
if (!('streamState' in this)) {
|
|
|
|
// Stores state of the stream as we process it so we can resume
|
|
|
|
// from middle of stream in case of missing data error
|
2013-05-10 12:26:28 +09:00
|
|
|
var streamParameters = stream.dict;
|
2013-02-07 08:19:29 +09:00
|
|
|
var byteWidths = streamParameters.get('W');
|
|
|
|
var range = streamParameters.get('Index');
|
|
|
|
if (!range) {
|
|
|
|
range = [0, streamParameters.get('Size')];
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
this.streamState = {
|
|
|
|
entryRanges: range,
|
|
|
|
byteWidths: byteWidths,
|
|
|
|
entryNum: 0,
|
|
|
|
streamPos: stream.pos
|
|
|
|
};
|
|
|
|
}
|
|
|
|
this.readXRefStream(stream);
|
|
|
|
delete this.streamState;
|
|
|
|
|
2013-05-10 12:26:28 +09:00
|
|
|
return stream.dict;
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2013-02-07 08:19:29 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
readXRefStream: function XRef_readXRefStream(stream) {
|
2011-10-25 08:55:23 +09:00
|
|
|
var i, j;
|
2013-02-07 08:19:29 +09:00
|
|
|
var streamState = this.streamState;
|
|
|
|
stream.pos = streamState.streamPos;
|
|
|
|
|
|
|
|
var byteWidths = streamState.byteWidths;
|
|
|
|
var typeFieldWidth = byteWidths[0];
|
|
|
|
var offsetFieldWidth = byteWidths[1];
|
|
|
|
var generationFieldWidth = byteWidths[2];
|
|
|
|
|
|
|
|
var entryRanges = streamState.entryRanges;
|
|
|
|
while (entryRanges.length > 0) {
|
|
|
|
var first = entryRanges[0];
|
|
|
|
var n = entryRanges[1];
|
|
|
|
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isInt(first) || !isInt(n)) {
|
2011-10-25 08:55:23 +09:00
|
|
|
error('Invalid XRef range fields: ' + first + ', ' + n);
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
if (!isInt(typeFieldWidth) || !isInt(offsetFieldWidth) ||
|
|
|
|
!isInt(generationFieldWidth)) {
|
|
|
|
error('Invalid XRef entry fields length: ' + first + ', ' + n);
|
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
for (i = streamState.entryNum; i < n; ++i) {
|
|
|
|
streamState.entryNum = i;
|
|
|
|
streamState.streamPos = stream.pos;
|
|
|
|
|
2011-10-25 08:55:23 +09:00
|
|
|
var type = 0, offset = 0, generation = 0;
|
2014-03-21 04:28:22 +09:00
|
|
|
for (j = 0; j < typeFieldWidth; ++j) {
|
2011-10-25 08:55:23 +09:00
|
|
|
type = (type << 8) | stream.getByte();
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
|
|
|
// if type field is absent, its default value is 1
|
2014-02-27 21:46:12 +09:00
|
|
|
if (typeFieldWidth === 0) {
|
2011-10-25 08:55:23 +09:00
|
|
|
type = 1;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
|
|
|
for (j = 0; j < offsetFieldWidth; ++j) {
|
2011-10-25 08:55:23 +09:00
|
|
|
offset = (offset << 8) | stream.getByte();
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
|
|
|
for (j = 0; j < generationFieldWidth; ++j) {
|
2011-10-25 08:55:23 +09:00
|
|
|
generation = (generation << 8) | stream.getByte();
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
var entry = {};
|
|
|
|
entry.offset = offset;
|
|
|
|
entry.gen = generation;
|
|
|
|
switch (type) {
|
|
|
|
case 0:
|
|
|
|
entry.free = true;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
entry.uncompressed = true;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error('Invalid XRef entry type: ' + type);
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!this.entries[first + i]) {
|
2011-10-25 08:55:23 +09:00
|
|
|
this.entries[first + i] = entry;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
|
|
|
|
streamState.entryNum = 0;
|
|
|
|
streamState.streamPos = stream.pos;
|
|
|
|
entryRanges.splice(0, 2);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
indexObjects: function XRef_indexObjects() {
|
2011-10-25 08:55:23 +09:00
|
|
|
// Simple scan through the PDF content to find objects,
|
|
|
|
// trailers and XRef streams.
|
|
|
|
function readToken(data, offset) {
|
|
|
|
var token = '', ch = data[offset];
|
|
|
|
while (ch !== 13 && ch !== 10) {
|
2014-03-21 04:28:22 +09:00
|
|
|
if (++offset >= data.length) {
|
2011-10-25 08:55:23 +09:00
|
|
|
break;
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
token += String.fromCharCode(ch);
|
|
|
|
ch = data[offset];
|
|
|
|
}
|
|
|
|
return token;
|
|
|
|
}
|
|
|
|
function skipUntil(data, offset, what) {
|
|
|
|
var length = what.length, dataLength = data.length;
|
|
|
|
var skipped = 0;
|
|
|
|
// finding byte sequence
|
|
|
|
while (offset < dataLength) {
|
|
|
|
var i = 0;
|
2014-08-02 04:45:39 +09:00
|
|
|
while (i < length && data[offset + i] === what[i]) {
|
2011-10-25 08:55:23 +09:00
|
|
|
++i;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2014-03-21 04:28:22 +09:00
|
|
|
if (i >= length) {
|
2011-10-25 08:55:23 +09:00
|
|
|
break; // sequence found
|
2014-03-21 04:28:22 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
offset++;
|
|
|
|
skipped++;
|
|
|
|
}
|
|
|
|
return skipped;
|
|
|
|
}
|
|
|
|
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
|
|
|
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
|
|
|
101, 102]);
|
|
|
|
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
|
|
|
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
|
|
|
|
|
|
|
var stream = this.stream;
|
|
|
|
stream.pos = 0;
|
|
|
|
var buffer = stream.getBytes();
|
|
|
|
var position = stream.start, length = buffer.length;
|
|
|
|
var trailers = [], xrefStms = [];
|
|
|
|
while (position < length) {
|
|
|
|
var ch = buffer[position];
|
|
|
|
if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
|
|
|
|
++position;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (ch === 37) { // %-comment
|
|
|
|
do {
|
|
|
|
++position;
|
2013-08-24 02:57:11 +09:00
|
|
|
if (position >= length) {
|
|
|
|
break;
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
ch = buffer[position];
|
|
|
|
} while (ch !== 13 && ch !== 10);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
var token = readToken(buffer, position);
|
|
|
|
var m;
|
|
|
|
if (token === 'xref') {
|
|
|
|
position += skipUntil(buffer, position, trailerBytes);
|
|
|
|
trailers.push(position);
|
|
|
|
position += skipUntil(buffer, position, startxrefBytes);
|
|
|
|
} else if ((m = /^(\d+)\s+(\d+)\s+obj\b/.exec(token))) {
|
|
|
|
this.entries[m[1]] = {
|
|
|
|
offset: position,
|
|
|
|
gen: m[2] | 0,
|
|
|
|
uncompressed: true
|
|
|
|
};
|
|
|
|
|
|
|
|
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
|
|
|
|
var content = buffer.subarray(position, position + contentLength);
|
|
|
|
|
|
|
|
// checking XRef stream suspect
|
|
|
|
// (it shall have '/XRef' and next char is not a letter)
|
|
|
|
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
|
|
|
|
if (xrefTagOffset < contentLength &&
|
|
|
|
content[xrefTagOffset + 5] < 64) {
|
|
|
|
xrefStms.push(position);
|
|
|
|
this.xrefstms[position] = 1; // don't read it recursively
|
|
|
|
}
|
|
|
|
|
|
|
|
position += contentLength;
|
2014-02-27 21:46:12 +09:00
|
|
|
} else {
|
2011-10-25 08:55:23 +09:00
|
|
|
position += token.length + 1;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
// reading XRef streams
|
2014-04-08 06:42:54 +09:00
|
|
|
var i, ii;
|
|
|
|
for (i = 0, ii = xrefStms.length; i < ii; ++i) {
|
2013-02-07 08:19:29 +09:00
|
|
|
this.startXRefQueue.push(xrefStms[i]);
|
|
|
|
this.readXRef(/* recoveryMode */ true);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
// finding main trailer
|
|
|
|
var dict;
|
2014-04-08 06:42:54 +09:00
|
|
|
for (i = 0, ii = trailers.length; i < ii; ++i) {
|
2011-10-25 08:55:23 +09:00
|
|
|
stream.pos = trailers[i];
|
2014-06-16 23:52:04 +09:00
|
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
2011-10-25 08:55:23 +09:00
|
|
|
var obj = parser.getObj();
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isCmd(obj, 'trailer')) {
|
2011-10-25 08:55:23 +09:00
|
|
|
continue;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
// read the trailer dictionary
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isDict(dict = parser.getObj())) {
|
2011-10-25 08:55:23 +09:00
|
|
|
continue;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
// taking the first one with 'ID'
|
2014-02-27 21:46:12 +09:00
|
|
|
if (dict.has('ID')) {
|
2011-10-25 08:55:23 +09:00
|
|
|
return dict;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
// no tailer with 'ID', taking last one (if exists)
|
2014-02-27 21:46:12 +09:00
|
|
|
if (dict) {
|
2011-10-25 08:55:23 +09:00
|
|
|
return dict;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
// nothing helps
|
2012-10-16 19:10:37 +09:00
|
|
|
// calling error() would reject worker with an UnknownErrorException.
|
|
|
|
throw new InvalidPDFException('Invalid PDF structure');
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2013-02-07 08:19:29 +09:00
|
|
|
|
|
|
|
readXRef: function XRef_readXRef(recoveryMode) {
|
2011-10-25 08:55:23 +09:00
|
|
|
var stream = this.stream;
|
2011-12-03 06:35:18 +09:00
|
|
|
|
2011-12-03 06:31:29 +09:00
|
|
|
try {
|
2013-02-07 08:19:29 +09:00
|
|
|
while (this.startXRefQueue.length) {
|
|
|
|
var startXRef = this.startXRefQueue[0];
|
2012-02-01 00:49:06 +09:00
|
|
|
|
2014-03-05 06:16:54 +09:00
|
|
|
stream.pos = startXRef + stream.start;
|
2012-02-01 00:49:06 +09:00
|
|
|
|
2014-06-16 23:52:04 +09:00
|
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
2013-02-07 08:19:29 +09:00
|
|
|
var obj = parser.getObj();
|
|
|
|
var dict;
|
|
|
|
|
|
|
|
// Get dictionary
|
|
|
|
if (isCmd(obj, 'xref')) {
|
|
|
|
// Parse end-of-file XRef
|
|
|
|
dict = this.processXRefTable(parser);
|
|
|
|
if (!this.topDict) {
|
|
|
|
this.topDict = dict;
|
2012-02-01 00:49:06 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
|
|
|
|
// Recursively get other XRefs 'XRefStm', if any
|
|
|
|
obj = dict.get('XRefStm');
|
|
|
|
if (isInt(obj)) {
|
|
|
|
var pos = obj;
|
|
|
|
// ignore previously loaded xref streams
|
|
|
|
// (possible infinite recursion)
|
|
|
|
if (!(pos in this.xrefstms)) {
|
|
|
|
this.xrefstms[pos] = 1;
|
|
|
|
this.startXRefQueue.push(pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (isInt(obj)) {
|
|
|
|
// Parse in-stream XRef
|
|
|
|
if (!isInt(parser.getObj()) ||
|
|
|
|
!isCmd(parser.getObj(), 'obj') ||
|
|
|
|
!isStream(obj = parser.getObj())) {
|
|
|
|
error('Invalid XRef stream');
|
|
|
|
}
|
|
|
|
dict = this.processXRefStream(obj);
|
|
|
|
if (!this.topDict) {
|
|
|
|
this.topDict = dict;
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!dict) {
|
2013-02-07 08:19:29 +09:00
|
|
|
error('Failed to read XRef stream');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-11-22 23:49:36 +09:00
|
|
|
} else {
|
|
|
|
error('Invalid XRef stream header');
|
2012-02-01 00:49:06 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
|
|
|
|
// Recursively get previous dictionary, if any
|
|
|
|
obj = dict.get('Prev');
|
|
|
|
if (isInt(obj)) {
|
|
|
|
this.startXRefQueue.push(obj);
|
|
|
|
} else if (isRef(obj)) {
|
|
|
|
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
|
|
|
|
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
|
|
|
|
this.startXRefQueue.push(obj.num);
|
2011-12-03 06:31:29 +09:00
|
|
|
}
|
2012-02-01 00:49:06 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
this.startXRefQueue.shift();
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2012-02-01 00:49:06 +09:00
|
|
|
|
2013-02-07 08:19:29 +09:00
|
|
|
return this.topDict;
|
2011-12-03 06:35:18 +09:00
|
|
|
} catch (e) {
|
2013-02-07 08:19:29 +09:00
|
|
|
if (e instanceof MissingDataException) {
|
|
|
|
throw e;
|
|
|
|
}
|
2014-01-16 06:28:31 +09:00
|
|
|
info('(while reading XRef): ' + e);
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2011-12-03 06:35:18 +09:00
|
|
|
|
2014-02-27 21:46:12 +09:00
|
|
|
if (recoveryMode) {
|
2012-04-24 12:14:58 +09:00
|
|
|
return;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-02-07 08:19:29 +09:00
|
|
|
throw new XRefParseException();
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2013-02-07 08:19:29 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
getEntry: function XRef_getEntry(i) {
|
2014-02-27 21:46:12 +09:00
|
|
|
var xrefEntry = this.entries[i];
|
2014-04-12 19:05:12 +09:00
|
|
|
if (xrefEntry && !xrefEntry.free && xrefEntry.offset) {
|
2014-02-27 21:46:12 +09:00
|
|
|
return xrefEntry;
|
|
|
|
}
|
|
|
|
return null;
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
fetchIfRef: function XRef_fetchIfRef(obj) {
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isRef(obj)) {
|
2011-10-25 08:55:23 +09:00
|
|
|
return obj;
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
return this.fetch(obj);
|
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
fetch: function XRef_fetch(ref, suppressEncryption) {
|
2014-04-13 23:02:56 +09:00
|
|
|
assert(isRef(ref), 'ref object is not a reference');
|
2011-10-25 08:55:23 +09:00
|
|
|
var num = ref.num;
|
2013-04-09 07:14:56 +09:00
|
|
|
if (num in this.cache) {
|
2014-02-27 21:46:12 +09:00
|
|
|
var cacheEntry = this.cache[num];
|
|
|
|
return cacheEntry;
|
2013-04-09 07:14:56 +09:00
|
|
|
}
|
2012-01-09 05:03:00 +09:00
|
|
|
|
2014-02-27 21:46:12 +09:00
|
|
|
var xrefEntry = this.getEntry(num);
|
2012-01-09 05:03:00 +09:00
|
|
|
|
|
|
|
// the referenced entry can be free
|
2014-02-27 21:46:12 +09:00
|
|
|
if (xrefEntry === null) {
|
|
|
|
return (this.cache[num] = null);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (xrefEntry.uncompressed) {
|
2014-03-26 23:07:38 +09:00
|
|
|
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
|
2014-02-27 21:46:12 +09:00
|
|
|
} else {
|
2014-03-26 23:07:38 +09:00
|
|
|
xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
|
|
|
|
}
|
2014-06-10 18:29:25 +09:00
|
|
|
if (isDict(xrefEntry)){
|
2014-06-19 12:41:33 +09:00
|
|
|
xrefEntry.objId = ref.toString();
|
2014-06-10 18:29:25 +09:00
|
|
|
} else if (isStream(xrefEntry)) {
|
2014-06-19 12:41:33 +09:00
|
|
|
xrefEntry.dict.objId = ref.toString();
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2014-03-26 23:07:38 +09:00
|
|
|
return xrefEntry;
|
2014-02-27 21:46:12 +09:00
|
|
|
},
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2014-03-21 04:28:22 +09:00
|
|
|
fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
|
2014-02-27 21:46:12 +09:00
|
|
|
suppressEncryption) {
|
2011-10-25 08:55:23 +09:00
|
|
|
var gen = ref.gen;
|
2014-02-27 21:46:12 +09:00
|
|
|
var num = ref.num;
|
|
|
|
if (xrefEntry.gen !== gen) {
|
|
|
|
error('inconsistent generation in XRef');
|
|
|
|
}
|
2014-03-05 06:16:54 +09:00
|
|
|
var stream = this.stream.makeSubStream(xrefEntry.offset +
|
|
|
|
this.stream.start);
|
2014-02-27 21:46:12 +09:00
|
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
|
|
|
var obj1 = parser.getObj();
|
|
|
|
var obj2 = parser.getObj();
|
|
|
|
var obj3 = parser.getObj();
|
|
|
|
if (!isInt(obj1) || parseInt(obj1, 10) !== num ||
|
|
|
|
!isInt(obj2) || parseInt(obj2, 10) !== gen ||
|
|
|
|
!isCmd(obj3)) {
|
|
|
|
error('bad XRef entry');
|
|
|
|
}
|
|
|
|
if (!isCmd(obj3, 'obj')) {
|
2014-03-21 04:28:22 +09:00
|
|
|
// some bad PDFs use "obj1234" and really mean 1234
|
2014-02-27 21:46:12 +09:00
|
|
|
if (obj3.cmd.indexOf('obj') === 0) {
|
|
|
|
num = parseInt(obj3.cmd.substring(3), 10);
|
|
|
|
if (!isNaN(num)) {
|
|
|
|
return num;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
error('bad XRef entry');
|
|
|
|
}
|
|
|
|
if (this.encrypt && !suppressEncryption) {
|
2014-06-19 08:30:27 +09:00
|
|
|
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
|
2014-02-27 21:46:12 +09:00
|
|
|
} else {
|
|
|
|
xrefEntry = parser.getObj();
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
if (!isStream(xrefEntry)) {
|
|
|
|
this.cache[num] = xrefEntry;
|
|
|
|
}
|
|
|
|
return xrefEntry;
|
|
|
|
},
|
2011-10-25 08:55:23 +09:00
|
|
|
|
2014-02-27 21:46:12 +09:00
|
|
|
fetchCompressed: function XRef_fetchCompressed(xrefEntry,
|
|
|
|
suppressEncryption) {
|
|
|
|
var tableOffset = xrefEntry.offset;
|
|
|
|
var stream = this.fetch(new Ref(tableOffset, 0));
|
|
|
|
if (!isStream(stream)) {
|
2011-10-25 08:55:23 +09:00
|
|
|
error('bad ObjStm stream');
|
2014-02-27 21:46:12 +09:00
|
|
|
}
|
2013-05-10 12:26:28 +09:00
|
|
|
var first = stream.dict.get('First');
|
|
|
|
var n = stream.dict.get('N');
|
2011-10-25 08:55:23 +09:00
|
|
|
if (!isInt(first) || !isInt(n)) {
|
|
|
|
error('invalid first and n parameters for ObjStm stream');
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
var parser = new Parser(new Lexer(stream), false, this);
|
2012-11-02 22:26:45 +09:00
|
|
|
parser.allowStreams = true;
|
2014-02-27 21:46:12 +09:00
|
|
|
var i, entries = [], num, nums = [];
|
2011-10-25 08:55:23 +09:00
|
|
|
// read the object numbers to populate cache
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
|
|
num = parser.getObj();
|
|
|
|
if (!isInt(num)) {
|
|
|
|
error('invalid object number in the ObjStm stream: ' + num);
|
|
|
|
}
|
|
|
|
nums.push(num);
|
|
|
|
var offset = parser.getObj();
|
|
|
|
if (!isInt(offset)) {
|
|
|
|
error('invalid object offset in the ObjStm stream: ' + offset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// read stream objects for cache
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
|
|
entries.push(parser.getObj());
|
2012-11-04 13:03:52 +09:00
|
|
|
num = nums[i];
|
|
|
|
var entry = this.entries[num];
|
|
|
|
if (entry && entry.offset === tableOffset && entry.gen === i) {
|
|
|
|
this.cache[num] = entries[i];
|
|
|
|
}
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
xrefEntry = entries[xrefEntry.gen];
|
|
|
|
if (xrefEntry === undefined) {
|
2011-10-25 08:55:23 +09:00
|
|
|
error('bad XRef entry for compressed object');
|
|
|
|
}
|
2014-02-27 21:46:12 +09:00
|
|
|
return xrefEntry;
|
2011-10-25 08:55:23 +09:00
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2013-06-05 09:57:52 +09:00
|
|
|
fetchIfRefAsync: function XRef_fetchIfRefAsync(obj) {
|
|
|
|
if (!isRef(obj)) {
|
2014-05-01 22:27:31 +09:00
|
|
|
return Promise.resolve(obj);
|
2013-06-05 09:57:52 +09:00
|
|
|
}
|
|
|
|
return this.fetchAsync(obj);
|
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2013-06-05 09:57:52 +09:00
|
|
|
fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) {
|
2014-05-01 22:27:31 +09:00
|
|
|
return new Promise(function (resolve, reject) {
|
|
|
|
var tryFetch = function () {
|
|
|
|
try {
|
|
|
|
resolve(this.fetch(ref, suppressEncryption));
|
|
|
|
} catch (e) {
|
|
|
|
if (e instanceof MissingDataException) {
|
|
|
|
this.stream.manager.requestRange(e.begin, e.end, tryFetch);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
reject(e);
|
|
|
|
}
|
|
|
|
}.bind(this);
|
|
|
|
tryFetch();
|
|
|
|
}.bind(this));
|
|
|
|
},
|
2014-02-27 21:46:12 +09:00
|
|
|
|
2012-04-05 05:43:26 +09:00
|
|
|
getCatalogObj: function XRef_getCatalogObj() {
|
2012-04-06 00:12:48 +09:00
|
|
|
return this.root;
|
2011-10-25 08:55:23 +09:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2011-12-07 07:18:40 +09:00
|
|
|
return XRef;
|
2011-10-25 08:55:23 +09:00
|
|
|
})();
|
|
|
|
|
2013-03-01 08:29:07 +09:00
|
|
|
/**
|
2014-03-21 04:28:22 +09:00
|
|
|
* A NameTree is like a Dict but has some advantageous properties, see the
|
|
|
|
* spec (7.9.6) for more details.
|
2013-03-01 08:29:07 +09:00
|
|
|
* TODO: implement all the Dict functions and make this more efficient.
|
|
|
|
*/
|
|
|
|
var NameTree = (function NameTreeClosure() {
  // `root` is the root node of the name tree (possibly a reference) and
  // `xref` is used to resolve references while walking it.
  function NameTree(root, xref) {
    this.root = root;
    this.xref = xref;
  }

  NameTree.prototype = {
    // Walks the tree breadth first and returns a plain object that maps
    // every key found in a 'Names' array to its (dereferenced) value.
    // Nodes with a 'Kids' entry only contribute their children. An empty
    // object is returned when there is no root.
    getAll: function NameTree_getAll() {
      var result = {};
      var root = this.root;
      if (!root) {
        return result;
      }
      var xref = this.xref;
      // reading name tree
      var seen = new RefSet();
      seen.put(root);
      var pending = [root];
      var idx, len;
      while (pending.length > 0) {
        var node = xref.fetchIfRef(pending.shift());
        if (!isDict(node)) {
          continue;
        }
        if (node.has('Kids')) {
          var kids = node.get('Kids');
          for (idx = 0, len = kids.length; idx < len; idx++) {
            var kid = kids[idx];
            // A kid that was queued before is reported as an error.
            if (seen.has(kid)) {
              error('invalid destinations');
            }
            pending.push(kid);
            seen.put(kid);
          }
          continue;
        }
        var names = node.get('Names');
        if (!names) {
          continue;
        }
        // 'Names' alternates keys and values.
        for (idx = 0, len = names.length; idx < len; idx += 2) {
          result[names[idx]] = xref.fetchIfRef(names[idx + 1]);
        }
      }
      return result;
    }
  };
  return NameTree;
})();
|
|
|
|
|
2014-03-19 05:32:47 +09:00
|
|
|
/**
|
|
|
|
* "A PDF file can refer to the contents of another file by using a File
|
|
|
|
* Specification (PDF 1.1)", see the spec (7.11) for more details.
|
|
|
|
* NOTE: Only embedded files are supported (as part of the attachments support)
|
|
|
|
* TODO: support the 'URL' file system (with caching if !/V), portable
|
|
|
|
* collections attributes and related files (/RF)
|
|
|
|
*/
|
|
|
|
var FileSpec = (function FileSpecClosure() {
  // Wraps a file specification dictionary. When `root` is not a
  // dictionary the instance is left without any properties set.
  function FileSpec(root, xref) {
    if (!root || !isDict(root)) {
      return;
    }
    this.xref = xref;
    this.root = root;
    if (root.has('FS')) {
      this.fs = root.get('FS');
    }
    if (root.has('Desc')) {
      this.description = stringToPDFString(root.get('Desc'));
    } else {
      this.description = '';
    }
    if (root.has('RF')) {
      warn('Related file specifications are not supported');
    }
    // Content can only be provided for embedded files ('EF' entry).
    var hasEmbeddedFile = root.has('EF');
    this.contentAvailable = hasEmbeddedFile;
    if (!hasEmbeddedFile) {
      warn('Non-embedded file specifications are not supported');
    }
  }

  // Returns the value of the first key present in `dict`, trying the keys
  // in this order: UF, F, Unix, Mac, DOS. Returns null when none exists.
  function pickPlatformItem(dict) {
    var keys = ['UF', 'F', 'Unix', 'Mac', 'DOS'];
    for (var k = 0; k < keys.length; k++) {
      if (dict.has(keys[k])) {
        return dict.get(keys[k]);
      }
    }
    return null;
  }

  FileSpec.prototype = {
    // The file name, computed on first access and kept in `_filename`.
    // Escaped backslashes and slashes are unescaped and the remaining
    // backslashes are turned into forward slashes; 'unnamed' is used when
    // the dictionary holds no name.
    get filename() {
      if (!this._filename && this.root) {
        var raw = pickPlatformItem(this.root) || 'unnamed';
        var name = stringToPDFString(raw);
        name = name.replace(/\\\\/g, '\\');
        name = name.replace(/\\\//g, '/');
        name = name.replace(/\\/g, '/');
        this._filename = name;
      }
      return this._filename;
    },
    // The bytes of the embedded file, or null when they are unavailable.
    // The reference to the content is looked up once and kept in
    // `contentRef`.
    get content() {
      if (!this.contentAvailable) {
        return null;
      }
      if (!this.contentRef && this.root) {
        this.contentRef = pickPlatformItem(this.root.get('EF'));
      }
      if (!this.contentRef) {
        warn('Embedded file specification does not have a content');
        return null;
      }
      var fileObj = this.xref.fetchIfRef(this.contentRef);
      if (fileObj && isStream(fileObj)) {
        return fileObj.getBytes();
      }
      warn('Embedded file specification points to non-existing/invalid ' +
        'content');
      return null;
    },
    // A plain object holding the file name and content.
    get serializable() {
      return {
        filename: this.filename,
        content: this.content
      };
    }
  };
  return FileSpec;
})();
|
|
|
|
|
2013-06-05 09:57:52 +09:00
|
|
|
/**
|
|
|
|
* A helper for loading missing data in object graphs. It traverses the graph
|
|
|
|
* depth first and queues up any objects that have missing data. Once it has
|
|
|
|
* traversed as many objects as are available, it attempts to bundle the
|
|
|
|
* missing data requests and then resume from the nodes that weren't ready.
|
|
|
|
*
|
|
|
|
* NOTE: It provides protection from circular references by keeping track of
|
|
|
|
* loaded references. However, you must be careful not to load any graphs
|
|
|
|
* that have references to the catalog or other pages since that will cause the
|
|
|
|
* entire PDF document object graph to be traversed.
|
|
|
|
*/
|
|
|
|
var ObjectLoader = (function() {
  // True for values that may reference or contain further objects.
  function mayHaveChildren(value) {
    return isRef(value) || isDict(value) || isArray(value) || isStream(value);
  }

  // Pushes onto `nodesToVisit` every direct child of `node` that may have
  // children itself. Dictionaries and streams contribute the values of
  // their (dictionary) map, arrays contribute their elements; any other
  // node contributes nothing.
  function addChildren(node, nodesToVisit) {
    var value;
    if (isDict(node) || isStream(node)) {
      var map;
      if (isDict(node)) {
        map = node.map;
      } else {
        map = node.dict.map;
      }
      for (var key in map) {
        value = map[key];
        if (mayHaveChildren(value)) {
          nodesToVisit.push(value);
        }
      }
    } else if (isArray(node)) {
      for (var i = 0, ii = node.length; i < ii; i++) {
        value = node[i];
        if (mayHaveChildren(value)) {
          nodesToVisit.push(value);
        }
      }
    }
  }

  // `obj` is the object whose properties named in `keys` are the roots of
  // the graphs to load; `xref` is used to fetch referenced objects.
  function ObjectLoader(obj, keys, xref) {
    this.obj = obj;
    this.keys = keys;
    this.xref = xref;
    this.refSet = null;
  }

  ObjectLoader.prototype = {
    // Starts loading and returns a promise that is resolved once all the
    // data reachable from the requested keys is available.
    load: function ObjectLoader_load() {
      var keys = this.keys;
      this.capability = createPromiseCapability();
      // Don't walk the graph if all the data is already loaded.
      if (!(this.xref.stream instanceof ChunkedStream) ||
          this.xref.stream.getMissingChunks().length === 0) {
        this.capability.resolve();
        return this.capability.promise;
      }

      this.refSet = new RefSet();
      // Setup the initial nodes to visit.
      var nodesToVisit = [];
      for (var i = 0; i < keys.length; i++) {
        nodesToVisit.push(this.obj[keys[i]]);
      }

      this.walk(nodesToVisit);
      return this.capability.promise;
    },

    // Visits the given nodes and everything reachable from them. Nodes
    // that could not be read because data is missing are collected, the
    // missing ranges are requested in one batch, and the walk is resumed
    // from those nodes when the request calls back. The capability is
    // resolved once a pass finishes without pending requests.
    walk: function ObjectLoader_walk(nodesToVisit) {
      var nodesToRevisit = [];
      var pendingRequests = [];
      // DFS walk of the object graph.
      while (nodesToVisit.length) {
        var currentNode = nodesToVisit.pop();

        // Only references or chunked streams can cause missing data exceptions.
        if (isRef(currentNode)) {
          // Skip nodes that have already been visited.
          if (this.refSet.has(currentNode)) {
            continue;
          }
          try {
            var ref = currentNode;
            this.refSet.put(ref);
            currentNode = this.xref.fetch(currentNode);
          } catch (e) {
            if (!(e instanceof MissingDataException)) {
              throw e;
            }
            nodesToRevisit.push(currentNode);
            pendingRequests.push({ begin: e.begin, end: e.end });
          }
        }
        if (currentNode && currentNode.getBaseStreams) {
          var baseStreams = currentNode.getBaseStreams();
          var foundMissingData = false;
          for (var i = 0; i < baseStreams.length; i++) {
            var stream = baseStreams[i];
            if (stream.getMissingChunks && stream.getMissingChunks().length) {
              foundMissingData = true;
              pendingRequests.push({
                begin: stream.start,
                end: stream.end
              });
            }
          }
          if (foundMissingData) {
            nodesToRevisit.push(currentNode);
          }
        }

        addChildren(currentNode, nodesToVisit);
      }

      if (pendingRequests.length) {
        this.xref.stream.manager.requestRanges(pendingRequests,
            function pendingRequestCallback() {
          nodesToVisit = nodesToRevisit;
          for (var i = 0; i < nodesToRevisit.length; i++) {
            var node = nodesToRevisit[i];
            // Remove any reference nodes from the current refset so they
            // aren't skipped when we revisit them.
            if (isRef(node)) {
              this.refSet.remove(node);
            }
          }
          // A failure while resuming must settle the promise; letting it
          // escape into the stream manager would leave the promise
          // returned by load() pending forever.
          try {
            this.walk(nodesToVisit);
          } catch (e) {
            this.capability.reject(e);
          }
        }.bind(this));
        return;
      }
      // Everything is loaded.
      this.refSet = null;
      this.capability.resolve();
    }
  };

  return ObjectLoader;
})();
|