856e1c600b
I have a large PDF where this function is called 1.6 million times during loading. Minimizing the string concatenations reduces the cumulative allocations done by Firefox within this function from 113 MB to 48 MB.
1598 lines
48 KiB
JavaScript
1598 lines
48 KiB
JavaScript
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
|
|
/* Copyright 2012 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
/* globals assert, bytesToString, CipherTransformFactory, error, info,
|
|
InvalidPDFException, isArray, isCmd, isDict, isInt, isName, isRef,
|
|
isStream, Lexer, Page, Parser, Promise, shadow,
|
|
stringToPDFString, stringToUTF8String, warn, isString,
|
|
Promise, MissingDataException, XRefParseException, Stream,
|
|
ChunkedStream, createPromiseCapability */
|
|
|
|
'use strict';
|
|
|
|
// Name: wrapper for a PDF name object. Instances are interned through
// Name.get(), so two lookups of the same string yield the same object.
var Name = (function NameClosure() {
  // name: the string stored on the instance as `this.name`.
  function Name(name) {
    this.name = name;
  }

  Name.prototype = {};

  // Prototype-less cache: with a plain `{}` a lookup such as
  // Name.get('constructor') or Name.get('toString') would find the member
  // inherited from Object.prototype and return it instead of a Name.
  var nameCache = Object.create(null);

  // Returns the cached Name for `name`, creating and caching it on first use.
  Name.get = function Name_get(name) {
    var nameValue = nameCache[name];
    return (nameValue ? nameValue : (nameCache[name] = new Name(name)));
  };

  return Name;
})();
|
|
|
|
// Cmd: wrapper for a command/operator token. Instances are interned through
// Cmd.get(), so two lookups of the same string yield the same object.
var Cmd = (function CmdClosure() {
  // cmd: the string stored on the instance as `this.cmd`.
  function Cmd(cmd) {
    this.cmd = cmd;
  }

  Cmd.prototype = {};

  // Prototype-less cache: with a plain `{}` a lookup such as
  // Cmd.get('constructor') would find the member inherited from
  // Object.prototype and return it instead of a Cmd.
  var cmdCache = Object.create(null);

  // Returns the cached Cmd for `cmd`, creating and caching it on first use.
  Cmd.get = function Cmd_get(cmd) {
    var cmdValue = cmdCache[cmd];
    return (cmdValue ? cmdValue : (cmdCache[cmd] = new Cmd(cmd)));
  };

  return Cmd;
})();
|
|
|
|
// Dict: key/value container for a PDF dictionary. Values are kept in a
// prototype-less map; when an xref is attached, get()/getAsync() pass every
// value through xref.fetchIfRef()/fetchIfRefAsync() before returning it.
var Dict = (function DictClosure() {
  // Marker stored on each instance as `__nonSerializable__`
  // (disables cloning of the Dict).
  var nonSerializable = function nonSerializableClosure() {
    return nonSerializable; // creating closure on some variable
  };

  // Type names for which getAll() keeps descending into a nested Dict.
  var GETALL_DICTIONARY_TYPES_WHITELIST = {
    'Background': true,
    'ExtGState': true,
    'Halftone': true,
    'Layout': true,
    'Mask': true,
    'Pagination': true,
    'Printing': true
  };

  // Decides whether getAll() may expand `dict` in place.
  // NOTE(review): this reads a `Type` property directly off the object, not
  // the 'Type' entry of its map; the constructor below never sets such a
  // property - confirm whether dict.get('Type') was intended.
  function isRecursionAllowedFor(dict) {
    var type = dict.Type;
    if (isName(type)) {
      return GETALL_DICTIONARY_TYPES_WHITELIST[type.name] === true;
    }
    return true;
  }

  // Shared lookup for get()/getAsync(): returns the raw (not dereferenced)
  // value of the first applicable key. key1/key2 win when they are present
  // in the map (even with an undefined value) or when no further fallback
  // key was supplied; a falsy value found under key3 is reported as null.
  function pickRaw(map, key1, key2, key3) {
    var value = map[key1];
    if (value !== undefined || key1 in map || key2 === undefined) {
      return value;
    }
    value = map[key2];
    if (value !== undefined || key2 in map || key3 === undefined) {
      return value;
    }
    return map[key3] || null;
  }

  // Copies every entry of `source` into `target`. Nested Dicts that may be
  // expanded are appended to `pending` instead; the others are stored in
  // their raw (not dereferenced) form.
  function copyEntries(source, target, pending) {
    for (var key in source.map) {
      var value = source.get(key);
      if (!(value instanceof Dict)) {
        target[key] = value;
      } else if (isRecursionAllowedFor(value)) {
        pending.push({target: target, key: key, obj: value});
      } else {
        target[key] = source.getRaw(key);
      }
    }
  }

  // xref is optional
  function Dict(xref) {
    // Map should only be used internally, use functions below to access.
    this.map = Object.create(null);
    this.xref = xref;
    this.objId = null;
    this.__nonSerializable__ = nonSerializable; // disable cloning of the Dict
  }

  Dict.prototype = {
    // Replaces the xref used for dereferencing.
    assignXref: function Dict_assignXref(newXref) {
      this.xref = newXref;
    },

    // Looks up key1 (falling back to key2, then key3) and automatically
    // dereferences Ref objects when an xref is attached.
    get: function Dict_get(key1, key2, key3) {
      var raw = pickRaw(this.map, key1, key2, key3);
      var xref = this.xref;
      return xref ? xref.fetchIfRef(raw) : raw;
    },

    // Same as get(), but returns a promise and uses fetchIfRefAsync().
    getAsync: function Dict_getAsync(key1, key2, key3) {
      var raw = pickRaw(this.map, key1, key2, key3);
      var xref = this.xref;
      return xref ? xref.fetchIfRefAsync(raw) : Promise.resolve(raw);
    },

    // Returns the stored value without dereferencing.
    getRaw: function Dict_getRaw(key) {
      return this.map[key];
    },

    // Builds a new prototype-less object holding the dereferenced entries;
    // nested Dicts are expanded breadth-first into plain objects as well.
    getAll: function Dict_getAll() {
      var all = Object.create(null);
      var pending = [];
      copyEntries(this, all, pending);

      // Dicts carrying an objId are expanded once and then shared, which
      // also takes cyclic references into account.
      var processed = Object.create(null);
      for (var i = 0; i < pending.length; i++) {
        var item = pending[i];
        var nested = item.obj;
        var objId = nested.objId;
        if (objId && objId in processed) {
          item.target[item.key] = processed[objId];
          continue;
        }
        var dereferenced = Object.create(null);
        copyEntries(nested, dereferenced, pending);
        if (objId) {
          processed[objId] = dereferenced;
        }
        item.target[item.key] = dereferenced;
      }
      return all;
    },

    // Stores `value` under `key`.
    set: function Dict_set(key, value) {
      this.map[key] = value;
    },

    // True when `key` exists in the map, whatever its value.
    has: function Dict_has(key) {
      return key in this.map;
    },

    // Calls callback(key, dereferencedValue) for every entry.
    forEach: function Dict_forEach(callback) {
      var map = this.map;
      for (var key in map) {
        callback(key, this.get(key));
      }
    }
  };

  // Shared Dict instance created without an xref.
  Dict.empty = new Dict(null);

  return Dict;
})();
|
|
|
|
// Ref: an indirect object reference made of an object number (`num`) and a
// generation number (`gen`).
var Ref = (function RefClosure() {
  function Ref(num, gen) {
    this.num = num;
    this.gen = gen;
  }

  Ref.prototype = {
    // Compact string key: '<num>R' when gen is 0, '<num>R<gen>' otherwise.
    toString: function Ref_toString() {
      // This function is hot, so the string is kept as short as possible and
      // the (almost universal) zero generation costs a single concatenation.
      if (this.gen === 0) {
        return this.num + 'R';
      }
      return this.num + 'R' + this.gen;
    }
  };

  return Ref;
})();
|
|
|
|
// RefSet: a set of references. Each reference is identified by the string
// its toString() produces, so a given reference is stored only once.
var RefSet = (function RefSetClosure() {
  function RefSet() {
    // Prototype-less map (as in RefSetCache) so that has() only reports keys
    // that were actually put(), never members inherited from
    // Object.prototype.
    this.dict = Object.create(null);
  }

  RefSet.prototype = {
    // True when `ref` was added and not removed since.
    has: function RefSet_has(ref) {
      return ref.toString() in this.dict;
    },

    // Adds `ref` to the set.
    put: function RefSet_put(ref) {
      this.dict[ref.toString()] = true;
    },

    // Removes `ref` from the set (no-op when absent).
    remove: function RefSet_remove(ref) {
      delete this.dict[ref.toString()];
    }
  };

  return RefSet;
})();
|
|
|
|
// RefSetCache: maps references (keyed by their toString() value) to
// arbitrary cached objects.
var RefSetCache = (function RefSetCacheClosure() {
  function RefSetCache() {
    this.dict = Object.create(null);
  }

  RefSetCache.prototype = {
    // Returns the object stored for `ref`, or undefined when there is none.
    get: function RefSetCache_get(ref) {
      var key = ref.toString();
      return this.dict[key];
    },

    // True when an entry exists for `ref`.
    has: function RefSetCache_has(ref) {
      var key = ref.toString();
      return key in this.dict;
    },

    // Stores `obj` under `ref`, replacing any previous entry.
    put: function RefSetCache_put(ref, obj) {
      var key = ref.toString();
      this.dict[key] = obj;
    },

    // Makes `ref` point at whatever is currently stored for `aliasRef`.
    putAlias: function RefSetCache_putAlias(ref, aliasRef) {
      var key = ref.toString();
      this.dict[key] = this.get(aliasRef);
    },

    // Invokes fn(value) for every stored value, with `thisArg` as `this`.
    forEach: function RefSetCache_forEach(fn, thisArg) {
      for (var key in this.dict) {
        fn.call(thisArg, this.dict[key]);
      }
    },

    // Drops every entry.
    clear: function RefSetCache_clear() {
      this.dict = Object.create(null);
    }
  };

  return RefSetCache;
})();
|
|
|
|
// Catalog: accessors built on top of the document catalog dictionary
// returned by xref.getCatalogObj() (metadata, outline, destinations,
// attachments, JavaScript actions and the page tree).
var Catalog = (function CatalogClosure() {
  // pdfManager is stored and later handed to every Page created by
  // getPage(); xref supplies the catalog dictionary and object fetching.
  // Asserts that the catalog object is a dictionary.
  function Catalog(pdfManager, xref) {
    this.pdfManager = pdfManager;
    this.xref = xref;
    this.catDict = xref.getCatalogObj();
    this.fontCache = new RefSetCache();
    assert(isDict(this.catDict),
      'catalog object is not a dictionary');

    // Promises created by getPage(), indexed by page index.
    this.pagePromises = [];
  }

  Catalog.prototype = {
    // Decoded XML metadata string. Shadowed with null when the catalog has
    // no 'Metadata' reference, and with undefined when the stream is not of
    // Type 'Metadata' / Subtype 'XML' or cannot be decoded.
    get metadata() {
      var streamRef = this.catDict.getRaw('Metadata');
      if (!isRef(streamRef)) {
        return shadow(this, 'metadata', null);
      }

      var encryptMetadata = (!this.xref.encrypt ? false :
                             this.xref.encrypt.encryptMetadata);

      // The second argument of fetch() is its suppressEncryption flag.
      var stream = this.xref.fetch(streamRef, !encryptMetadata);
      var metadata;
      if (stream && isDict(stream.dict)) {
        var type = stream.dict.get('Type');
        var subtype = stream.dict.get('Subtype');

        if (isName(type) && isName(subtype) &&
            type.name === 'Metadata' && subtype.name === 'XML') {
          // XXX: This should examine the charset the XML document defines,
          // however since there are currently no real means to decode
          // arbitrary charsets, let's just hope that the author of the PDF
          // was reasonable enough to stick with the XML default charset,
          // which is UTF-8.
          try {
            metadata = stringToUTF8String(bytesToString(stream.getBytes()));
          } catch (e) {
            info('Skipping invalid metadata.');
          }
        }
      }

      return shadow(this, 'metadata', metadata);
    },

    // The dictionary stored under 'Pages' in the catalog; asserts that it
    // is a dictionary.
    get toplevelPagesDict() {
      var pagesObj = this.catDict.get('Pages');
      assert(isDict(pagesObj), 'invalid top-level pages dictionary');
      // shadow the prototype getter
      return shadow(this, 'toplevelPagesDict', pagesObj);
    },

    // Outline tree from readDocumentOutline(), or null when it cannot be
    // read. MissingDataException is rethrown; any other failure only
    // produces a warning.
    get documentOutline() {
      var obj = null;
      try {
        obj = this.readDocumentOutline();
      } catch (ex) {
        if (ex instanceof MissingDataException) {
          throw ex;
        }
        warn('Unable to read document outline');
      }
      return shadow(this, 'documentOutline', obj);
    },

    // Walks the 'Outlines' entry through its First/Next links and returns an
    // array of { dest, title, color, count, bold, italic, items } objects,
    // or null when there are no items.
    readDocumentOutline: function Catalog_readDocumentOutline() {
      var xref = this.xref;
      var obj = this.catDict.get('Outlines');
      var root = { items: [] };
      if (isDict(obj)) {
        obj = obj.getRaw('First');
        var processed = new RefSet();
        if (isRef(obj)) {
          var queue = [{obj: obj, parent: root}];
          // to avoid recursion keeping track of the items
          // in the processed dictionary
          processed.put(obj);
          while (queue.length > 0) {
            var i = queue.shift();
            var outlineDict = xref.fetchIfRef(i.obj);
            if (outlineDict === null) {
              continue;
            }
            if (!outlineDict.has('Title')) {
              error('Invalid outline item');
            }
            // The 'D' entry of an 'A' dictionary takes precedence over a
            // direct 'Dest' entry.
            var dest = outlineDict.get('A');
            if (dest) {
              dest = dest.get('D');
            } else if (outlineDict.has('Dest')) {
              dest = outlineDict.getRaw('Dest');
              if (isName(dest)) {
                dest = dest.name;
              }
            }
            var title = outlineDict.get('Title');
            var outlineItem = {
              dest: dest,
              title: stringToPDFString(title),
              color: outlineDict.get('C') || [0, 0, 0],
              count: outlineDict.get('Count'),
              // Bit 2 of 'F' maps to bold, bit 1 to italic.
              bold: !!(outlineDict.get('F') & 2),
              italic: !!(outlineDict.get('F') & 1),
              items: []
            };
            i.parent.items.push(outlineItem);
            // 'First' starts the children of this item ...
            obj = outlineDict.getRaw('First');
            if (isRef(obj) && !processed.has(obj)) {
              queue.push({obj: obj, parent: outlineItem});
              processed.put(obj);
            }
            // ... while 'Next' is a sibling under the same parent.
            obj = outlineDict.getRaw('Next');
            if (isRef(obj) && !processed.has(obj)) {
              queue.push({obj: obj, parent: i.parent});
              processed.put(obj);
            }
          }
        }
      }
      return (root.items.length > 0 ? root.items : null);
    },

    // 'Count' entry of the top-level pages dictionary; asserts that it is an
    // integer.
    get numPages() {
      var obj = this.toplevelPagesDict.get('Count');
      assert(
        isInt(obj),
        'page count in top level pages object is not an integer'
      );
      // Shadow the prototype getter. The property name has to match the
      // getter ('numPages'); shadowing 'num' left this getter in place, so
      // it was re-evaluated on every access and a stray 'num' property was
      // created on the instance.
      return shadow(this, 'numPages', obj);
    },

    // Object mapping destination names to destinations, read from the
    // 'Dests' name tree under 'Names' when present, otherwise from the
    // catalog's own 'Dests' dictionary.
    get destinations() {
      // A destination stored as a dictionary keeps the actual value in 'D'.
      function fetchDestination(dest) {
        return isDict(dest) ? dest.get('D') : dest;
      }

      var xref = this.xref;
      var dests = {}, nameTreeRef, nameDictionaryRef;
      var obj = this.catDict.get('Names');
      if (obj && obj.has('Dests')) {
        nameTreeRef = obj.getRaw('Dests');
      } else if (this.catDict.has('Dests')) {
        nameDictionaryRef = this.catDict.get('Dests');
      }

      if (nameDictionaryRef) {
        // reading simple destination dictionary
        obj = nameDictionaryRef;
        obj.forEach(function catalogForEach(key, value) {
          if (!value) {
            return;
          }
          dests[key] = fetchDestination(value);
        });
      }
      if (nameTreeRef) {
        var nameTree = new NameTree(nameTreeRef, xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          dests[name] = fetchDestination(names[name]);
        }
      }
      return shadow(this, 'destinations', dests);
    },

    // Object mapping attachment names to FileSpec `serializable` values from
    // the 'EmbeddedFiles' name tree, or null when there are none.
    get attachments() {
      var xref = this.xref;
      var attachments = null, nameTreeRef;
      var obj = this.catDict.get('Names');
      if (obj) {
        nameTreeRef = obj.getRaw('EmbeddedFiles');
      }

      if (nameTreeRef) {
        var nameTree = new NameTree(nameTreeRef, xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          var fs = new FileSpec(names[name], xref);
          if (!attachments) {
            attachments = {};
          }
          attachments[stringToPDFString(name)] = fs.serializable;
        }
      }
      return shadow(this, 'attachments', attachments);
    },

    // Array of script strings collected from the 'JavaScript' name tree,
    // plus 'print(true);' when the OpenAction is the named 'Print' action.
    get javaScript() {
      var xref = this.xref;
      var obj = this.catDict.get('Names');

      var javaScript = [];
      if (obj && obj.has('JavaScript')) {
        var nameTree = new NameTree(obj.getRaw('JavaScript'), xref);
        var names = nameTree.getAll();
        for (var name in names) {
          if (!names.hasOwnProperty(name)) {
            continue;
          }
          // We don't really use the JavaScript right now. This code is
          // defensive so we don't cause errors on document load.
          var jsDict = names[name];
          if (!isDict(jsDict)) {
            continue;
          }
          var type = jsDict.get('S');
          if (!isName(type) || type.name !== 'JavaScript') {
            continue;
          }
          var js = jsDict.get('JS');
          if (!isString(js) && !isStream(js)) {
            continue;
          }
          if (isStream(js)) {
            js = bytesToString(js.getBytes());
          }
          javaScript.push(stringToPDFString(js));
        }
      }

      // Append OpenAction actions to javaScript array
      var openactionDict = this.catDict.get('OpenAction');
      if (isDict(openactionDict)) {
        var objType = openactionDict.get('Type');
        var actionType = openactionDict.get('S');
        var action = openactionDict.get('N');
        var isPrintAction = (isName(objType) && objType.name === 'Action' &&
          isName(actionType) && actionType.name === 'Named' &&
          isName(action) && action.name === 'Print');

        if (isPrintAction) {
          javaScript.push('print(true);');
        }
      }

      return shadow(this, 'javaScript', javaScript);
    },

    // Waits for every promise in fontCache, deletes the `translated`
    // property from each resolved font's dict and then clears the cache.
    // Returns the resulting promise.
    cleanup: function Catalog_cleanup() {
      var promises = [];
      this.fontCache.forEach(function (promise) {
        promises.push(promise);
      });
      return Promise.all(promises).then(function (translatedFonts) {
        for (var i = 0, ii = translatedFonts.length; i < ii; i++) {
          var font = translatedFonts[i].dict;
          delete font.translated;
        }
        this.fontCache.clear();
      }.bind(this));
    },

    // Returns (and memoizes per index) a promise for the Page at pageIndex.
    getPage: function Catalog_getPage(pageIndex) {
      if (!(pageIndex in this.pagePromises)) {
        this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then(
          function (a) {
            var dict = a[0];
            var ref = a[1];
            return new Page(this.pdfManager, this.xref, pageIndex, dict, ref,
                            this.fontCache);
          }.bind(this)
        );
      }
      return this.pagePromises[pageIndex];
    },

    // Depth-first walk of the page tree. The returned promise resolves with
    // [pageDict, pageRef] for the requested index and is rejected when the
    // index is not found or a fetch fails.
    getPageDict: function Catalog_getPageDict(pageIndex) {
      var capability = createPromiseCapability();
      var nodesToVisit = [this.catDict.getRaw('Pages')];
      var currentPageIndex = 0;
      var xref = this.xref;

      function next() {
        while (nodesToVisit.length) {
          var currentNode = nodesToVisit.pop();

          if (isRef(currentNode)) {
            // References are resolved asynchronously; the walk resumes from
            // the callback, hence the return after scheduling the fetch.
            xref.fetchAsync(currentNode).then(function (obj) {
              if ((isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids')))) {
                if (pageIndex === currentPageIndex) {
                  capability.resolve([obj, currentNode]);
                } else {
                  currentPageIndex++;
                  next();
                }
                return;
              }
              nodesToVisit.push(obj);
              next();
            }.bind(this), capability.reject.bind(capability));
            return;
          }

          // must be a child page dictionary
          assert(
            isDict(currentNode),
            'page dictionary kid reference points to wrong type of object'
          );
          var count = currentNode.get('Count');
          // Skip nodes where the page can't be.
          if (currentPageIndex + count <= pageIndex) {
            currentPageIndex += count;
            continue;
          }

          var kids = currentNode.get('Kids');
          assert(isArray(kids), 'page dictionary kids object is not an array');
          if (count === kids.length) {
            // Nodes that don't have the page have been skipped and this is the
            // bottom of the tree which means the page requested must be a
            // descendant of this pages node. Ideally we would just resolve the
            // promise with the page ref here, but there is the case where more
            // pages nodes could link to single a page (see issue 3666 pdf). To
            // handle this push it back on the queue so if it is a pages node it
            // will be descended into.
            nodesToVisit = [kids[pageIndex - currentPageIndex]];
            currentPageIndex = pageIndex;
            continue;
          } else {
            // Pushed in reverse so that pop() visits the kids in order.
            for (var last = kids.length - 1; last >= 0; last--) {
              nodesToVisit.push(kids[last]);
            }
          }
        }
        capability.reject('Page index ' + pageIndex + ' not found.');
      }
      next();
      return capability.promise;
    },

    // Returns a promise for the zero-based index of the page referenced by
    // `ref`.
    getPageIndex: function Catalog_getPageIndex(ref) {
      // The page tree nodes have the count of all the leaves below them. To get
      // how many pages are before we just have to walk up the tree and keep
      // adding the count of siblings to the left of the node.
      var xref = this.xref;
      // Resolves with [pagesBeforeKidUnderItsParent, parentRef], or with
      // null when the node, its parent or the parent's kids are missing.
      function pagesBeforeRef(kidRef) {
        var total = 0;
        var parentRef;
        return xref.fetchAsync(kidRef).then(function (node) {
          if (!node) {
            return null;
          }
          parentRef = node.getRaw('Parent');
          return node.getAsync('Parent');
        }).then(function (parent) {
          if (!parent) {
            return null;
          }
          return parent.getAsync('Kids');
        }).then(function (kids) {
          if (!kids) {
            return null;
          }
          var kidPromises = [];
          var found = false;
          for (var i = 0; i < kids.length; i++) {
            var kid = kids[i];
            assert(isRef(kid), 'kids must be a ref');
            if (kid.num == kidRef.num) {
              found = true;
              break;
            }
            kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
              if (kid.has('Count')) {
                var count = kid.get('Count');
                total += count;
              } else { // page leaf node
                total++;
              }
            }));
          }
          if (!found) {
            error('kid ref not found in parents kids');
          }
          return Promise.all(kidPromises).then(function () {
            return [total, parentRef];
          });
        });
      }

      var total = 0;
      // Climbs one level per call, accumulating into `total` until
      // pagesBeforeRef() reports null.
      function next(ref) {
        return pagesBeforeRef(ref).then(function (args) {
          if (!args) {
            return total;
          }
          var count = args[0];
          var parentRef = args[1];
          total += count;
          return next(parentRef);
        });
      }

      return next(ref);
    }
  };

  return Catalog;
})();
|
|
|
|
var XRef = (function XRefClosure() {
|
|
// XRef constructor: stores the stream and password and creates the empty
// containers filled in later.
function XRef(stream, password) {
  this.stream = stream;
  // Entry objects, stored by index.
  this.entries = [];
  // Positions of xref streams that were already queued or read.
  this.xrefstms = {};
  // prepare the XRef cache
  this.cache = [];
  this.password = password;
  var stats = { streamTypes: [], fontTypes: [] };
  this.stats = stats;
}
|
|
|
|
XRef.prototype = {
|
|
setStartXRef: function XRef_setStartXRef(startXRef) {
|
|
// Store the starting positions of xref tables as we process them
|
|
// so we can recover from missing data errors
|
|
this.startXRefQueue = [startXRef];
|
|
},
|
|
|
|
parse: function XRef_parse(recoveryMode) {
|
|
var trailerDict;
|
|
if (!recoveryMode) {
|
|
trailerDict = this.readXRef();
|
|
} else {
|
|
warn('Indexing all PDF objects');
|
|
trailerDict = this.indexObjects();
|
|
}
|
|
trailerDict.assignXref(this);
|
|
this.trailer = trailerDict;
|
|
var encrypt = trailerDict.get('Encrypt');
|
|
if (encrypt) {
|
|
var ids = trailerDict.get('ID');
|
|
var fileId = (ids && ids.length) ? ids[0] : '';
|
|
this.encrypt = new CipherTransformFactory(encrypt, fileId,
|
|
this.password);
|
|
}
|
|
|
|
// get the root dictionary (catalog) object
|
|
if (!(this.root = trailerDict.get('Root'))) {
|
|
error('Invalid root reference');
|
|
}
|
|
},
|
|
|
|
processXRefTable: function XRef_processXRefTable(parser) {
|
|
if (!('tableState' in this)) {
|
|
// Stores state of the table as we process it so we can resume
|
|
// from middle of table in case of missing data error
|
|
this.tableState = {
|
|
entryNum: 0,
|
|
streamPos: parser.lexer.stream.pos,
|
|
parserBuf1: parser.buf1,
|
|
parserBuf2: parser.buf2
|
|
};
|
|
}
|
|
|
|
var obj = this.readXRefTable(parser);
|
|
|
|
// Sanity check
|
|
if (!isCmd(obj, 'trailer')) {
|
|
error('Invalid XRef table: could not find trailer dictionary');
|
|
}
|
|
// Read trailer dictionary, e.g.
|
|
// trailer
|
|
// << /Size 22
|
|
// /Root 20R
|
|
// /Info 10R
|
|
// /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
|
|
// >>
|
|
// The parser goes through the entire stream << ... >> and provides
|
|
// a getter interface for the key-value table
|
|
var dict = parser.getObj();
|
|
|
|
// The pdflib PDF generator can generate a nested trailer dictionary
|
|
if (!isDict(dict) && dict.dict) {
|
|
dict = dict.dict;
|
|
}
|
|
if (!isDict(dict)) {
|
|
error('Invalid XRef table: could not parse trailer dictionary');
|
|
}
|
|
delete this.tableState;
|
|
|
|
return dict;
|
|
},
|
|
|
|
readXRefTable: function XRef_readXRefTable(parser) {
|
|
// Example of cross-reference table:
|
|
// xref
|
|
// 0 1 <-- subsection header (first obj #, obj count)
|
|
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
|
|
// 23 2 <-- subsection header ... and so on ...
|
|
// 0000025518 00002 n
|
|
// 0000025635 00000 n
|
|
// trailer
|
|
// ...
|
|
|
|
var stream = parser.lexer.stream;
|
|
var tableState = this.tableState;
|
|
stream.pos = tableState.streamPos;
|
|
parser.buf1 = tableState.parserBuf1;
|
|
parser.buf2 = tableState.parserBuf2;
|
|
|
|
// Outer loop is over subsection headers
|
|
var obj;
|
|
|
|
while (true) {
|
|
if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
|
|
if (isCmd(obj = parser.getObj(), 'trailer')) {
|
|
break;
|
|
}
|
|
tableState.firstEntryNum = obj;
|
|
tableState.entryCount = parser.getObj();
|
|
}
|
|
|
|
var first = tableState.firstEntryNum;
|
|
var count = tableState.entryCount;
|
|
if (!isInt(first) || !isInt(count)) {
|
|
error('Invalid XRef table: wrong types in subsection header');
|
|
}
|
|
// Inner loop is over objects themselves
|
|
for (var i = tableState.entryNum; i < count; i++) {
|
|
tableState.streamPos = stream.pos;
|
|
tableState.entryNum = i;
|
|
tableState.parserBuf1 = parser.buf1;
|
|
tableState.parserBuf2 = parser.buf2;
|
|
|
|
var entry = {};
|
|
entry.offset = parser.getObj();
|
|
entry.gen = parser.getObj();
|
|
var type = parser.getObj();
|
|
|
|
if (isCmd(type, 'f')) {
|
|
entry.free = true;
|
|
} else if (isCmd(type, 'n')) {
|
|
entry.uncompressed = true;
|
|
}
|
|
|
|
// Validate entry obj
|
|
if (!isInt(entry.offset) || !isInt(entry.gen) ||
|
|
!(entry.free || entry.uncompressed)) {
|
|
console.log(entry.offset, entry.gen, entry.free,
|
|
entry.uncompressed);
|
|
error('Invalid entry in XRef subsection: ' + first + ', ' + count);
|
|
}
|
|
|
|
if (!this.entries[i + first]) {
|
|
this.entries[i + first] = entry;
|
|
}
|
|
}
|
|
|
|
tableState.entryNum = 0;
|
|
tableState.streamPos = stream.pos;
|
|
tableState.parserBuf1 = parser.buf1;
|
|
tableState.parserBuf2 = parser.buf2;
|
|
delete tableState.firstEntryNum;
|
|
delete tableState.entryCount;
|
|
}
|
|
|
|
// Per issue 3248: hp scanners generate bad XRef
|
|
if (first === 1 && this.entries[1] && this.entries[1].free) {
|
|
// shifting the entries
|
|
this.entries.shift();
|
|
}
|
|
|
|
// Sanity check: as per spec, first object must be free
|
|
if (this.entries[0] && !this.entries[0].free) {
|
|
error('Invalid XRef table: unexpected first object');
|
|
}
|
|
return obj;
|
|
},
|
|
|
|
processXRefStream: function XRef_processXRefStream(stream) {
|
|
if (!('streamState' in this)) {
|
|
// Stores state of the stream as we process it so we can resume
|
|
// from middle of stream in case of missing data error
|
|
var streamParameters = stream.dict;
|
|
var byteWidths = streamParameters.get('W');
|
|
var range = streamParameters.get('Index');
|
|
if (!range) {
|
|
range = [0, streamParameters.get('Size')];
|
|
}
|
|
|
|
this.streamState = {
|
|
entryRanges: range,
|
|
byteWidths: byteWidths,
|
|
entryNum: 0,
|
|
streamPos: stream.pos
|
|
};
|
|
}
|
|
this.readXRefStream(stream);
|
|
delete this.streamState;
|
|
|
|
return stream.dict;
|
|
},
|
|
|
|
readXRefStream: function XRef_readXRefStream(stream) {
|
|
var i, j;
|
|
var streamState = this.streamState;
|
|
stream.pos = streamState.streamPos;
|
|
|
|
var byteWidths = streamState.byteWidths;
|
|
var typeFieldWidth = byteWidths[0];
|
|
var offsetFieldWidth = byteWidths[1];
|
|
var generationFieldWidth = byteWidths[2];
|
|
|
|
var entryRanges = streamState.entryRanges;
|
|
while (entryRanges.length > 0) {
|
|
var first = entryRanges[0];
|
|
var n = entryRanges[1];
|
|
|
|
if (!isInt(first) || !isInt(n)) {
|
|
error('Invalid XRef range fields: ' + first + ', ' + n);
|
|
}
|
|
if (!isInt(typeFieldWidth) || !isInt(offsetFieldWidth) ||
|
|
!isInt(generationFieldWidth)) {
|
|
error('Invalid XRef entry fields length: ' + first + ', ' + n);
|
|
}
|
|
for (i = streamState.entryNum; i < n; ++i) {
|
|
streamState.entryNum = i;
|
|
streamState.streamPos = stream.pos;
|
|
|
|
var type = 0, offset = 0, generation = 0;
|
|
for (j = 0; j < typeFieldWidth; ++j) {
|
|
type = (type << 8) | stream.getByte();
|
|
}
|
|
// if type field is absent, its default value is 1
|
|
if (typeFieldWidth === 0) {
|
|
type = 1;
|
|
}
|
|
for (j = 0; j < offsetFieldWidth; ++j) {
|
|
offset = (offset << 8) | stream.getByte();
|
|
}
|
|
for (j = 0; j < generationFieldWidth; ++j) {
|
|
generation = (generation << 8) | stream.getByte();
|
|
}
|
|
var entry = {};
|
|
entry.offset = offset;
|
|
entry.gen = generation;
|
|
switch (type) {
|
|
case 0:
|
|
entry.free = true;
|
|
break;
|
|
case 1:
|
|
entry.uncompressed = true;
|
|
break;
|
|
case 2:
|
|
break;
|
|
default:
|
|
error('Invalid XRef entry type: ' + type);
|
|
}
|
|
if (!this.entries[first + i]) {
|
|
this.entries[first + i] = entry;
|
|
}
|
|
}
|
|
|
|
streamState.entryNum = 0;
|
|
streamState.streamPos = stream.pos;
|
|
entryRanges.splice(0, 2);
|
|
}
|
|
},
|
|
|
|
indexObjects: function XRef_indexObjects() {
|
|
// Simple scan through the PDF content to find objects,
|
|
// trailers and XRef streams.
|
|
function readToken(data, offset) {
|
|
var token = '', ch = data[offset];
|
|
while (ch !== 13 && ch !== 10) {
|
|
if (++offset >= data.length) {
|
|
break;
|
|
}
|
|
token += String.fromCharCode(ch);
|
|
ch = data[offset];
|
|
}
|
|
return token;
|
|
}
|
|
function skipUntil(data, offset, what) {
|
|
var length = what.length, dataLength = data.length;
|
|
var skipped = 0;
|
|
// finding byte sequence
|
|
while (offset < dataLength) {
|
|
var i = 0;
|
|
while (i < length && data[offset + i] == what[i]) {
|
|
++i;
|
|
}
|
|
if (i >= length) {
|
|
break; // sequence found
|
|
}
|
|
offset++;
|
|
skipped++;
|
|
}
|
|
return skipped;
|
|
}
|
|
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
|
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
|
|
101, 102]);
|
|
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
|
|
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
|
|
|
var stream = this.stream;
|
|
stream.pos = 0;
|
|
var buffer = stream.getBytes();
|
|
var position = stream.start, length = buffer.length;
|
|
var trailers = [], xrefStms = [];
|
|
while (position < length) {
|
|
var ch = buffer[position];
|
|
if (ch === 32 || ch === 9 || ch === 13 || ch === 10) {
|
|
++position;
|
|
continue;
|
|
}
|
|
if (ch === 37) { // %-comment
|
|
do {
|
|
++position;
|
|
if (position >= length) {
|
|
break;
|
|
}
|
|
ch = buffer[position];
|
|
} while (ch !== 13 && ch !== 10);
|
|
continue;
|
|
}
|
|
var token = readToken(buffer, position);
|
|
var m;
|
|
if (token === 'xref') {
|
|
position += skipUntil(buffer, position, trailerBytes);
|
|
trailers.push(position);
|
|
position += skipUntil(buffer, position, startxrefBytes);
|
|
} else if ((m = /^(\d+)\s+(\d+)\s+obj\b/.exec(token))) {
|
|
this.entries[m[1]] = {
|
|
offset: position,
|
|
gen: m[2] | 0,
|
|
uncompressed: true
|
|
};
|
|
|
|
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
|
|
var content = buffer.subarray(position, position + contentLength);
|
|
|
|
// checking XRef stream suspect
|
|
// (it shall have '/XRef' and next char is not a letter)
|
|
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
|
|
if (xrefTagOffset < contentLength &&
|
|
content[xrefTagOffset + 5] < 64) {
|
|
xrefStms.push(position);
|
|
this.xrefstms[position] = 1; // don't read it recursively
|
|
}
|
|
|
|
position += contentLength;
|
|
} else {
|
|
position += token.length + 1;
|
|
}
|
|
}
|
|
// reading XRef streams
|
|
var i, ii;
|
|
for (i = 0, ii = xrefStms.length; i < ii; ++i) {
|
|
this.startXRefQueue.push(xrefStms[i]);
|
|
this.readXRef(/* recoveryMode */ true);
|
|
}
|
|
// finding main trailer
|
|
var dict;
|
|
for (i = 0, ii = trailers.length; i < ii; ++i) {
|
|
stream.pos = trailers[i];
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
|
var obj = parser.getObj();
|
|
if (!isCmd(obj, 'trailer')) {
|
|
continue;
|
|
}
|
|
// read the trailer dictionary
|
|
if (!isDict(dict = parser.getObj())) {
|
|
continue;
|
|
}
|
|
// taking the first one with 'ID'
|
|
if (dict.has('ID')) {
|
|
return dict;
|
|
}
|
|
}
|
|
// no tailer with 'ID', taking last one (if exists)
|
|
if (dict) {
|
|
return dict;
|
|
}
|
|
// nothing helps
|
|
// calling error() would reject worker with an UnknownErrorException.
|
|
throw new InvalidPDFException('Invalid PDF structure');
|
|
},
|
|
|
|
readXRef: function XRef_readXRef(recoveryMode) {
|
|
var stream = this.stream;
|
|
|
|
try {
|
|
while (this.startXRefQueue.length) {
|
|
var startXRef = this.startXRefQueue[0];
|
|
|
|
stream.pos = startXRef + stream.start;
|
|
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
|
var obj = parser.getObj();
|
|
var dict;
|
|
|
|
// Get dictionary
|
|
if (isCmd(obj, 'xref')) {
|
|
// Parse end-of-file XRef
|
|
dict = this.processXRefTable(parser);
|
|
if (!this.topDict) {
|
|
this.topDict = dict;
|
|
}
|
|
|
|
// Recursively get other XRefs 'XRefStm', if any
|
|
obj = dict.get('XRefStm');
|
|
if (isInt(obj)) {
|
|
var pos = obj;
|
|
// ignore previously loaded xref streams
|
|
// (possible infinite recursion)
|
|
if (!(pos in this.xrefstms)) {
|
|
this.xrefstms[pos] = 1;
|
|
this.startXRefQueue.push(pos);
|
|
}
|
|
}
|
|
} else if (isInt(obj)) {
|
|
// Parse in-stream XRef
|
|
if (!isInt(parser.getObj()) ||
|
|
!isCmd(parser.getObj(), 'obj') ||
|
|
!isStream(obj = parser.getObj())) {
|
|
error('Invalid XRef stream');
|
|
}
|
|
dict = this.processXRefStream(obj);
|
|
if (!this.topDict) {
|
|
this.topDict = dict;
|
|
}
|
|
if (!dict) {
|
|
error('Failed to read XRef stream');
|
|
}
|
|
} else {
|
|
error('Invalid XRef stream header');
|
|
}
|
|
|
|
// Recursively get previous dictionary, if any
|
|
obj = dict.get('Prev');
|
|
if (isInt(obj)) {
|
|
this.startXRefQueue.push(obj);
|
|
} else if (isRef(obj)) {
|
|
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
|
|
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
|
|
this.startXRefQueue.push(obj.num);
|
|
}
|
|
|
|
this.startXRefQueue.shift();
|
|
}
|
|
|
|
return this.topDict;
|
|
} catch (e) {
|
|
if (e instanceof MissingDataException) {
|
|
throw e;
|
|
}
|
|
info('(while reading XRef): ' + e);
|
|
}
|
|
|
|
if (recoveryMode) {
|
|
return;
|
|
}
|
|
throw new XRefParseException();
|
|
},
|
|
|
|
getEntry: function XRef_getEntry(i) {
|
|
var xrefEntry = this.entries[i];
|
|
if (xrefEntry && !xrefEntry.free && xrefEntry.offset) {
|
|
return xrefEntry;
|
|
}
|
|
return null;
|
|
},
|
|
|
|
fetchIfRef: function XRef_fetchIfRef(obj) {
|
|
if (!isRef(obj)) {
|
|
return obj;
|
|
}
|
|
return this.fetch(obj);
|
|
},
|
|
|
|
fetch: function XRef_fetch(ref, suppressEncryption) {
|
|
assert(isRef(ref), 'ref object is not a reference');
|
|
var num = ref.num;
|
|
if (num in this.cache) {
|
|
var cacheEntry = this.cache[num];
|
|
return cacheEntry;
|
|
}
|
|
|
|
var xrefEntry = this.getEntry(num);
|
|
|
|
// the referenced entry can be free
|
|
if (xrefEntry === null) {
|
|
return (this.cache[num] = null);
|
|
}
|
|
|
|
if (xrefEntry.uncompressed) {
|
|
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
|
|
} else {
|
|
xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
|
|
}
|
|
if (isDict(xrefEntry)){
|
|
xrefEntry.objId = ref.toString();
|
|
} else if (isStream(xrefEntry)) {
|
|
xrefEntry.dict.objId = ref.toString();
|
|
}
|
|
return xrefEntry;
|
|
},
|
|
|
|
fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
|
|
suppressEncryption) {
|
|
var gen = ref.gen;
|
|
var num = ref.num;
|
|
if (xrefEntry.gen !== gen) {
|
|
error('inconsistent generation in XRef');
|
|
}
|
|
var stream = this.stream.makeSubStream(xrefEntry.offset +
|
|
this.stream.start);
|
|
var parser = new Parser(new Lexer(stream), true, this);
|
|
var obj1 = parser.getObj();
|
|
var obj2 = parser.getObj();
|
|
var obj3 = parser.getObj();
|
|
if (!isInt(obj1) || parseInt(obj1, 10) !== num ||
|
|
!isInt(obj2) || parseInt(obj2, 10) !== gen ||
|
|
!isCmd(obj3)) {
|
|
error('bad XRef entry');
|
|
}
|
|
if (!isCmd(obj3, 'obj')) {
|
|
// some bad PDFs use "obj1234" and really mean 1234
|
|
if (obj3.cmd.indexOf('obj') === 0) {
|
|
num = parseInt(obj3.cmd.substring(3), 10);
|
|
if (!isNaN(num)) {
|
|
return num;
|
|
}
|
|
}
|
|
error('bad XRef entry');
|
|
}
|
|
if (this.encrypt && !suppressEncryption) {
|
|
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
|
|
} else {
|
|
xrefEntry = parser.getObj();
|
|
}
|
|
if (!isStream(xrefEntry)) {
|
|
this.cache[num] = xrefEntry;
|
|
}
|
|
return xrefEntry;
|
|
},
|
|
|
|
fetchCompressed: function XRef_fetchCompressed(xrefEntry,
|
|
suppressEncryption) {
|
|
var tableOffset = xrefEntry.offset;
|
|
var stream = this.fetch(new Ref(tableOffset, 0));
|
|
if (!isStream(stream)) {
|
|
error('bad ObjStm stream');
|
|
}
|
|
var first = stream.dict.get('First');
|
|
var n = stream.dict.get('N');
|
|
if (!isInt(first) || !isInt(n)) {
|
|
error('invalid first and n parameters for ObjStm stream');
|
|
}
|
|
var parser = new Parser(new Lexer(stream), false, this);
|
|
parser.allowStreams = true;
|
|
var i, entries = [], num, nums = [];
|
|
// read the object numbers to populate cache
|
|
for (i = 0; i < n; ++i) {
|
|
num = parser.getObj();
|
|
if (!isInt(num)) {
|
|
error('invalid object number in the ObjStm stream: ' + num);
|
|
}
|
|
nums.push(num);
|
|
var offset = parser.getObj();
|
|
if (!isInt(offset)) {
|
|
error('invalid object offset in the ObjStm stream: ' + offset);
|
|
}
|
|
}
|
|
// read stream objects for cache
|
|
for (i = 0; i < n; ++i) {
|
|
entries.push(parser.getObj());
|
|
num = nums[i];
|
|
var entry = this.entries[num];
|
|
if (entry && entry.offset === tableOffset && entry.gen === i) {
|
|
this.cache[num] = entries[i];
|
|
}
|
|
}
|
|
xrefEntry = entries[xrefEntry.gen];
|
|
if (xrefEntry === undefined) {
|
|
error('bad XRef entry for compressed object');
|
|
}
|
|
return xrefEntry;
|
|
},
|
|
|
|
fetchIfRefAsync: function XRef_fetchIfRefAsync(obj) {
|
|
if (!isRef(obj)) {
|
|
return Promise.resolve(obj);
|
|
}
|
|
return this.fetchAsync(obj);
|
|
},
|
|
|
|
fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) {
|
|
return new Promise(function (resolve, reject) {
|
|
var tryFetch = function () {
|
|
try {
|
|
resolve(this.fetch(ref, suppressEncryption));
|
|
} catch (e) {
|
|
if (e instanceof MissingDataException) {
|
|
this.stream.manager.requestRange(e.begin, e.end, tryFetch);
|
|
return;
|
|
}
|
|
reject(e);
|
|
}
|
|
}.bind(this);
|
|
tryFetch();
|
|
}.bind(this));
|
|
},
|
|
|
|
getCatalogObj: function XRef_getCatalogObj() {
|
|
return this.root;
|
|
}
|
|
};
|
|
|
|
return XRef;
|
|
})();
|
|
|
|
/**
 * A NameTree is like a Dict but has some advantageous properties, see the
 * spec (7.9.6) for more details.
 * TODO: implement all the Dict functions and make this more efficient.
 */
var NameTree = (function NameTreeClosure() {
  /**
   * @param {*} root - the root node of the tree (may be a reference).
   * @param {XRef} xref - used to resolve references while walking the tree.
   */
  function NameTree(root, xref) {
    this.root = root;
    this.xref = xref;
  }

  NameTree.prototype = {
    /**
     * Walks the whole tree breadth-first and collects every key/value pair
     * found in the 'Names' arrays of its nodes.
     *
     * Malformed nodes (non-dictionaries, or 'Kids'/'Names' entries that are
     * not arrays) are skipped instead of aborting the traversal.
     *
     * @returns {Object} plain object mapping each key to its value, with the
     *   value resolved through `xref.fetchIfRef`. Empty when there is no
     *   root.
     */
    getAll: function NameTree_getAll() {
      var dict = {};
      if (!this.root) {
        return dict;
      }
      var xref = this.xref;
      // reading name tree
      var processed = new RefSet();
      processed.put(this.root);
      var queue = [this.root];
      while (queue.length > 0) {
        var i, n;
        var obj = xref.fetchIfRef(queue.shift());
        if (!isDict(obj)) {
          continue;
        }
        if (obj.has('Kids')) {
          var kids = obj.get('Kids');
          if (!isArray(kids)) {
            // Corrupt intermediate node, e.g. 'Kids' resolved to null.
            continue;
          }
          for (i = 0, n = kids.length; i < n; i++) {
            var kid = kids[i];
            // A node that is reachable twice means the tree contains a
            // cycle or a duplicate.
            if (processed.has(kid)) {
              error('invalid destinations');
            }
            queue.push(kid);
            processed.put(kid);
          }
          continue;
        }
        var names = obj.get('Names');
        if (isArray(names)) {
          // 'Names' is a flat list: key1, value1, key2, value2, ...
          for (i = 0, n = names.length; i < n; i += 2) {
            dict[names[i]] = xref.fetchIfRef(names[i + 1]);
          }
        }
      }
      return dict;
    }
  };
  return NameTree;
})();
|
|
|
|
/**
 * "A PDF file can refer to the contents of another file by using a File
 * Specification (PDF 1.1)", see the spec (7.11) for more details.
 * NOTE: Only embedded files are supported (as part of the attachments support)
 * TODO: support the 'URL' file system (with caching if !/V), portable
 * collections attributes and related files (/RF)
 */
var FileSpec = (function FileSpecClosure() {
  /**
   * @param {Dict} root - the file specification dictionary. When it is
   *   missing or not a dictionary the instance is left uninitialized.
   * @param {XRef} xref - used to resolve the embedded file stream.
   */
  function FileSpec(root, xref) {
    if (!root || !isDict(root)) {
      return;
    }
    this.xref = xref;
    this.root = root;
    if (root.has('FS')) {
      this.fs = root.get('FS');
    }
    this.description = root.has('Desc') ?
                         stringToPDFString(root.get('Desc')) :
                         '';
    if (root.has('RF')) {
      warn('Related file specifications are not supported');
    }
    this.contentAvailable = true;
    if (!root.has('EF')) {
      this.contentAvailable = false;
      warn('Non-embedded file specifications are not supported');
    }
  }

  // Platform specific keys, in order of preference.
  var PLATFORM_KEYS = ['UF', 'F', 'Unix', 'Mac', 'DOS'];

  /**
   * Returns the value of the first platform key present in `dict`, looking
   * in this order: UF, F, Unix, Mac, DOS.
   *
   * @param {*} dict - the dictionary to inspect.
   * @returns {*} the value found, or `null` when no key is present or when
   *   `dict` is not a dictionary (e.g. a corrupt '/EF' entry).
   */
  function pickPlatformItem(dict) {
    if (!isDict(dict)) {
      return null;
    }
    for (var i = 0; i < PLATFORM_KEYS.length; i++) {
      var key = PLATFORM_KEYS[i];
      if (dict.has(key)) {
        return dict.get(key);
      }
    }
    return null;
  }

  FileSpec.prototype = {
    /**
     * The file name with backslash escapes and backslash path separators
     * converted to forward slashes; 'unnamed' when the dictionary carries no
     * name. Computed on first access and cached in `_filename`.
     */
    get filename() {
      if (!this._filename && this.root) {
        var filename = pickPlatformItem(this.root) || 'unnamed';
        this._filename = stringToPDFString(filename).
                         replace(/\\\\/g, '\\').
                         replace(/\\\//g, '/').
                         replace(/\\/g, '/');
      }
      return this._filename;
    },

    /**
     * The bytes of the embedded file, or `null` when the specification has
     * no usable embedded file stream (a warning is emitted in that case).
     */
    get content() {
      if (!this.contentAvailable) {
        return null;
      }
      if (!this.contentRef && this.root) {
        this.contentRef = pickPlatformItem(this.root.get('EF'));
      }
      var content = null;
      if (this.contentRef) {
        var xref = this.xref;
        var fileObj = xref.fetchIfRef(this.contentRef);
        if (fileObj && isStream(fileObj)) {
          content = fileObj.getBytes();
        } else {
          warn('Embedded file specification points to non-existing/invalid ' +
               'content');
        }
      } else {
        warn('Embedded file specification does not have a content');
      }
      return content;
    },

    /**
     * A plain object holding the `filename` and `content` of this
     * specification.
     */
    get serializable() {
      return {
        filename: this.filename,
        content: this.content
      };
    }
  };
  return FileSpec;
})();
|
|
|
|
/**
 * A helper for loading missing data in object graphs. It traverses the graph
 * depth first and queues up any objects that have missing data. Once it has
 * traversed as many objects as are available, it attempts to bundle the
 * missing data requests and then resumes from the nodes that weren't ready.
 *
 * NOTE: It provides protection from circular references by keeping track of
 * loaded references. However, you must be careful not to load any graphs
 * that have references to the catalog or other pages since that will cause the
 * entire PDF document object graph to be traversed.
 */
var ObjectLoader = (function() {
  // True for the value kinds that addChildren() knows how to descend into
  // (or, for references, that walk() has to resolve first).
  function mayHaveChildren(value) {
    return isRef(value) || isDict(value) || isArray(value) || isStream(value);
  }

  // Pushes onto `nodesToVisit` every direct child of `node` that may itself
  // have children. Dictionaries and streams expose their children through
  // the underlying `map`; arrays through their elements.
  function addChildren(node, nodesToVisit) {
    var value;
    if (isDict(node) || isStream(node)) {
      var map;
      if (isDict(node)) {
        map = node.map;
      } else {
        map = node.dict.map;
      }
      for (var key in map) {
        value = map[key];
        if (mayHaveChildren(value)) {
          nodesToVisit.push(value);
        }
      }
    } else if (isArray(node)) {
      for (var i = 0, ii = node.length; i < ii; i++) {
        value = node[i];
        if (mayHaveChildren(value)) {
          nodesToVisit.push(value);
        }
      }
    }
  }

  /**
   * @param {Object} obj - container whose properties hold the graph roots.
   * @param {Array} keys - names of the properties of `obj` to start from.
   * @param {XRef} xref - used to fetch references and to request data.
   */
  function ObjectLoader(obj, keys, xref) {
    this.obj = obj;
    this.keys = keys;
    this.xref = xref;
    this.refSet = null;
  }

  ObjectLoader.prototype = {
    /**
     * Starts loading the graph.
     *
     * @returns {Promise} resolved once everything reachable from the
     *   configured keys is loaded; rejected when a deferred walk (one that
     *   runs after a data request completed) fails. A failure during the
     *   initial, synchronous walk is thrown to the caller.
     */
    load: function ObjectLoader_load() {
      var keys = this.keys;
      this.capability = createPromiseCapability();
      // Don't walk the graph if all the data is already loaded.
      if (!(this.xref.stream instanceof ChunkedStream) ||
          this.xref.stream.getMissingChunks().length === 0) {
        this.capability.resolve();
        return this.capability.promise;
      }

      this.refSet = new RefSet();
      // Setup the initial nodes to visit.
      var nodesToVisit = [];
      for (var i = 0; i < keys.length; i++) {
        nodesToVisit.push(this.obj[keys[i]]);
      }

      this.walk(nodesToVisit);
      return this.capability.promise;
    },

    /**
     * Visits `nodesToVisit` depth first. Nodes whose data is missing are
     * collected; if there are any, their byte ranges are requested in one
     * batch and the walk resumes from those nodes when the request calls
     * back. Otherwise the capability is resolved.
     *
     * @param {Array} nodesToVisit - stack of nodes still to visit; it is
     *   consumed by this method.
     */
    walk: function ObjectLoader_walk(nodesToVisit) {
      var nodesToRevisit = [];
      var pendingRequests = [];
      // DFS walk of the object graph.
      while (nodesToVisit.length) {
        var currentNode = nodesToVisit.pop();

        // Only references or chunked streams can cause missing data exceptions.
        if (isRef(currentNode)) {
          // Skip nodes that have already been visited.
          if (this.refSet.has(currentNode)) {
            continue;
          }
          try {
            var ref = currentNode;
            this.refSet.put(ref);
            currentNode = this.xref.fetch(currentNode);
          } catch (e) {
            if (!(e instanceof MissingDataException)) {
              throw e;
            }
            // `currentNode` is still the reference here, since the
            // assignment above never completed.
            nodesToRevisit.push(currentNode);
            pendingRequests.push({ begin: e.begin, end: e.end });
          }
        }
        if (currentNode && currentNode.getBaseStreams) {
          var baseStreams = currentNode.getBaseStreams();
          var foundMissingData = false;
          for (var i = 0; i < baseStreams.length; i++) {
            var stream = baseStreams[i];
            if (stream.getMissingChunks && stream.getMissingChunks().length) {
              foundMissingData = true;
              pendingRequests.push({
                begin: stream.start,
                end: stream.end
              });
            }
          }
          if (foundMissingData) {
            nodesToRevisit.push(currentNode);
          }
        }

        addChildren(currentNode, nodesToVisit);
      }

      if (pendingRequests.length) {
        this.xref.stream.manager.requestRanges(pendingRequests,
            function pendingRequestCallback() {
          for (var i = 0; i < nodesToRevisit.length; i++) {
            var node = nodesToRevisit[i];
            // Remove any reference nodes from the current refset so they
            // aren't skipped when we revisit them.
            if (isRef(node)) {
              this.refSet.remove(node);
            }
          }
          try {
            this.walk(nodesToRevisit);
          } catch (e) {
            // Nobody up the stack is waiting for this walk any more; without
            // rejecting here the promise returned by load() would stay
            // pending forever and the exception would leak into the stream
            // manager's callback handling.
            this.capability.reject(e);
          }
        }.bind(this));
        return;
      }
      // Everything is loaded.
      this.refSet = null;
      this.capability.resolve();
    }
  };

  return ObjectLoader;
})();
|