pdf.js/src/core/obj.js

1782 lines
57 KiB
JavaScript
Raw Normal View History

2012-09-01 07:48:21 +09:00
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2011-10-26 10:18:22 +09:00
'use strict';
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('pdfjs/core/obj', ['exports', 'pdfjs/shared/util',
'pdfjs/core/primitives', 'pdfjs/core/crypto', 'pdfjs/core/parser',
'pdfjs/core/chunked_stream', 'pdfjs/core/colorspace'], factory);
} else if (typeof exports !== 'undefined') {
factory(exports, require('../shared/util.js'), require('./primitives.js'),
require('./crypto.js'), require('./parser.js'),
require('./chunked_stream.js'), require('./colorspace.js'));
} else {
factory((root.pdfjsCoreObj = {}), root.pdfjsSharedUtil,
root.pdfjsCorePrimitives, root.pdfjsCoreCrypto, root.pdfjsCoreParser,
root.pdfjsCoreChunkedStream, root.pdfjsCoreColorSpace);
2011-10-25 08:55:23 +09:00
}
}(this, function (exports, sharedUtil, corePrimitives, coreCrypto, coreParser,
coreChunkedStream, coreColorSpace) {
// Local aliases for the helpers exported by the shared utility module
// (`sharedUtil`, injected by the module loader above), so that the rest of
// this file can refer to them by their short names.
var InvalidPDFException = sharedUtil.InvalidPDFException;
var MissingDataException = sharedUtil.MissingDataException;
var XRefParseException = sharedUtil.XRefParseException;
var assert = sharedUtil.assert;
var bytesToString = sharedUtil.bytesToString;
var createPromiseCapability = sharedUtil.createPromiseCapability;
var error = sharedUtil.error;
var info = sharedUtil.info;
var isArray = sharedUtil.isArray;
var isBool = sharedUtil.isBool;
var isInt = sharedUtil.isInt;
var isString = sharedUtil.isString;
var shadow = sharedUtil.shadow;
var stringToPDFString = sharedUtil.stringToPDFString;
var stringToUTF8String = sharedUtil.stringToUTF8String;
var warn = sharedUtil.warn;
var createValidAbsoluteUrl = sharedUtil.createValidAbsoluteUrl;
2015-12-26 01:35:21 +09:00
var Util = sharedUtil.Util;
// Local aliases for the PDF primitive types and type predicates
// (`corePrimitives`).
var Ref = corePrimitives.Ref;
var RefSet = corePrimitives.RefSet;
var RefSetCache = corePrimitives.RefSetCache;
var isName = corePrimitives.isName;
var isCmd = corePrimitives.isCmd;
var isDict = corePrimitives.isDict;
var isRef = corePrimitives.isRef;
var isRefsEqual = corePrimitives.isRefsEqual;
var isStream = corePrimitives.isStream;
// Local aliases for the remaining core modules used in this file.
var CipherTransformFactory = coreCrypto.CipherTransformFactory;
var Lexer = coreParser.Lexer;
var Parser = coreParser.Parser;
var ChunkedStream = coreChunkedStream.ChunkedStream;
var ColorSpace = coreColorSpace.ColorSpace;
2013-06-26 02:33:53 +09:00
2011-12-07 07:18:40 +09:00
var Catalog = (function CatalogClosure() {
/**
 * Wraps the document catalog dictionary obtained from `xref`.
 *
 * @param {Object} pdfManager - Stored on the instance as `pdfManager`.
 * @param {Object} xref - Cross-reference table; its `getCatalogObj()` result
 *   becomes `this.catDict` and must be a dictionary.
 * @param {Object} pageFactory - Object whose `createPage` is used by
 *   `getPage`.
 */
function Catalog(pdfManager, xref, pageFactory) {
  this.pdfManager = pdfManager;
  this.xref = xref;

  var catalogDict = xref.getCatalogObj();
  this.catDict = catalogDict;
  this.fontCache = new RefSetCache();
  assert(isDict(catalogDict), 'catalog object is not a dictionary');

  // TODO refactor to move getPage() to the PDFDocument.
  this.pageFactory = pageFactory;
  // Page promises created by `getPage`, indexed by page index.
  this.pagePromises = [];
}
2011-12-07 07:18:40 +09:00
Catalog.prototype = {
2012-03-25 03:59:51 +09:00
get metadata() {
2012-05-28 08:03:04 +09:00
var streamRef = this.catDict.getRaw('Metadata');
if (!isRef(streamRef)) {
2012-05-28 08:03:04 +09:00
return shadow(this, 'metadata', null);
}
2012-05-28 08:03:04 +09:00
var encryptMetadata = (!this.xref.encrypt ? false :
this.xref.encrypt.encryptMetadata);
2012-05-28 08:03:04 +09:00
var stream = this.xref.fetch(streamRef, !encryptMetadata);
2012-03-27 07:05:14 +09:00
var metadata;
if (stream && isDict(stream.dict)) {
var type = stream.dict.get('Type');
var subtype = stream.dict.get('Subtype');
2012-03-25 03:59:51 +09:00
if (isName(type, 'Metadata') && isName(subtype, 'XML')) {
// XXX: This should examine the charset the XML document defines,
// however since there are currently no real means to decode
// arbitrary charsets, let's just hope that the author of the PDF
// was reasonable enough to stick with the XML default charset,
// which is UTF-8.
2012-05-28 09:00:13 +09:00
try {
metadata = stringToUTF8String(bytesToString(stream.getBytes()));
} catch (e) {
2012-05-30 01:01:46 +09:00
info('Skipping invalid metadata.');
2012-05-28 09:00:13 +09:00
}
2012-03-25 03:59:51 +09:00
}
}
2012-03-27 07:05:14 +09:00
return shadow(this, 'metadata', metadata);
2012-03-25 03:59:51 +09:00
},
2011-10-25 08:55:23 +09:00
get toplevelPagesDict() {
var pagesObj = this.catDict.get('Pages');
assert(isDict(pagesObj), 'invalid top-level pages dictionary');
2011-10-25 08:55:23 +09:00
// shadow the prototype getter
return shadow(this, 'toplevelPagesDict', pagesObj);
2011-10-25 08:55:23 +09:00
},
get documentOutline() {
var obj = null;
try {
obj = this.readDocumentOutline();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read document outline');
}
return shadow(this, 'documentOutline', obj);
},
readDocumentOutline: function Catalog_readDocumentOutline() {
var obj = this.catDict.get('Outlines');
if (!isDict(obj)) {
return null;
}
obj = obj.getRaw('First');
if (!isRef(obj)) {
return null;
}
2011-10-25 08:55:23 +09:00
var root = { items: [] };
var queue = [{obj: obj, parent: root}];
// To avoid recursion, keep track of the already processed items.
var processed = new RefSet();
processed.put(obj);
var xref = this.xref, blackColor = new Uint8Array(3);
while (queue.length > 0) {
var i = queue.shift();
var outlineDict = xref.fetchIfRef(i.obj);
if (outlineDict === null) {
continue;
}
assert(outlineDict.has('Title'), 'Invalid outline item');
var data = { url: null, dest: null, };
Catalog.parseDestDictionary({
destDict: outlineDict,
resultObj: data,
docBaseUrl: this.pdfManager.docBaseUrl,
});
var title = outlineDict.get('Title');
var flags = outlineDict.get('F') || 0;
var color = outlineDict.getArray('C'), rgbColor = blackColor;
// We only need to parse the color when it's valid, and non-default.
if (isArray(color) && color.length === 3 &&
(color[0] !== 0 || color[1] !== 0 || color[2] !== 0)) {
rgbColor = ColorSpace.singletons.rgb.getRgb(color, 0);
}
var outlineItem = {
dest: data.dest,
url: data.url,
unsafeUrl: data.unsafeUrl,
newWindow: data.newWindow,
title: stringToPDFString(title),
color: rgbColor,
count: outlineDict.get('Count'),
bold: !!(flags & 2),
italic: !!(flags & 1),
items: []
};
i.parent.items.push(outlineItem);
obj = outlineDict.getRaw('First');
if (isRef(obj) && !processed.has(obj)) {
queue.push({obj: obj, parent: outlineItem});
processed.put(obj);
}
obj = outlineDict.getRaw('Next');
if (isRef(obj) && !processed.has(obj)) {
queue.push({obj: obj, parent: i.parent});
processed.put(obj);
2011-10-25 08:55:23 +09:00
}
}
return (root.items.length > 0 ? root.items : null);
2011-10-25 08:55:23 +09:00
},
get numPages() {
var obj = this.toplevelPagesDict.get('Count');
assert(
2011-10-25 08:55:23 +09:00
isInt(obj),
'page count in top level pages object is not an integer'
);
// shadow the prototype getter
return shadow(this, 'num', obj);
},
get destinations() {
2012-04-05 07:29:50 +09:00
function fetchDestination(dest) {
2011-10-25 08:55:23 +09:00
return isDict(dest) ? dest.get('D') : dest;
}
var xref = this.xref;
var dests = {}, nameTreeRef, nameDictionaryRef;
var obj = this.catDict.get('Names');
if (obj && obj.has('Dests')) {
nameTreeRef = obj.getRaw('Dests');
} else if (this.catDict.has('Dests')) {
2011-10-25 08:55:23 +09:00
nameDictionaryRef = this.catDict.get('Dests');
}
2011-10-25 08:55:23 +09:00
if (nameDictionaryRef) {
// reading simple destination dictionary
obj = nameDictionaryRef;
2011-10-25 08:55:23 +09:00
obj.forEach(function catalogForEach(key, value) {
if (!value) {
return;
}
2012-04-05 07:29:50 +09:00
dests[key] = fetchDestination(value);
2011-10-25 08:55:23 +09:00
});
}
if (nameTreeRef) {
var nameTree = new NameTree(nameTreeRef, xref);
var names = nameTree.getAll();
for (var name in names) {
dests[name] = fetchDestination(names[name]);
2011-10-25 08:55:23 +09:00
}
}
return shadow(this, 'destinations', dests);
},
getDestination: function Catalog_getDestination(destinationId) {
function fetchDestination(dest) {
return isDict(dest) ? dest.get('D') : dest;
}
var xref = this.xref;
var dest = null, nameTreeRef, nameDictionaryRef;
var obj = this.catDict.get('Names');
if (obj && obj.has('Dests')) {
nameTreeRef = obj.getRaw('Dests');
} else if (this.catDict.has('Dests')) {
nameDictionaryRef = this.catDict.get('Dests');
}
if (nameDictionaryRef) { // Simple destination dictionary.
var value = nameDictionaryRef.get(destinationId);
if (value) {
dest = fetchDestination(value);
}
}
if (nameTreeRef) {
var nameTree = new NameTree(nameTreeRef, xref);
dest = fetchDestination(nameTree.get(destinationId));
}
return dest;
},
get pageLabels() {
var obj = null;
try {
obj = this.readPageLabels();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read page labels.');
}
return shadow(this, 'pageLabels', obj);
},
readPageLabels: function Catalog_readPageLabels() {
var obj = this.catDict.getRaw('PageLabels');
if (!obj) {
return null;
}
var pageLabels = new Array(this.numPages);
var style = null;
var prefix = '';
var numberTree = new NumberTree(obj, this.xref);
var nums = numberTree.getAll();
var currentLabel = '', currentIndex = 1;
for (var i = 0, ii = this.numPages; i < ii; i++) {
if (i in nums) {
var labelDict = nums[i];
assert(isDict(labelDict), 'The PageLabel is not a dictionary.');
var type = labelDict.get('Type');
assert(!type || isName(type, 'PageLabel'),
'Invalid type in PageLabel dictionary.');
var s = labelDict.get('S');
assert(!s || isName(s), 'Invalid style in PageLabel dictionary.');
style = s ? s.name : null;
var p = labelDict.get('P');
assert(!p || isString(p), 'Invalid prefix in PageLabel dictionary.');
prefix = p ? stringToPDFString(p) : '';
var st = labelDict.get('St');
assert(!st || (isInt(st) && st >= 1),
'Invalid start in PageLabel dictionary.');
currentIndex = st || 1;
}
switch (style) {
case 'D':
currentLabel = currentIndex;
break;
case 'R':
case 'r':
currentLabel = Util.toRoman(currentIndex, style === 'r');
break;
case 'A':
case 'a':
var LIMIT = 26; // Use only the characters A--Z, or a--z.
var A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61;
var baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE);
var letterIndex = currentIndex - 1;
var character = String.fromCharCode(baseCharCode +
(letterIndex % LIMIT));
var charBuf = [];
for (var j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) {
charBuf.push(character);
}
currentLabel = charBuf.join('');
break;
default:
assert(!style,
'Invalid style "' + style + '" in PageLabel dictionary.');
}
pageLabels[i] = prefix + currentLabel;
currentLabel = '';
currentIndex++;
}
return pageLabels;
},
get attachments() {
var xref = this.xref;
2014-05-19 06:35:29 +09:00
var attachments = null, nameTreeRef;
var obj = this.catDict.get('Names');
if (obj) {
nameTreeRef = obj.getRaw('EmbeddedFiles');
}
if (nameTreeRef) {
var nameTree = new NameTree(nameTreeRef, xref);
var names = nameTree.getAll();
for (var name in names) {
var fs = new FileSpec(names[name], xref);
if (!attachments) {
attachments = Object.create(null);
}
attachments[stringToPDFString(name)] = fs.serializable;
}
}
return shadow(this, 'attachments', attachments);
},
get javaScript() {
var xref = this.xref;
var obj = this.catDict.get('Names');
var javaScript = [];
function appendIfJavaScriptDict(jsDict) {
var type = jsDict.get('S');
if (!isName(type, 'JavaScript')) {
return;
}
var js = jsDict.get('JS');
if (isStream(js)) {
js = bytesToString(js.getBytes());
} else if (!isString(js)) {
return;
}
javaScript.push(stringToPDFString(js));
}
if (obj && obj.has('JavaScript')) {
var nameTree = new NameTree(obj.getRaw('JavaScript'), xref);
var names = nameTree.getAll();
for (var name in names) {
// We don't really use the JavaScript right now. This code is
// defensive so we don't cause errors on document load.
var jsDict = names[name];
if (isDict(jsDict)) {
appendIfJavaScriptDict(jsDict);
}
}
}
2014-05-24 09:07:25 +09:00
// Append OpenAction actions to javaScript array
var openactionDict = this.catDict.get('OpenAction');
if (isDict(openactionDict, 'Action')) {
2014-05-24 09:07:25 +09:00
var actionType = openactionDict.get('S');
if (isName(actionType, 'Named')) {
// The named Print action is not a part of the PDF 1.7 specification,
// but is supported by many PDF readers/writers (including Adobe's).
var action = openactionDict.get('N');
if (isName(action, 'Print')) {
javaScript.push('print({});');
}
} else {
appendIfJavaScriptDict(openactionDict);
2014-05-24 09:07:25 +09:00
}
}
return shadow(this, 'javaScript', javaScript);
},
2013-02-07 08:19:29 +09:00
cleanup: function Catalog_cleanup() {
2014-05-10 10:21:15 +09:00
var promises = [];
this.fontCache.forEach(function (promise) {
promises.push(promise);
});
2014-05-21 11:57:04 +09:00
return Promise.all(promises).then(function (translatedFonts) {
for (var i = 0, ii = translatedFonts.length; i < ii; i++) {
var font = translatedFonts[i].dict;
delete font.translated;
2014-05-10 10:21:15 +09:00
}
this.fontCache.clear();
}.bind(this));
},
2013-02-07 08:19:29 +09:00
getPage: function Catalog_getPage(pageIndex) {
if (!(pageIndex in this.pagePromises)) {
this.pagePromises[pageIndex] = this.getPageDict(pageIndex).then(
function (a) {
var dict = a[0];
var ref = a[1];
return this.pageFactory.createPage(pageIndex, dict, ref,
this.fontCache);
}.bind(this)
);
2013-02-07 08:19:29 +09:00
}
return this.pagePromises[pageIndex];
},
getPageDict: function Catalog_getPageDict(pageIndex) {
var capability = createPromiseCapability();
var nodesToVisit = [this.catDict.getRaw('Pages')];
var currentPageIndex = 0;
var xref = this.xref;
var checkAllKids = false;
function next() {
while (nodesToVisit.length) {
var currentNode = nodesToVisit.pop();
if (isRef(currentNode)) {
xref.fetchAsync(currentNode).then(function (obj) {
if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) {
if (pageIndex === currentPageIndex) {
capability.resolve([obj, currentNode]);
} else {
currentPageIndex++;
next();
}
return;
}
nodesToVisit.push(obj);
next();
}, capability.reject);
return;
2013-02-07 08:19:29 +09:00
}
// Must be a child page dictionary.
2013-02-07 08:19:29 +09:00
assert(
isDict(currentNode),
2013-02-07 08:19:29 +09:00
'page dictionary kid reference points to wrong type of object'
);
var count = currentNode.get('Count');
// If the current node doesn't have any children, avoid getting stuck
// in an empty node further down in the tree (see issue5644.pdf).
if (count === 0) {
checkAllKids = true;
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
2013-02-07 08:19:29 +09:00
var kids = currentNode.get('Kids');
assert(isArray(kids), 'page dictionary kids object is not an array');
if (!checkAllKids && count === kids.length) {
// Nodes that don't have the page have been skipped and this is the
// bottom of the tree which means the page requested must be a
// descendant of this pages node. Ideally we would just resolve the
// promise with the page ref here, but there is the case where more
// pages nodes could link to single a page (see issue 3666 pdf). To
// handle this push it back on the queue so if it is a pages node it
// will be descended into.
nodesToVisit = [kids[pageIndex - currentPageIndex]];
currentPageIndex = pageIndex;
continue;
} else {
for (var last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
}
}
2013-02-07 08:19:29 +09:00
}
capability.reject('Page index ' + pageIndex + ' not found.');
}
next();
return capability.promise;
},
getPageIndex: function Catalog_getPageIndex(pageRef) {
// The page tree nodes have the count of all the leaves below them. To get
// how many pages are before we just have to walk up the tree and keep
// adding the count of siblings to the left of the node.
var xref = this.xref;
function pagesBeforeRef(kidRef) {
var total = 0;
var parentRef;
return xref.fetchAsync(kidRef).then(function (node) {
if (isRefsEqual(kidRef, pageRef) && !isDict(node, 'Page') &&
!(isDict(node) && !node.has('Type') && node.has('Contents'))) {
throw new Error('The reference does not point to a /Page Dict.');
}
if (!node) {
return null;
}
assert(isDict(node), 'node must be a Dict.');
parentRef = node.getRaw('Parent');
return node.getAsync('Parent');
}).then(function (parent) {
if (!parent) {
return null;
}
assert(isDict(parent), 'parent must be a Dict.');
return parent.getAsync('Kids');
}).then(function (kids) {
if (!kids) {
return null;
}
var kidPromises = [];
var found = false;
for (var i = 0; i < kids.length; i++) {
var kid = kids[i];
assert(isRef(kid), 'kid must be a Ref.');
if (kid.num === kidRef.num) {
found = true;
break;
}
kidPromises.push(xref.fetchAsync(kid).then(function (kid) {
if (kid.has('Count')) {
var count = kid.get('Count');
total += count;
} else { // page leaf node
total++;
}
}));
}
if (!found) {
error('kid ref not found in parents kids');
}
return Promise.all(kidPromises).then(function () {
return [total, parentRef];
});
});
2011-10-25 08:55:23 +09:00
}
var total = 0;
function next(ref) {
return pagesBeforeRef(ref).then(function (args) {
if (!args) {
return total;
}
var count = args[0];
var parentRef = args[1];
total += count;
return next(parentRef);
});
}
return next(pageRef);
2011-10-25 08:55:23 +09:00
}
};
/**
* @typedef ParseDestDictionaryParameters
* @property {Dict} destDict - The dictionary containing the destination.
* @property {Object} resultObj - The object where the parsed destination
* properties will be placed.
* @property {string} docBaseUrl - (optional) The document base URL that is
* used when attempting to recover valid absolute URLs from relative ones.
*/
/**
* Helper function used to parse the contents of destination dictionaries.
* @param {ParseDestDictionaryParameters} params
*/
// Parses the action (/A) or simple destination (/Dest) of `params.destDict`
// and writes what it finds onto `params.resultObj`:
//  - `url`: the validated absolute URL (only when one could be created),
//  - `unsafeUrl`: the URL string as found in the file,
//  - `dest`: a named (string) or explicit (array) destination,
//  - `newWindow`: the /NewWindow flag of GoToR/Launch actions,
//  - `action`: the name of a /Named action.
// Nothing is written, and a warning is logged, when `destDict` is not a
// dictionary, `resultObj` is not an object, or the action has no valid /S.
Catalog.parseDestDictionary = function Catalog_parseDestDictionary(params) {
  // Lets URLs beginning with 'www.' default to using the 'http://' protocol.
  function addDefaultProtocolToUrl(url) {
    if (url.indexOf('www.') === 0) {
      return ('http://' + url);
    }
    return url;
  }
  // According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
  // in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280.
  function tryConvertUrlEncoding(url) {
    try {
      return stringToUTF8String(url);
    } catch (e) {
      return url;
    }
  }

  var destDict = params.destDict;
  if (!isDict(destDict)) {
    warn('Catalog_parseDestDictionary: "destDict" must be a dictionary.');
    return;
  }
  var resultObj = params.resultObj;
  // `typeof null` is 'object' too, so `null` is rejected explicitly.
  if (!resultObj || typeof resultObj !== 'object') {
    warn('Catalog_parseDestDictionary: "resultObj" must be an object.');
    return;
  }
  var docBaseUrl = params.docBaseUrl || null;

  var action = destDict.get('A'), url, dest;
  if (isDict(action)) {
    var actionType = action.get('S');
    if (!isName(actionType)) {
      // A missing or malformed /S used to throw a TypeError here, which
      // aborted the caller instead of just skipping this one action.
      warn('Catalog_parseDestDictionary: Invalid type in Action dictionary.');
      return;
    }
    var linkType = actionType.name;

    switch (linkType) {
      case 'URI':
        url = action.get('URI');
        if (isName(url)) {
          // Some bad PDFs do not put parentheses around relative URLs.
          url = '/' + url.name;
        } else if (isString(url)) {
          url = addDefaultProtocolToUrl(url);
        }
        // TODO: pdf spec mentions urls can be relative to a Base
        // entry in the dictionary.
        break;

      case 'GoTo':
        dest = action.get('D');
        break;

      case 'Launch':
        // We neither want, nor can, support arbitrary 'Launch' actions.
        // However, in practice they are mostly used for linking to other PDF
        // files, which we thus attempt to support (utilizing `docBaseUrl`).
        /* falls through */

      case 'GoToR':
        var urlDict = action.get('F');
        if (isDict(urlDict)) {
          // We assume that we found a FileSpec dictionary
          // and fetch the URL without checking any further.
          url = urlDict.get('F') || null;
        } else if (isString(urlDict)) {
          url = urlDict;
        }

        // NOTE: the destination is relative to the *remote* document.
        var remoteDest = action.get('D');
        if (remoteDest) {
          if (isName(remoteDest)) {
            remoteDest = remoteDest.name;
          }
          if (isString(url)) {
            var baseUrl = url.split('#')[0];
            if (isString(remoteDest)) {
              // In practice, a named destination may contain only a number.
              // If that happens, use the '#nameddest=' form to avoid the link
              // redirecting to a page, instead of the correct destination.
              url = baseUrl + '#' +
                (/^\d+$/.test(remoteDest) ? 'nameddest=' : '') + remoteDest;
            } else if (isArray(remoteDest)) {
              url = baseUrl + '#' + JSON.stringify(remoteDest);
            }
          }
        }
        // The 'NewWindow' property, equal to `LinkTarget.BLANK`.
        var newWindow = action.get('NewWindow');
        if (isBool(newWindow)) {
          resultObj.newWindow = newWindow;
        }
        break;

      case 'Named':
        var namedAction = action.get('N');
        if (isName(namedAction)) {
          resultObj.action = namedAction.name;
        }
        break;

      case 'JavaScript':
        var jsAction = action.get('JS'), js;

        if (isStream(jsAction)) {
          js = bytesToString(jsAction.getBytes());
        } else if (isString(jsAction)) {
          js = jsAction;
        }

        if (js) {
          // Attempt to recover valid URLs from 'JS' entries with certain
          // white-listed formats, e.g.
          //  - window.open('http://example.com')
          //  - app.launchURL('http://example.com', true)
          var URL_OPEN_METHODS = [
            'app.launchURL',
            'window.open'
          ];
          // The dots of the method names are escaped so that they match
          // literally, and the case-insensitive flag is given to the
          // constructor (`exec` takes no flags argument).
          var regex = new RegExp(
            '^(?:' + URL_OPEN_METHODS.join('|').split('.').join('\\.') + ')' +
            '\\((?:\'|\")(\\S+)(?:\'|\")(?:,|\\))', 'i');

          var jsUrl = regex.exec(stringToPDFString(js));
          if (jsUrl && jsUrl[1]) {
            url = jsUrl[1];
            break;
          }
        }
        /* falls through */
      default:
        warn('Catalog_parseDestDictionary: Unrecognized link type "' +
             linkType + '".');
        break;
    }
  } else if (destDict.has('Dest')) { // Simple destination link.
    dest = destDict.get('Dest');
  }

  if (isString(url)) {
    url = tryConvertUrlEncoding(url);
    var absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl);
    if (absoluteUrl) {
      resultObj.url = absoluteUrl.href;
    }
    resultObj.unsafeUrl = url;
  }
  if (dest) {
    if (isName(dest)) {
      dest = dest.name;
    }
    if (isString(dest) || isArray(dest)) {
      resultObj.dest = dest;
    }
  }
};
2011-12-07 07:18:40 +09:00
return Catalog;
2011-10-25 08:55:23 +09:00
})();
2011-12-07 07:18:40 +09:00
var XRef = (function XRefClosure() {
/**
 * Holds the cross-reference state for a PDF file.
 *
 * @param {Object} stream - The stream the file is read from.
 * @param {Object} pdfManager - Stored on the instance; `parse` reads its
 *   `password`.
 */
function XRef(stream, pdfManager) {
  this.stream = stream;
  this.pdfManager = pdfManager;

  // Cross-reference entries, indexed by object number.
  this.entries = [];
  this.xrefstms = Object.create(null);

  // prepare the XRef cache
  this.cache = [];

  this.stats = {
    streamTypes: [],
    fontTypes: []
  };
}
2011-12-07 07:18:40 +09:00
XRef.prototype = {
2013-02-07 08:19:29 +09:00
setStartXRef: function XRef_setStartXRef(startXRef) {
// Store the starting positions of xref tables as we process them
// so we can recover from missing data errors
this.startXRefQueue = [startXRef];
},
parse: function XRef_parse(recoveryMode) {
var trailerDict;
if (!recoveryMode) {
trailerDict = this.readXRef();
} else {
warn('Indexing all PDF objects');
trailerDict = this.indexObjects();
}
trailerDict.assignXref(this);
this.trailer = trailerDict;
var encrypt = trailerDict.get('Encrypt');
if (isDict(encrypt)) {
2013-02-07 08:19:29 +09:00
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
// The 'Encrypt' dictionary itself should not be encrypted, and by
// setting `suppressEncryption` we can prevent an infinite loop inside
// of `XRef_fetchUncompressed` if the dictionary contains indirect
// objects (fixes issue7665.pdf).
encrypt.suppressEncryption = true;
this.encrypt = new CipherTransformFactory(encrypt, fileId,
this.pdfManager.password);
2013-02-07 08:19:29 +09:00
}
// get the root dictionary (catalog) object
if (!(this.root = trailerDict.get('Root'))) {
error('Invalid root reference');
}
},
processXRefTable: function XRef_processXRefTable(parser) {
if (!('tableState' in this)) {
// Stores state of the table as we process it so we can resume
// from middle of table in case of missing data error
this.tableState = {
entryNum: 0,
streamPos: parser.lexer.stream.pos,
parserBuf1: parser.buf1,
parserBuf2: parser.buf2
};
}
var obj = this.readXRefTable(parser);
// Sanity check
if (!isCmd(obj, 'trailer')) {
2013-02-07 08:19:29 +09:00
error('Invalid XRef table: could not find trailer dictionary');
}
2013-02-07 08:19:29 +09:00
// Read trailer dictionary, e.g.
// trailer
// << /Size 22
// /Root 20R
// /Info 10R
// /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
// >>
// The parser goes through the entire stream << ... >> and provides
// a getter interface for the key-value table
var dict = parser.getObj();
// The pdflib PDF generator can generate a nested trailer dictionary
if (!isDict(dict) && dict.dict) {
dict = dict.dict;
}
if (!isDict(dict)) {
2013-02-07 08:19:29 +09:00
error('Invalid XRef table: could not parse trailer dictionary');
}
2013-02-07 08:19:29 +09:00
delete this.tableState;
return dict;
},
readXRefTable: function XRef_readXRefTable(parser) {
2012-01-31 23:01:04 +09:00
// Example of cross-reference table:
// xref
// 0 1 <-- subsection header (first obj #, obj count)
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
// 23 2 <-- subsection header ... and so on ...
2012-02-01 00:57:32 +09:00
// 0000025518 00002 n
2012-01-31 23:01:04 +09:00
// 0000025635 00000 n
// trailer
// ...
2012-02-01 00:57:32 +09:00
2013-02-07 08:19:29 +09:00
var stream = parser.lexer.stream;
var tableState = this.tableState;
stream.pos = tableState.streamPos;
parser.buf1 = tableState.parserBuf1;
parser.buf2 = tableState.parserBuf2;
2012-01-31 23:01:04 +09:00
// Outer loop is over subsection headers
2011-10-25 08:55:23 +09:00
var obj;
2012-01-31 23:01:04 +09:00
2013-02-07 08:19:29 +09:00
while (true) {
if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
if (isCmd(obj = parser.getObj(), 'trailer')) {
break;
}
tableState.firstEntryNum = obj;
tableState.entryCount = parser.getObj();
}
var first = tableState.firstEntryNum;
var count = tableState.entryCount;
if (!isInt(first) || !isInt(count)) {
2012-01-31 23:57:12 +09:00
error('Invalid XRef table: wrong types in subsection header');
}
2012-01-31 23:01:04 +09:00
// Inner loop is over objects themselves
2013-02-07 08:19:29 +09:00
for (var i = tableState.entryNum; i < count; i++) {
tableState.streamPos = stream.pos;
tableState.entryNum = i;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
2011-10-25 08:55:23 +09:00
var entry = {};
2012-01-31 23:01:04 +09:00
entry.offset = parser.getObj();
entry.gen = parser.getObj();
var type = parser.getObj();
if (isCmd(type, 'f')) {
2011-10-25 08:55:23 +09:00
entry.free = true;
} else if (isCmd(type, 'n')) {
2012-01-31 23:01:04 +09:00
entry.uncompressed = true;
}
2011-10-25 08:55:23 +09:00
2012-01-31 23:01:04 +09:00
// Validate entry obj
2012-02-01 00:57:32 +09:00
if (!isInt(entry.offset) || !isInt(entry.gen) ||
!(entry.free || entry.uncompressed)) {
2012-02-01 00:49:06 +09:00
error('Invalid entry in XRef subsection: ' + first + ', ' + count);
2011-10-25 08:55:23 +09:00
}
// The first xref table entry, i.e. obj 0, should be free. Attempting
// to adjust an incorrect first obj # (fixes issue 3248 and 7229).
if (i === 0 && entry.free && first === 1) {
first = 0;
}
if (!this.entries[i + first]) {
2012-01-31 23:57:12 +09:00
this.entries[i + first] = entry;
}
2011-10-25 08:55:23 +09:00
}
2013-02-07 08:19:29 +09:00
tableState.entryNum = 0;
tableState.streamPos = stream.pos;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
delete tableState.firstEntryNum;
delete tableState.entryCount;
2011-10-25 08:55:23 +09:00
}
// Sanity check: as per spec, first object must be free
if (this.entries[0] && !this.entries[0].free) {
2012-02-01 00:49:06 +09:00
error('Invalid XRef table: unexpected first object');
}
2013-02-07 08:19:29 +09:00
return obj;
},
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
processXRefStream: function XRef_processXRefStream(stream) {
if (!('streamState' in this)) {
// Stores state of the stream as we process it so we can resume
// from middle of stream in case of missing data error
var streamParameters = stream.dict;
2013-02-07 08:19:29 +09:00
var byteWidths = streamParameters.get('W');
var range = streamParameters.get('Index');
if (!range) {
range = [0, streamParameters.get('Size')];
}
2011-10-25 08:55:23 +09:00
2013-02-07 08:19:29 +09:00
this.streamState = {
entryRanges: range,
byteWidths: byteWidths,
entryNum: 0,
streamPos: stream.pos
};
}
this.readXRefStream(stream);
delete this.streamState;
return stream.dict;
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
readXRefStream: function XRef_readXRefStream(stream) {
2011-10-25 08:55:23 +09:00
var i, j;
2013-02-07 08:19:29 +09:00
var streamState = this.streamState;
stream.pos = streamState.streamPos;
var byteWidths = streamState.byteWidths;
var typeFieldWidth = byteWidths[0];
var offsetFieldWidth = byteWidths[1];
var generationFieldWidth = byteWidths[2];
var entryRanges = streamState.entryRanges;
while (entryRanges.length > 0) {
var first = entryRanges[0];
var n = entryRanges[1];
if (!isInt(first) || !isInt(n)) {
2011-10-25 08:55:23 +09:00
error('Invalid XRef range fields: ' + first + ', ' + n);
}
2011-10-25 08:55:23 +09:00
if (!isInt(typeFieldWidth) || !isInt(offsetFieldWidth) ||
!isInt(generationFieldWidth)) {
error('Invalid XRef entry fields length: ' + first + ', ' + n);
}
2013-02-07 08:19:29 +09:00
for (i = streamState.entryNum; i < n; ++i) {
streamState.entryNum = i;
streamState.streamPos = stream.pos;
2011-10-25 08:55:23 +09:00
var type = 0, offset = 0, generation = 0;
for (j = 0; j < typeFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
type = (type << 8) | stream.getByte();
}
// if type field is absent, its default value is 1
if (typeFieldWidth === 0) {
2011-10-25 08:55:23 +09:00
type = 1;
}
for (j = 0; j < offsetFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
offset = (offset << 8) | stream.getByte();
}
for (j = 0; j < generationFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
generation = (generation << 8) | stream.getByte();
}
2011-10-25 08:55:23 +09:00
var entry = {};
entry.offset = offset;
entry.gen = generation;
switch (type) {
case 0:
entry.free = true;
break;
case 1:
entry.uncompressed = true;
break;
case 2:
break;
default:
error('Invalid XRef entry type: ' + type);
}
if (!this.entries[first + i]) {
2011-10-25 08:55:23 +09:00
this.entries[first + i] = entry;
}
2011-10-25 08:55:23 +09:00
}
2013-02-07 08:19:29 +09:00
streamState.entryNum = 0;
streamState.streamPos = stream.pos;
entryRanges.splice(0, 2);
2011-10-25 08:55:23 +09:00
}
},
indexObjects: function XRef_indexObjects() {
2011-10-25 08:55:23 +09:00
// Simple scan through the PDF content to find objects,
// trailers and XRef streams.
var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
var PERCENT = 0x25, LT = 0x3C;
2011-10-25 08:55:23 +09:00
function readToken(data, offset) {
var token = '', ch = data[offset];
while (ch !== LF && ch !== CR && ch !== LT) {
if (++offset >= data.length) {
2011-10-25 08:55:23 +09:00
break;
}
2011-10-25 08:55:23 +09:00
token += String.fromCharCode(ch);
ch = data[offset];
}
return token;
}
function skipUntil(data, offset, what) {
var length = what.length, dataLength = data.length;
var skipped = 0;
// finding byte sequence
while (offset < dataLength) {
var i = 0;
while (i < length && data[offset + i] === what[i]) {
2011-10-25 08:55:23 +09:00
++i;
}
if (i >= length) {
2011-10-25 08:55:23 +09:00
break; // sequence found
}
2011-10-25 08:55:23 +09:00
offset++;
skipped++;
}
return skipped;
}
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
2011-10-25 08:55:23 +09:00
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
101, 102]);
var endobjBytes = new Uint8Array([101, 110, 100, 111, 98, 106]);
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
// Clear out any existing entries, since they may be bogus.
this.entries.length = 0;
2011-10-25 08:55:23 +09:00
var stream = this.stream;
stream.pos = 0;
var buffer = stream.getBytes();
var position = stream.start, length = buffer.length;
var trailers = [], xrefStms = [];
while (position < length) {
var ch = buffer[position];
if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
2011-10-25 08:55:23 +09:00
++position;
continue;
}
if (ch === PERCENT) { // %-comment
2011-10-25 08:55:23 +09:00
do {
++position;
2013-08-24 02:57:11 +09:00
if (position >= length) {
break;
}
2011-10-25 08:55:23 +09:00
ch = buffer[position];
} while (ch !== LF && ch !== CR);
2011-10-25 08:55:23 +09:00
continue;
}
var token = readToken(buffer, position);
var m;
if (token.indexOf('xref') === 0 &&
(token.length === 4 || /\s/.test(token[4]))) {
2011-10-25 08:55:23 +09:00
position += skipUntil(buffer, position, trailerBytes);
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
} else if ((m = objRegExp.exec(token))) {
2015-04-04 15:15:31 +09:00
if (typeof this.entries[m[1]] === 'undefined') {
this.entries[m[1]] = {
offset: position - stream.start,
2015-04-04 15:15:31 +09:00
gen: m[2] | 0,
uncompressed: true
};
}
2011-10-25 08:55:23 +09:00
var contentLength = skipUntil(buffer, position, endobjBytes) + 7;
var content = buffer.subarray(position, position + contentLength);
// checking XRef stream suspect
// (it shall have '/XRef' and next char is not a letter)
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
if (xrefTagOffset < contentLength &&
content[xrefTagOffset + 5] < 64) {
xrefStms.push(position - stream.start);
this.xrefstms[position - stream.start] = 1; // Avoid recursion
2011-10-25 08:55:23 +09:00
}
position += contentLength;
} else if (token.indexOf('trailer') === 0 &&
(token.length === 7 || /\s/.test(token[7]))) {
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
} else {
2011-10-25 08:55:23 +09:00
position += token.length + 1;
}
2011-10-25 08:55:23 +09:00
}
// reading XRef streams
2014-04-08 06:42:54 +09:00
var i, ii;
for (i = 0, ii = xrefStms.length; i < ii; ++i) {
2013-02-07 08:19:29 +09:00
this.startXRefQueue.push(xrefStms[i]);
this.readXRef(/* recoveryMode */ true);
2011-10-25 08:55:23 +09:00
}
// finding main trailer
var dict;
2014-04-08 06:42:54 +09:00
for (i = 0, ii = trailers.length; i < ii; ++i) {
2011-10-25 08:55:23 +09:00
stream.pos = trailers[i];
var parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
/* xref = */ this, /* recoveryMode = */ true);
2011-10-25 08:55:23 +09:00
var obj = parser.getObj();
if (!isCmd(obj, 'trailer')) {
2011-10-25 08:55:23 +09:00
continue;
}
2011-10-25 08:55:23 +09:00
// read the trailer dictionary
dict = parser.getObj();
if (!isDict(dict)) {
2011-10-25 08:55:23 +09:00
continue;
}
2011-10-25 08:55:23 +09:00
// taking the first one with 'ID'
if (dict.has('ID')) {
2011-10-25 08:55:23 +09:00
return dict;
}
2011-10-25 08:55:23 +09:00
}
// no tailer with 'ID', taking last one (if exists)
if (dict) {
2011-10-25 08:55:23 +09:00
return dict;
}
2011-10-25 08:55:23 +09:00
// nothing helps
// calling error() would reject worker with an UnknownErrorException.
throw new InvalidPDFException('Invalid PDF structure');
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
readXRef: function XRef_readXRef(recoveryMode) {
2011-10-25 08:55:23 +09:00
var stream = this.stream;
2011-12-03 06:35:18 +09:00
try {
2013-02-07 08:19:29 +09:00
while (this.startXRefQueue.length) {
var startXRef = this.startXRefQueue[0];
2012-02-01 00:49:06 +09:00
stream.pos = startXRef + stream.start;
2012-02-01 00:49:06 +09:00
var parser = new Parser(new Lexer(stream), true, this);
2013-02-07 08:19:29 +09:00
var obj = parser.getObj();
var dict;
// Get dictionary
if (isCmd(obj, 'xref')) {
// Parse end-of-file XRef
dict = this.processXRefTable(parser);
if (!this.topDict) {
this.topDict = dict;
2012-02-01 00:49:06 +09:00
}
2013-02-07 08:19:29 +09:00
// Recursively get other XRefs 'XRefStm', if any
obj = dict.get('XRefStm');
if (isInt(obj)) {
var pos = obj;
// ignore previously loaded xref streams
// (possible infinite recursion)
if (!(pos in this.xrefstms)) {
this.xrefstms[pos] = 1;
this.startXRefQueue.push(pos);
}
}
} else if (isInt(obj)) {
// Parse in-stream XRef
if (!isInt(parser.getObj()) ||
!isCmd(parser.getObj(), 'obj') ||
!isStream(obj = parser.getObj())) {
error('Invalid XRef stream');
}
dict = this.processXRefStream(obj);
if (!this.topDict) {
this.topDict = dict;
}
if (!dict) {
2013-02-07 08:19:29 +09:00
error('Failed to read XRef stream');
}
} else {
error('Invalid XRef stream header');
2012-02-01 00:49:06 +09:00
}
2013-02-07 08:19:29 +09:00
// Recursively get previous dictionary, if any
obj = dict.get('Prev');
if (isInt(obj)) {
this.startXRefQueue.push(obj);
} else if (isRef(obj)) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num);
}
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
this.startXRefQueue.shift();
2011-10-25 08:55:23 +09:00
}
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
return this.topDict;
2011-12-03 06:35:18 +09:00
} catch (e) {
2013-02-07 08:19:29 +09:00
if (e instanceof MissingDataException) {
throw e;
}
2014-01-16 06:28:31 +09:00
info('(while reading XRef): ' + e);
2011-10-25 08:55:23 +09:00
}
2011-12-03 06:35:18 +09:00
if (recoveryMode) {
return;
}
2013-02-07 08:19:29 +09:00
throw new XRefParseException();
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
getEntry: function XRef_getEntry(i) {
var xrefEntry = this.entries[i];
if (xrefEntry && !xrefEntry.free && xrefEntry.offset) {
return xrefEntry;
}
return null;
2011-10-25 08:55:23 +09:00
},
fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) {
if (!isRef(obj)) {
2011-10-25 08:55:23 +09:00
return obj;
}
return this.fetch(obj, suppressEncryption);
2011-10-25 08:55:23 +09:00
},
fetch: function XRef_fetch(ref, suppressEncryption) {
assert(isRef(ref), 'ref object is not a reference');
2011-10-25 08:55:23 +09:00
var num = ref.num;
if (num in this.cache) {
var cacheEntry = this.cache[num];
return cacheEntry;
}
var xrefEntry = this.getEntry(num);
// the referenced entry can be free
if (xrefEntry === null) {
return (this.cache[num] = null);
}
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
xrefEntry = this.fetchCompressed(xrefEntry, suppressEncryption);
}
if (isDict(xrefEntry)) {
xrefEntry.objId = ref.toString();
} else if (isStream(xrefEntry)) {
xrefEntry.dict.objId = ref.toString();
}
return xrefEntry;
},
2011-10-25 08:55:23 +09:00
fetchUncompressed: function XRef_fetchUncompressed(ref, xrefEntry,
suppressEncryption) {
2011-10-25 08:55:23 +09:00
var gen = ref.gen;
var num = ref.num;
if (xrefEntry.gen !== gen) {
error('inconsistent generation in XRef');
}
var stream = this.stream.makeSubStream(xrefEntry.offset +
this.stream.start);
var parser = new Parser(new Lexer(stream), true, this);
var obj1 = parser.getObj();
var obj2 = parser.getObj();
var obj3 = parser.getObj();
if (!isInt(obj1) || parseInt(obj1, 10) !== num ||
!isInt(obj2) || parseInt(obj2, 10) !== gen ||
!isCmd(obj3)) {
error('bad XRef entry');
}
if (!isCmd(obj3, 'obj')) {
// some bad PDFs use "obj1234" and really mean 1234
if (obj3.cmd.indexOf('obj') === 0) {
num = parseInt(obj3.cmd.substring(3), 10);
if (!isNaN(num)) {
return num;
2011-10-25 08:55:23 +09:00
}
}
error('bad XRef entry');
}
if (this.encrypt && !suppressEncryption) {
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
} else {
xrefEntry = parser.getObj();
2011-10-25 08:55:23 +09:00
}
if (!isStream(xrefEntry)) {
this.cache[num] = xrefEntry;
}
return xrefEntry;
},
2011-10-25 08:55:23 +09:00
fetchCompressed: function XRef_fetchCompressed(xrefEntry,
suppressEncryption) {
var tableOffset = xrefEntry.offset;
var stream = this.fetch(new Ref(tableOffset, 0));
if (!isStream(stream)) {
2011-10-25 08:55:23 +09:00
error('bad ObjStm stream');
}
var first = stream.dict.get('First');
var n = stream.dict.get('N');
2011-10-25 08:55:23 +09:00
if (!isInt(first) || !isInt(n)) {
error('invalid first and n parameters for ObjStm stream');
}
var parser = new Parser(new Lexer(stream), false, this);
2012-11-02 22:26:45 +09:00
parser.allowStreams = true;
var i, entries = [], num, nums = [];
2011-10-25 08:55:23 +09:00
// read the object numbers to populate cache
for (i = 0; i < n; ++i) {
num = parser.getObj();
if (!isInt(num)) {
error('invalid object number in the ObjStm stream: ' + num);
}
nums.push(num);
var offset = parser.getObj();
if (!isInt(offset)) {
error('invalid object offset in the ObjStm stream: ' + offset);
}
}
// read stream objects for cache
for (i = 0; i < n; ++i) {
entries.push(parser.getObj());
// The ObjStm should not contain 'endobj'. If it's present, skip over it
// to support corrupt PDFs (fixes issue 5241, bug 898610, bug 1037816).
if (isCmd(parser.buf1, 'endobj')) {
parser.shift();
}
num = nums[i];
var entry = this.entries[num];
if (entry && entry.offset === tableOffset && entry.gen === i) {
this.cache[num] = entries[i];
}
2011-10-25 08:55:23 +09:00
}
xrefEntry = entries[xrefEntry.gen];
if (xrefEntry === undefined) {
2011-10-25 08:55:23 +09:00
error('bad XRef entry for compressed object');
}
return xrefEntry;
2011-10-25 08:55:23 +09:00
},
fetchIfRefAsync: function XRef_fetchIfRefAsync(obj, suppressEncryption) {
if (!isRef(obj)) {
return Promise.resolve(obj);
}
return this.fetchAsync(obj, suppressEncryption);
},
fetchAsync: function XRef_fetchAsync(ref, suppressEncryption) {
var streamManager = this.stream.manager;
var xref = this;
return new Promise(function tryFetch(resolve, reject) {
try {
resolve(xref.fetch(ref, suppressEncryption));
} catch (e) {
if (e instanceof MissingDataException) {
streamManager.requestRange(e.begin, e.end).then(function () {
tryFetch(resolve, reject);
}, reject);
return;
}
reject(e);
}
});
},
getCatalogObj: function XRef_getCatalogObj() {
2012-04-06 00:12:48 +09:00
return this.root;
2011-10-25 08:55:23 +09:00
}
};
2011-12-07 07:18:40 +09:00
return XRef;
2011-10-25 08:55:23 +09:00
})();
/**
 * A NameTree/NumberTree is like a Dict but has some advantageous properties,
 * see the specification (7.9.6 and 7.9.7) for additional details.
 * TODO: implement all the Dict functions and make this more efficient.
 */
2015-12-26 01:35:21 +09:00
var NameOrNumberTree = (function NameOrNumberTreeClosure() {
  // Abstract base. Instances are expected to provide `root` (the tree's root
  // node or a reference to it), `xref` (used to resolve references) and
  // `_type` (the key holding the leaf entries: 'Names' or 'Nums').
  function NameOrNumberTree(root, xref) {
    throw new Error('Cannot initialize NameOrNumberTree.');
  }

  NameOrNumberTree.prototype = {
    /**
     * Walks the whole tree and collects every key/value pair.
     *
     * @returns {Object} A prototype-less object mapping each key to its
     *   (dereferenced) value; empty when the tree has no root.
     */
    getAll: function NameOrNumberTree_getAll() {
      var dict = Object.create(null);
      if (!this.root) {
        return dict;
      }
      var xref = this.xref;
      // Reading Name/Number tree.
      // `processed` guards against nodes that are listed more than once.
      var processed = new RefSet();
      processed.put(this.root);
      var queue = [this.root];
      while (queue.length > 0) {
        var i, n;
        var obj = xref.fetchIfRef(queue.shift());
        if (!isDict(obj)) {
          continue;
        }
        if (obj.has('Kids')) {
          // Intermediate node: queue its children.
          var kids = obj.get('Kids');
          for (i = 0, n = kids.length; i < n; i++) {
            var kid = kids[i];
            assert(!processed.has(kid),
                   'Duplicate entry in "' + this._type + '" tree.');
            queue.push(kid);
            processed.put(kid);
          }
          continue;
        }
        // Leaf node: a flat [key1, value1, key2, value2, ...] array.
        var entries = obj.get(this._type);
        if (isArray(entries)) {
          for (i = 0, n = entries.length; i < n; i += 2) {
            dict[xref.fetchIfRef(entries[i])] = xref.fetchIfRef(entries[i + 1]);
          }
        }
      }
      return dict;
    },

    /**
     * Looks up a single key without reading the whole tree.
     *
     * @param {string|number} key - The key to search for.
     * @returns {*} The (dereferenced) value stored for `key`, or null when
     *   the key is not found, the tree has no root, or the tree is nested
     *   deeper than MAX_LEVELS.
     */
    get: function NameOrNumberTree_get(key) {
      if (!this.root) {
        return null;
      }

      var xref = this.xref;
      var kidsOrEntries = xref.fetchIfRef(this.root);
      var loopCount = 0;
      var MAX_LEVELS = 10;
      var l, r, m;

      // Perform a binary search to quickly find the entry that
      // contains the key we are looking for.
      while (kidsOrEntries.has('Kids')) {
        if (++loopCount > MAX_LEVELS) {
          warn('Search depth limit reached for "' + this._type + '" tree.');
          return null;
        }

        var kids = kidsOrEntries.get('Kids');
        if (!isArray(kids)) {
          return null;
        }

        l = 0;
        r = kids.length - 1;
        while (l <= r) {
          m = (l + r) >> 1;
          var kid = xref.fetchIfRef(kids[m]);
          var limits = kid.get('Limits');

          if (key < xref.fetchIfRef(limits[0])) {
            r = m - 1;
          } else if (key > xref.fetchIfRef(limits[1])) {
            l = m + 1;
          } else {
            // Descend into the kid whose limits enclose the key.
            kidsOrEntries = kid;
            break;
          }
        }
        if (l > r) {
          return null;
        }
      }

      // If we get here, then we have found the right entry. Now go through the
      // entries in the dictionary until we find the key we're looking for.
      var entries = kidsOrEntries.get(this._type);
      if (isArray(entries)) {
        // Perform a binary search to reduce the lookup time.
        l = 0;
        r = entries.length - 2;
        while (l <= r) {
          // Check only even indices (0, 2, 4, ...) because the
          // odd indices contain the actual data.
          // `l` and `r` are always even, so `(l + r) & ~1` would simply be
          // `l + r`; take the real midpoint and round it up to an even index.
          var half = (l + r) >> 1;
          m = half + (half & 1);
          var currentKey = xref.fetchIfRef(entries[m]);
          if (key < currentKey) {
            r = m - 2;
          } else if (key > currentKey) {
            l = m + 2;
          } else {
            return xref.fetchIfRef(entries[m + 1]);
          }
        }
      }
      return null;
    }
  };
  return NameOrNumberTree;
})();
var NameTree = (function NameTreeClosure() {
  // Key under which the leaf nodes of a name tree store their entries.
  var LEAF_KEY = 'Names';

  // Tree whose leaf entries are read from the 'Names' array; lookup logic is
  // inherited from NameOrNumberTree.
  function NameTree(root, xref) {
    this.root = root;
    this.xref = xref;
    this._type = LEAF_KEY;
  }

  Util.inherit(NameTree, NameOrNumberTree, {});

  return NameTree;
})();
2015-12-26 01:35:21 +09:00
var NumberTree = (function NumberTreeClosure() {
  // Key under which the leaf nodes of a number tree store their entries.
  var LEAF_KEY = 'Nums';

  // Tree whose leaf entries are read from the 'Nums' array; lookup logic is
  // inherited from NameOrNumberTree.
  function NumberTree(root, xref) {
    this.root = root;
    this.xref = xref;
    this._type = LEAF_KEY;
  }

  Util.inherit(NumberTree, NameOrNumberTree, {});

  return NumberTree;
})();
/**
 * "A PDF file can refer to the contents of another file by using a File
 * Specification (PDF 1.1)", see the spec (7.11) for more details.
 * NOTE: Only embedded files are supported (as part of the attachments support)
 * TODO: support the 'URL' file system (with caching if !/V), portable
 * collections attributes and related files (/RF)
 */
var FileSpec = (function FileSpecClosure() {
  // Keys tried, in priority order, when picking a platform-specific item.
  var PLATFORM_KEYS = ['UF', 'F', 'Unix', 'Mac', 'DOS'];

  // Wraps a file specification dictionary. When `root` is not a dictionary
  // the instance is left without any own properties.
  function FileSpec(root, xref) {
    if (!root || !isDict(root)) {
      return;
    }
    this.xref = xref;
    this.root = root;
    if (root.has('FS')) {
      this.fs = root.get('FS');
    }
    if (root.has('Desc')) {
      this.description = stringToPDFString(root.get('Desc'));
    } else {
      this.description = '';
    }
    if (root.has('RF')) {
      warn('Related file specifications are not supported');
    }
    // Content can only be served when the file is embedded ('EF' present).
    if (root.has('EF')) {
      this.contentAvailable = true;
    } else {
      this.contentAvailable = false;
      warn('Non-embedded file specifications are not supported');
    }
  }

  // Returns the value of the first key from PLATFORM_KEYS that `dict`
  // contains, or null when it has none of them.
  function pickPlatformItem(dict) {
    for (var idx = 0; idx < PLATFORM_KEYS.length; idx++) {
      var platformKey = PLATFORM_KEYS[idx];
      if (dict.has(platformKey)) {
        return dict.get(platformKey);
      }
    }
    return null;
  }

  FileSpec.prototype = {
    // Lazily computed file name with escaped and plain backslashes
    // normalized to forward slashes; 'unnamed' when the dictionary has none.
    get filename() {
      if (this._filename || !this.root) {
        return this._filename;
      }
      var rawName = pickPlatformItem(this.root) || 'unnamed';
      var decodedName = stringToPDFString(rawName);
      this._filename = decodedName.replace(/\\\\/g, '\\')
                                  .replace(/\\\//g, '/')
                                  .replace(/\\/g, '/');
      return this._filename;
    },

    // Bytes of the embedded file, or null when nothing usable is embedded.
    get content() {
      if (!this.contentAvailable) {
        return null;
      }
      if (!this.contentRef && this.root) {
        this.contentRef = pickPlatformItem(this.root.get('EF'));
      }
      if (!this.contentRef) {
        warn('Embedded file specification does not have a content');
        return null;
      }
      var fileObj = this.xref.fetchIfRef(this.contentRef);
      if (fileObj && isStream(fileObj)) {
        return fileObj.getBytes();
      }
      warn('Embedded file specification points to non-existing/invalid ' +
           'content');
      return null;
    },

    // Plain object holding the file name and content.
    get serializable() {
      var result = {
        filename: this.filename,
        content: this.content
      };
      return result;
    }
  };

  return FileSpec;
})();
/**
 * A helper for loading missing data in object graphs. It traverses the graph
 * depth first and queues up any objects that have missing data. Once it has
 * traversed as many objects as are available, it attempts to bundle the
 * missing data requests and then resumes from the nodes that weren't ready.
 *
 * NOTE: It provides protection from circular references by keeping track of
 * loaded references. However, you must be careful not to load any graphs
 * that have references to the catalog or other pages since that will cause the
 * entire PDF document object graph to be traversed.
 */
var ObjectLoader = (function() {
  // True for values that can contain, or point to, further objects.
  function mayHaveChildren(value) {
    return isRef(value) || isDict(value) || isArray(value) || isStream(value);
  }

  // Pushes every child of `node` that may itself have children onto
  // `nodesToVisit`. Dictionaries and streams contribute the values of their
  // map, arrays contribute their elements.
  function addChildren(node, nodesToVisit) {
    var nodeIsDict = isDict(node);
    if (nodeIsDict || isStream(node)) {
      var entries = nodeIsDict ? node.map : node.dict.map;
      for (var name in entries) {
        var entry = entries[name];
        if (mayHaveChildren(entry)) {
          nodesToVisit.push(entry);
        }
      }
      return;
    }
    if (!isArray(node)) {
      return;
    }
    for (var idx = 0, count = node.length; idx < count; idx++) {
      var element = node[idx];
      if (mayHaveChildren(element)) {
        nodesToVisit.push(element);
      }
    }
  }

  // `obj` is the container whose properties named in `keys` are the roots
  // of the graphs to load; `xref` is used to resolve references.
  function ObjectLoader(obj, keys, xref) {
    this.obj = obj;
    this.keys = keys;
    this.xref = xref;
    this.refSet = null;
    this.capability = null;
  }

  ObjectLoader.prototype = {
    // Starts loading and returns a promise that is resolved once all the
    // data reachable from the requested keys is available.
    load: function ObjectLoader_load() {
      this.capability = createPromiseCapability();

      // Don't walk the graph if all the data is already loaded.
      var stream = this.xref.stream;
      var everythingLoaded = !(stream instanceof ChunkedStream) ||
                             stream.getMissingChunks().length === 0;
      if (everythingLoaded) {
        this.capability.resolve();
        return this.capability.promise;
      }

      this.refSet = new RefSet();

      // Setup the initial nodes to visit.
      var keys = this.keys;
      var roots = [];
      for (var k = 0; k < keys.length; k++) {
        roots.push(this.obj[keys[k]]);
      }

      this._walk(roots);
      return this.capability.promise;
    },

    // Visits `nodesToVisit` depth first. Nodes that hit missing data are
    // collected, their byte ranges are requested in one batch and the walk
    // is restarted from those nodes once the request completes.
    _walk: function ObjectLoader_walk(nodesToVisit) {
      var self = this;
      var retryNodes = [];
      var missingRanges = [];

      // DFS walk of the object graph.
      while (nodesToVisit.length) {
        var node = nodesToVisit.pop();

        // Only references or chunked streams can cause missing data exceptions.
        if (isRef(node)) {
          // Skip nodes that have already been visited.
          if (this.refSet.has(node)) {
            continue;
          }
          try {
            this.refSet.put(node);
            node = this.xref.fetch(node);
          } catch (ex) {
            if (!(ex instanceof MissingDataException)) {
              throw ex;
            }
            // `node` is still the reference here; retry it later.
            retryNodes.push(node);
            missingRanges.push({ begin: ex.begin, end: ex.end });
          }
        }

        if (node && node.getBaseStreams) {
          var baseStreams = node.getBaseStreams();
          var streamIncomplete = false;
          for (var s = 0; s < baseStreams.length; s++) {
            var base = baseStreams[s];
            if (base.getMissingChunks && base.getMissingChunks().length) {
              streamIncomplete = true;
              missingRanges.push({ begin: base.start, end: base.end });
            }
          }
          if (streamIncomplete) {
            retryNodes.push(node);
          }
        }

        addChildren(node, nodesToVisit);
      }

      if (missingRanges.length === 0) {
        // Everything is loaded.
        this.refSet = null;
        this.capability.resolve();
        return;
      }

      this.xref.stream.manager.requestRanges(missingRanges).then(
        function pendingRequestCallback() {
          // Remove any reference nodes from the current refset so they
          // aren't skipped when we revisit them.
          for (var r = 0; r < retryNodes.length; r++) {
            var pending = retryNodes[r];
            if (isRef(pending)) {
              self.refSet.remove(pending);
            }
          }
          self._walk(retryNodes);
        }, this.capability.reject);
    }
  };

  return ObjectLoader;
})();
// Names made available to other modules through the `exports` object.
exports.Catalog = Catalog;
exports.ObjectLoader = ObjectLoader;
exports.XRef = XRef;
exports.FileSpec = FileSpec;
}));