diff --git a/src/core/annotation.js b/src/core/annotation.js index f3c670ea0..3d6afebfb 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -34,7 +34,7 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, FileSpec, ObjectLoader } from "./obj.js"; +import { Catalog, FileSpec } from "./obj.js"; import { collectActions, getInheritableProperty } from "./core_utils.js"; import { createDefaultAppearance, @@ -50,6 +50,7 @@ import { RefSet, } from "./primitives.js"; import { ColorSpace } from "./colorspace.js"; +import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { StringStream } from "./stream.js"; import { writeDict } from "./writer.js"; diff --git a/src/core/document.js b/src/core/document.js index 4db6248be..c22395f3a 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -34,7 +34,7 @@ import { Util, warn, } from "../shared/util.js"; -import { Catalog, ObjectLoader, XRef } from "./obj.js"; +import { Catalog, XRef } from "./obj.js"; import { clearPrimitiveCaches, Dict, @@ -56,6 +56,7 @@ import { NullStream, Stream, StreamsSequenceStream } from "./stream.js"; import { AnnotationFactory } from "./annotation.js"; import { calculateMD5 } from "./crypto.js"; import { Linearization } from "./parser.js"; +import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; import { PartialEvaluator } from "./evaluator.js"; import { StructTreePage } from "./struct_tree.js"; diff --git a/src/core/obj.js b/src/core/obj.js index c5b2c7b82..49e2bb353 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -2512,141 +2512,4 @@ var FileSpec = (function FileSpecClosure() { return FileSpec; })(); -/** - * A helper for loading missing data in `Dict` graphs. It traverses the graph - * depth first and queues up any objects that have missing data. 
Once it has - * has traversed as many objects that are available it attempts to bundle the - * missing data requests and then resume from the nodes that weren't ready. - * - * NOTE: It provides protection from circular references by keeping track of - * loaded references. However, you must be careful not to load any graphs - * that have references to the catalog or other pages since that will cause the - * entire PDF document object graph to be traversed. - */ -const ObjectLoader = (function () { - function mayHaveChildren(value) { - return ( - value instanceof Ref || - value instanceof Dict || - Array.isArray(value) || - isStream(value) - ); - } - - function addChildren(node, nodesToVisit) { - if (node instanceof Dict) { - node = node.getRawValues(); - } else if (isStream(node)) { - node = node.dict.getRawValues(); - } else if (!Array.isArray(node)) { - return; - } - for (const rawValue of node) { - if (mayHaveChildren(rawValue)) { - nodesToVisit.push(rawValue); - } - } - } - - // eslint-disable-next-line no-shadow - function ObjectLoader(dict, keys, xref) { - this.dict = dict; - this.keys = keys; - this.xref = xref; - this.refSet = null; - } - - ObjectLoader.prototype = { - async load() { - // Don't walk the graph if all the data is already loaded; note that only - // `ChunkedStream` instances have a `allChunksLoaded` method. - if ( - !this.xref.stream.allChunksLoaded || - this.xref.stream.allChunksLoaded() - ) { - return undefined; - } - - const { keys, dict } = this; - this.refSet = new RefSet(); - // Setup the initial nodes to visit. - const nodesToVisit = []; - for (let i = 0, ii = keys.length; i < ii; i++) { - const rawValue = dict.getRaw(keys[i]); - // Skip nodes that are guaranteed to be empty. - if (rawValue !== undefined) { - nodesToVisit.push(rawValue); - } - } - return this._walk(nodesToVisit); - }, - - async _walk(nodesToVisit) { - const nodesToRevisit = []; - const pendingRequests = []; - // DFS walk of the object graph. 
- while (nodesToVisit.length) { - let currentNode = nodesToVisit.pop(); - - // Only references or chunked streams can cause missing data exceptions. - if (currentNode instanceof Ref) { - // Skip nodes that have already been visited. - if (this.refSet.has(currentNode)) { - continue; - } - try { - this.refSet.put(currentNode); - currentNode = this.xref.fetch(currentNode); - } catch (ex) { - if (!(ex instanceof MissingDataException)) { - warn(`ObjectLoader._walk - requesting all data: "${ex}".`); - this.refSet = null; - - const { manager } = this.xref.stream; - return manager.requestAllChunks(); - } - nodesToRevisit.push(currentNode); - pendingRequests.push({ begin: ex.begin, end: ex.end }); - } - } - if (currentNode && currentNode.getBaseStreams) { - const baseStreams = currentNode.getBaseStreams(); - let foundMissingData = false; - for (let i = 0, ii = baseStreams.length; i < ii; i++) { - const stream = baseStreams[i]; - if (stream.allChunksLoaded && !stream.allChunksLoaded()) { - foundMissingData = true; - pendingRequests.push({ begin: stream.start, end: stream.end }); - } - } - if (foundMissingData) { - nodesToRevisit.push(currentNode); - } - } - - addChildren(currentNode, nodesToVisit); - } - - if (pendingRequests.length) { - await this.xref.stream.manager.requestRanges(pendingRequests); - - for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { - const node = nodesToRevisit[i]; - // Remove any reference nodes from the current `RefSet` so they - // aren't skipped when we revist them. - if (node instanceof Ref) { - this.refSet.remove(node); - } - } - return this._walk(nodesToRevisit); - } - // Everything is loaded. 
- this.refSet = null; - return undefined; - }, - }; - - return ObjectLoader; -})(); - -export { Catalog, FileSpec, NumberTree, ObjectLoader, XRef }; +export { Catalog, FileSpec, NumberTree, XRef }; diff --git a/src/core/object_loader.js b/src/core/object_loader.js new file mode 100644 index 000000000..f2bff6ac0 --- /dev/null +++ b/src/core/object_loader.js @@ -0,0 +1,157 @@ +/* Copyright 2021 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { Dict, isStream, Ref, RefSet } from "./primitives.js"; +import { MissingDataException } from "./core_utils.js"; +import { warn } from "../shared/util.js"; + +/** + * A helper for loading missing data in `Dict` graphs. It traverses the graph + * depth first and queues up any objects that have missing data. Once it has + * traversed as many objects as are available, it attempts to bundle the + * missing data requests and then resume from the nodes that weren't ready. + * + * NOTE: It provides protection from circular references by keeping track of + * loaded references. However, you must be careful not to load any graphs + * that have references to the catalog or other pages since that will cause the + * entire PDF document object graph to be traversed. 
+ */ +const ObjectLoader = (function () { + function mayHaveChildren(value) { + return ( + value instanceof Ref || + value instanceof Dict || + Array.isArray(value) || + isStream(value) + ); + } + + function addChildren(node, nodesToVisit) { + if (node instanceof Dict) { + node = node.getRawValues(); + } else if (isStream(node)) { + node = node.dict.getRawValues(); + } else if (!Array.isArray(node)) { + return; + } + for (const rawValue of node) { + if (mayHaveChildren(rawValue)) { + nodesToVisit.push(rawValue); + } + } + } + + // eslint-disable-next-line no-shadow + function ObjectLoader(dict, keys, xref) { + this.dict = dict; + this.keys = keys; + this.xref = xref; + this.refSet = null; + } + + ObjectLoader.prototype = { + async load() { + // Don't walk the graph if all the data is already loaded; note that only + // `ChunkedStream` instances have an `allChunksLoaded` method. + if ( + !this.xref.stream.allChunksLoaded || + this.xref.stream.allChunksLoaded() + ) { + return undefined; + } + + const { keys, dict } = this; + this.refSet = new RefSet(); + // Set up the initial nodes to visit. + const nodesToVisit = []; + for (let i = 0, ii = keys.length; i < ii; i++) { + const rawValue = dict.getRaw(keys[i]); + // Skip nodes that are guaranteed to be empty. + if (rawValue !== undefined) { + nodesToVisit.push(rawValue); + } + } + return this._walk(nodesToVisit); + }, + + async _walk(nodesToVisit) { + const nodesToRevisit = []; + const pendingRequests = []; + // DFS walk of the object graph. + while (nodesToVisit.length) { + let currentNode = nodesToVisit.pop(); + + // Only references or chunked streams can cause missing data exceptions. + if (currentNode instanceof Ref) { + // Skip nodes that have already been visited. 
+ if (this.refSet.has(currentNode)) { + continue; + } + try { + this.refSet.put(currentNode); + currentNode = this.xref.fetch(currentNode); + } catch (ex) { + if (!(ex instanceof MissingDataException)) { + warn(`ObjectLoader._walk - requesting all data: "${ex}".`); + this.refSet = null; + + const { manager } = this.xref.stream; + return manager.requestAllChunks(); + } + nodesToRevisit.push(currentNode); + pendingRequests.push({ begin: ex.begin, end: ex.end }); + } + } + if (currentNode && currentNode.getBaseStreams) { + const baseStreams = currentNode.getBaseStreams(); + let foundMissingData = false; + for (let i = 0, ii = baseStreams.length; i < ii; i++) { + const stream = baseStreams[i]; + if (stream.allChunksLoaded && !stream.allChunksLoaded()) { + foundMissingData = true; + pendingRequests.push({ begin: stream.start, end: stream.end }); + } + } + if (foundMissingData) { + nodesToRevisit.push(currentNode); + } + } + + addChildren(currentNode, nodesToVisit); + } + + if (pendingRequests.length) { + await this.xref.stream.manager.requestRanges(pendingRequests); + + for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) { + const node = nodesToRevisit[i]; + // Remove any reference nodes from the current `RefSet` so they + // aren't skipped when we revisit them. + if (node instanceof Ref) { + this.refSet.remove(node); + } + } + return this._walk(nodesToRevisit); + } + // Everything is loaded. + this.refSet = null; + return undefined; + }, + }; + + return ObjectLoader; +})(); + +export { ObjectLoader };