Use the Linearization dictionary, if it exists, when fetching the first Page

Since PDF.js already supports range requests and streaming, not to mention chunked rendering, attempting to use the `Linearization` dictionary in `PDFDocument.getPage` probably isn't going to improve performance in any noticeable way.
Nonetheless, when `Linearization` data is available, it will allow looking up the first Page *directly* without having to descend into the `Pages` tree to find the correct object.
This commit is contained in:
Jonas Jenwald 2018-07-25 20:50:25 +02:00
parent fbb25ff4e2
commit ec3728b540

View File

@ -13,12 +13,13 @@
* limitations under the License. * limitations under the License.
*/ */
import { Catalog, ObjectLoader, XRef } from './obj';
import { Dict, isDict, isName, isStream } from './primitives';
import { import {
getInheritableProperty, info, isArrayBuffer, isNum, isSpace, isString, assert, FormatError, getInheritableProperty, info, isArrayBuffer, isNum,
MissingDataException, OPS, shadow, stringToBytes, stringToPDFString, Util isSpace, isString, MissingDataException, OPS, shadow, stringToBytes,
stringToPDFString, Util
} from '../shared/util'; } from '../shared/util';
import { Catalog, ObjectLoader, XRef } from './obj';
import { Dict, isDict, isName, isStream, Ref } from './primitives';
import { NullStream, Stream, StreamsSequenceStream } from './stream'; import { NullStream, Stream, StreamsSequenceStream } from './stream';
import { AnnotationFactory } from './annotation'; import { AnnotationFactory } from './annotation';
import { calculateMD5 } from './crypto'; import { calculateMD5 } from './crypto';
@ -586,25 +587,49 @@ var PDFDocument = (function PDFDocumentClosure() {
return shadow(this, 'fingerprint', fileID); return shadow(this, 'fingerprint', fileID);
}, },
_getLinearizationPage(pageIndex) {
const { catalog, linearization, } = this;
assert(linearization && linearization.pageFirst === pageIndex);
const ref = new Ref(linearization.objectNumberFirst, 0);
return this.xref.fetchAsync(ref).then((obj) => {
// Ensure that the object that was found is actually a Page dictionary.
if (isDict(obj, 'Page') ||
(isDict(obj) && !obj.has('Type') && obj.has('Contents'))) {
if (ref && !catalog.pageKidsCountCache.has(ref)) {
catalog.pageKidsCountCache.put(ref, 1); // Cache the Page reference.
}
return [obj, ref];
}
throw new FormatError('The Linearization dictionary doesn\'t point ' +
'to a valid Page dictionary.');
}).catch((reason) => {
info(reason);
return catalog.getPageDict(pageIndex);
});
},
getPage(pageIndex) { getPage(pageIndex) {
if (this._pagePromises[pageIndex] !== undefined) { if (this._pagePromises[pageIndex] !== undefined) {
return this._pagePromises[pageIndex]; return this._pagePromises[pageIndex];
} }
const catalog = this.catalog; const { catalog, linearization, } = this;
return this._pagePromises[pageIndex] = const promise = (linearization && linearization.pageFirst === pageIndex) ?
catalog.getPageDict(pageIndex).then(([pageDict, ref]) => { this._getLinearizationPage(pageIndex) : catalog.getPageDict(pageIndex);
return new Page({
pdfManager: this.pdfManager, return this._pagePromises[pageIndex] = promise.then(([pageDict, ref]) => {
xref: this.xref, return new Page({
pageIndex, pdfManager: this.pdfManager,
pageDict, xref: this.xref,
ref, pageIndex,
fontCache: catalog.fontCache, pageDict,
builtInCMapCache: catalog.builtInCMapCache, ref,
pdfFunctionFactory: this.pdfFunctionFactory, fontCache: catalog.fontCache,
}); builtInCMapCache: catalog.builtInCMapCache,
pdfFunctionFactory: this.pdfFunctionFactory,
}); });
});
}, },
cleanup: function PDFDocument_cleanup() { cleanup: function PDFDocument_cleanup() {