Improve the Page.content and Page.getContentStream methods

First of all, by using `Dict.getArray` in the `Page.content` getter, we remove the need to manually iterate through and fetch the sub-streams (when they exist) in the `Page.getContentStream` method.
Secondly, we can simplify the code in `Page.{getOperatorList, extractTextContent}` by letting `Page.getContentStream` ensure that `content` is available and return a Promise instead of the stream itself.
This commit is contained in:
Jonas Jenwald 2021-05-14 09:59:24 +02:00
parent 70113131de
commit 4248f0745c

View File

@ -54,6 +54,7 @@ import {
} from "./core_utils.js"; } from "./core_utils.js";
import { NullStream, Stream } from "./stream.js"; import { NullStream, Stream } from "./stream.js";
import { AnnotationFactory } from "./annotation.js"; import { AnnotationFactory } from "./annotation.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js"; import { calculateMD5 } from "./crypto.js";
import { Catalog } from "./catalog.js"; import { Catalog } from "./catalog.js";
import { Linearization } from "./parser.js"; import { Linearization } from "./parser.js";
@ -136,7 +137,7 @@ class Page {
} }
get content() { get content() {
return this.pageDict.get("Contents"); return this.pageDict.getArray("Contents");
} }
get resources() { get resources() {
@ -229,25 +230,20 @@ class Page {
return shadow(this, "rotate", rotate); return shadow(this, "rotate", rotate);
} }
/**
* @returns {Promise<BaseStream>}
*/
getContentStream() { getContentStream() {
const content = this.content; return this.pdfManager.ensure(this, "content").then(content => {
let stream; if (content instanceof BaseStream) {
return content;
if (Array.isArray(content)) { }
// Fetching the individual streams from the array. if (Array.isArray(content)) {
const xref = this.xref; return new StreamsSequenceStream(content);
const streams = [];
for (const subStream of content) {
streams.push(xref.fetchIfRef(subStream));
} }
stream = new StreamsSequenceStream(streams);
} else if (isStream(content)) {
stream = content;
} else {
// Replace non-existent page content with empty content. // Replace non-existent page content with empty content.
stream = new NullStream(); return new NullStream();
} });
return stream;
} }
get xfaData() { get xfaData() {
@ -313,10 +309,7 @@ class Page {
renderInteractiveForms, renderInteractiveForms,
annotationStorage, annotationStorage,
}) { }) {
const contentStreamPromise = this.pdfManager.ensure( const contentStreamPromise = this.getContentStream();
this,
"getContentStream"
);
const resourcesPromise = this.loadResources([ const resourcesPromise = this.loadResources([
"ColorSpace", "ColorSpace",
"ExtGState", "ExtGState",
@ -420,10 +413,7 @@ class Page {
sink, sink,
combineTextItems, combineTextItems,
}) { }) {
const contentStreamPromise = this.pdfManager.ensure( const contentStreamPromise = this.getContentStream();
this,
"getContentStream"
);
const resourcesPromise = this.loadResources([ const resourcesPromise = this.loadResources([
"ExtGState", "ExtGState",
"Font", "Font",