Improve the Page.content and Page.getContentStream methods

First, using `Dict.getArray` in the `Page.content` getter removes the need to manually iterate through and fetch the sub-streams (when they exist) in the `Page.getContentStream` method.
Secondly, we can simplify the code in `Page.{getOperatorList, extractTextContent}` by letting `Page.getContentStream` ensure that `content` is available and return a Promise instead.
This commit is contained in:
Jonas Jenwald 2021-05-14 09:59:24 +02:00
parent 70113131de
commit 4248f0745c

View File

@ -54,6 +54,7 @@ import {
} from "./core_utils.js";
import { NullStream, Stream } from "./stream.js";
import { AnnotationFactory } from "./annotation.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Catalog } from "./catalog.js";
import { Linearization } from "./parser.js";
@ -136,7 +137,7 @@ class Page {
}
get content() {
return this.pageDict.get("Contents");
return this.pageDict.getArray("Contents");
}
get resources() {
@ -229,25 +230,20 @@ class Page {
return shadow(this, "rotate", rotate);
}
/**
* @returns {Promise<BaseStream>}
*/
getContentStream() {
const content = this.content;
let stream;
if (Array.isArray(content)) {
// Fetching the individual streams from the array.
const xref = this.xref;
const streams = [];
for (const subStream of content) {
streams.push(xref.fetchIfRef(subStream));
return this.pdfManager.ensure(this, "content").then(content => {
if (content instanceof BaseStream) {
return content;
}
if (Array.isArray(content)) {
return new StreamsSequenceStream(content);
}
stream = new StreamsSequenceStream(streams);
} else if (isStream(content)) {
stream = content;
} else {
// Replace non-existent page content with empty content.
stream = new NullStream();
}
return stream;
return new NullStream();
});
}
get xfaData() {
@ -313,10 +309,7 @@ class Page {
renderInteractiveForms,
annotationStorage,
}) {
const contentStreamPromise = this.pdfManager.ensure(
this,
"getContentStream"
);
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ColorSpace",
"ExtGState",
@ -420,10 +413,7 @@ class Page {
sink,
combineTextItems,
}) {
const contentStreamPromise = this.pdfManager.ensure(
this,
"getContentStream"
);
const contentStreamPromise = this.getContentStream();
const resourcesPromise = this.loadResources([
"ExtGState",
"Font",