Improve the Page.content and Page.getContentStream methods

First, using `Dict.getArray` in the `Page.content` getter removes the need to manually iterate through and fetch the sub-streams (when they exist) in the `Page.getContentStream` method. Second, the code in `Page.{getOperatorList, extractTextContent}` can be simplified by having `Page.getContentStream` itself ensure that `content` is available and return a Promise.
2021-05-14 09:59:24 +02:00 · 2021-05-14 09:59:24 +02:00 · 4248f0745c
commit 4248f0745c
parent 70113131de
1 changed files with 15 additions and 25 deletions
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -54,6 +54,7 @@ import {
 } from "./core_utils.js";
 import { NullStream, Stream } from "./stream.js";
 import { AnnotationFactory } from "./annotation.js";
+import { BaseStream } from "./base_stream.js";
 import { calculateMD5 } from "./crypto.js";
 import { Catalog } from "./catalog.js";
 import { Linearization } from "./parser.js";
@@ -136,7 +137,7 @@ class Page {
  }
  get content() {
-    return this.pageDict.get("Contents");
+    return this.pageDict.getArray("Contents");
  }
  get resources() {
@@ -229,25 +230,20 @@ class Page {
    return shadow(this, "rotate", rotate);
  }
  /**
   * @returns {Promise<BaseStream>}
   */
  getContentStream() {
-    const content = this.content;
+    return this.pdfManager.ensure(this, "content").then(content => {
-    let stream;
+      if (content instanceof BaseStream) {
-
+        return content;
-    if (Array.isArray(content)) {
+      }
-      // Fetching the individual streams from the array.
+      if (Array.isArray(content)) {
-      const xref = this.xref;
+        return new StreamsSequenceStream(content);
      const streams = [];
      for (const subStream of content) {
        streams.push(xref.fetchIfRef(subStream));
      }
      stream = new StreamsSequenceStream(streams);
    } else if (isStream(content)) {
      stream = content;
    } else {
      // Replace non-existent page content with empty content.
-      stream = new NullStream();
+      return new NullStream();
-    }
+    });
    return stream;
  }
  get xfaData() {
@@ -313,10 +309,7 @@ class Page {
    renderInteractiveForms,
    annotationStorage,
  }) {
-    const contentStreamPromise = this.pdfManager.ensure(
+    const contentStreamPromise = this.getContentStream();
-      this,
-      "getContentStream"
-    );
    const resourcesPromise = this.loadResources([
      "ColorSpace",
      "ExtGState",
@@ -420,10 +413,7 @@ class Page {
    sink,
    combineTextItems,
  }) {
-    const contentStreamPromise = this.pdfManager.ensure(
+    const contentStreamPromise = this.getContentStream();
-      this,
-      "getContentStream"
-    );
    const resourcesPromise = this.loadResources([
      "ExtGState",
      "Font",