/* Copyright 2012 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* eslint-disable no-var */

import {
  AbortException,
  assert,
  CMapCompressionType,
  createPromiseCapability,
  FONT_IDENTITY_MATRIX,
  FormatError,
  IDENTITY_MATRIX,
  info,
  isArrayEqual,
  OPS,
  shadow,
  stringToPDFString,
  TextRenderingMode,
  UNSUPPORTED_FEATURES,
  Util,
  warn,
} from "../shared/util.js";
import { CMapFactory, IdentityCMap } from "./cmap.js";
import { Cmd, Dict, EOF, isName, Name, Ref, RefSet } from "./primitives.js";
import { ErrorFont, Font } from "./fonts.js";
import { FontFlags, getFontType } from "./fonts_utils.js";
import {
  getEncoding,
  MacRomanEncoding,
  StandardEncoding,
  SymbolSetEncoding,
  WinAnsiEncoding,
  ZapfDingbatsEncoding,
} from "./encodings.js";
import {
  getFontNameToFileMap,
  getSerifFonts,
  getStandardFontName,
  getStdFontMap,
  getSymbolsFonts,
} from "./standard_fonts.js";
import {
  getNormalizedUnicodes,
  getUnicodeForGlyph,
  reverseIfRtl,
} from "./unicode.js";
import { getTilingPatternIR, Pattern } from "./pattern.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { IdentityToUnicodeMap, ToUnicodeMap } from "./to_unicode_map.js";
import { isPDFFunction, PDFFunctionFactory } from "./function.js";
import { Lexer, Parser } from "./parser.js";
import {
  LocalColorSpaceCache,
  LocalGStateCache,
  LocalImageCache,
  LocalTilingPatternCache,
} from "./image_utils.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js";
import { bidi } from "./bidi.js";
import { ColorSpace } from "./colorspace.js";
import { DecodeStream } from "./decode_stream.js";
import { getGlyphsUnicode } from "./glyphlist.js";
import { getLookupTableFactory } from "./core_utils.js";
import { getMetrics } from "./metrics.js";
import { MurmurHash3_64 } from "./murmurhash3.js";
import { OperatorList } from "./operator_list.js";
import { PDFImage } from "./image.js";
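
// Default values for the evaluator options; note that the active options can
// be overridden, e.g. via the API or with `PartialEvaluator.clone` below.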
const DefaultPartialEvaluatorOptions = Object.freeze({
  maxImageSize: -1,
  disableFontFace: false,
  ignoreErrors: false,
  isEvalSupported: true,
  fontExtraProperties: false,
  useSystemFonts: true,
  cMapUrl: null,
  standardFontDataUrl: null,
});

const PatternType = {
  TILING: 1,
  SHADING: 2,
};

// Optionally avoid sending individual, or very few, text chunks to reduce
// `postMessage` overhead with ReadableStream (see issue 13962).
//
// PLEASE NOTE: This value should *not* be too large (it's used as a lower limit
// in `enqueueChunk`), since that would cause streaming of textContent to become
// essentially useless in practice by sending all (or most) chunks at once.
// Also, a too large value would (indirectly) affect the main-thread `textLayer`
// building negatively by forcing all textContent to be handled at once, which
// could easily end up hurting *overall* performance (e.g. rendering as well).
const TEXT_CHUNK_BATCH_SIZE = 10;
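
// A minimal sketch of the batching idea (the actual `enqueueChunk` helper in
// `getTextContent` also handles forced flushing and stream cancellation):
//
//   function enqueueChunk() {
//     if (textContent.items.length >= TEXT_CHUNK_BATCH_SIZE) {
//       sink.enqueue(textContent, textContent.items.length);
//       textContent.items = [];
//       textContent.styles = Object.create(null);
//     }
//   }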
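
// A resolved Promise, used via `deferred.then(...)` to yield back to the
// event loop, e.g. once a `TimeSlotManager` time slot has been exhausted.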
const deferred = Promise.resolve();

// Convert PDF blend mode names to HTML5 blend mode names.
function normalizeBlendMode(value, parsingArray = false) {
  if (Array.isArray(value)) {
    // Use the first *supported* BM value in the Array (fixes issue11279.pdf).
    for (let i = 0, ii = value.length; i < ii; i++) {
      const maybeBM = normalizeBlendMode(value[i], /* parsingArray = */ true);
      if (maybeBM) {
        return maybeBM;
      }
    }
    warn(`Unsupported blend mode Array: ${value}`);
    return "source-over";
  }

  if (!(value instanceof Name)) {
    if (parsingArray) {
      return null;
    }
    return "source-over";
  }
  switch (value.name) {
    case "Normal":
    case "Compatible":
      return "source-over";
    case "Multiply":
      return "multiply";
    case "Screen":
      return "screen";
    case "Overlay":
      return "overlay";
    case "Darken":
      return "darken";
    case "Lighten":
      return "lighten";
    case "ColorDodge":
      return "color-dodge";
    case "ColorBurn":
      return "color-burn";
    case "HardLight":
      return "hard-light";
    case "SoftLight":
      return "soft-light";
    case "Difference":
      return "difference";
    case "Exclusion":
      return "exclusion";
    case "Hue":
      return "hue";
    case "Saturation":
      return "saturation";
    case "Color":
      return "color";
    case "Luminosity":
      return "luminosity";
  }
  if (parsingArray) {
    return null;
  }
  warn(`Unsupported blend mode: ${value.name}`);
  return "source-over";
}
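
// For example (a sketch, with `Name` objects as produced by the parser):
//
//   normalizeBlendMode(Name.get("Multiply"));   // "multiply"
//   normalizeBlendMode(Name.get("Bogus"));      // "source-over" (+ warning)
//   normalizeBlendMode([Name.get("Bogus"), Name.get("Screen")]);  // "screen"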

// Trying to minimize Date.now() usage and only checking the time every
// hundred invocations, to reduce its overhead.
class TimeSlotManager {
  static get TIME_SLOT_DURATION_MS() {
    return shadow(this, "TIME_SLOT_DURATION_MS", 20);
  }

  static get CHECK_TIME_EVERY() {
    return shadow(this, "CHECK_TIME_EVERY", 100);
  }

  constructor() {
    this.reset();
  }

  check() {
    if (++this.checked < TimeSlotManager.CHECK_TIME_EVERY) {
      return false;
    }
    this.checked = 0;
    return this.endTime <= Date.now();
  }

  reset() {
    this.endTime = Date.now() + TimeSlotManager.TIME_SLOT_DURATION_MS;
    this.checked = 0;
  }
}
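
// Typical usage (a sketch; `hasMoreOperators`/`parseNextOperator` stand in
// for the real operator parsing): reset the manager before a long-running
// loop, and yield once the current time slot has been exhausted.
//
//   const timeSlotManager = new TimeSlotManager();
//   while (hasMoreOperators) {
//     if (timeSlotManager.check()) {
//       break; // Yield to the event loop; parsing resumes asynchronously.
//     }
//     parseNextOperator();
//   }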
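
/**
 * The `PartialEvaluator` translates the content streams of a page into an
 * `OperatorList`, resolving the resources (fonts, images, color spaces,
 * patterns, etc.) that they reference; it's also used when extracting the
 * textContent of a page.
 */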
class PartialEvaluator {
  constructor({
    xref,
    handler,
    pageIndex,
    idFactory,
    fontCache,
    builtInCMapCache,
    standardFontDataCache,
    globalImageCache,
    options = null,
  }) {
    this.xref = xref;
    this.handler = handler;
    this.pageIndex = pageIndex;
    this.idFactory = idFactory;
    this.fontCache = fontCache;
    this.builtInCMapCache = builtInCMapCache;
    this.standardFontDataCache = standardFontDataCache;
    this.globalImageCache = globalImageCache;
    this.options = options || DefaultPartialEvaluatorOptions;
    this.parsingType3Font = false;

    this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this);
  }

  /**
   * Since Functions are only cached (locally) by reference, we can share one
   * `PDFFunctionFactory` instance within this `PartialEvaluator` instance.
   */
  get _pdfFunctionFactory() {
    const pdfFunctionFactory = new PDFFunctionFactory({
      xref: this.xref,
      isEvalSupported: this.options.isEvalSupported,
    });
    return shadow(this, "_pdfFunctionFactory", pdfFunctionFactory);
  }
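
  // Creates a shallow copy of this evaluator, with (optionally) some of its
  // options overridden; used e.g. when Type3 fonts must be parsed with
  // different error-handling options.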
|
2019-10-28 19:28:13 +09:00
|
|
|
|
|
2021-05-31 19:13:20 +09:00
|
|
|
|
clone(newOptions = null) {
|
2021-05-06 16:39:21 +09:00
|
|
|
|
const newEvaluator = Object.create(this);
|
2021-05-31 19:13:20 +09:00
|
|
|
|
newEvaluator.options = Object.assign(
|
|
|
|
|
Object.create(null),
|
|
|
|
|
this.options,
|
|
|
|
|
newOptions
|
|
|
|
|
);
|
2020-07-05 19:20:10 +09:00
|
|
|
|
return newEvaluator;
|
|
|
|
|
}
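
  /**
   * Determines whether the page uses any blend modes, which (unfortunately)
   * requires *synchronous* parsing of the /Resources entries before rendering
   * of the page can start (see the "StartRenderPage" message). Resources that
   * are known to contain no blend modes are recorded in `nonBlendModesSet`,
   * so that subsequent pages can skip re-fetching/re-parsing them.
   */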
  hasBlendModes(resources, nonBlendModesSet) {
    if (!(resources instanceof Dict)) {
      return false;
    }
    if (resources.objId && nonBlendModesSet.has(resources.objId)) {
      return false;
    }

    const processed = new RefSet(nonBlendModesSet);
    if (resources.objId) {
      processed.put(resources.objId);
    }

    const nodes = [resources],
      xref = this.xref;
    while (nodes.length) {
      const node = nodes.shift();
      // First check the current resources for blend modes.
      const graphicStates = node.get("ExtGState");
      if (graphicStates instanceof Dict) {
        for (let graphicState of graphicStates.getRawValues()) {
          if (graphicState instanceof Ref) {
            if (processed.has(graphicState)) {
              continue; // The ExtGState has already been processed.
            }
            try {
              graphicState = xref.fetch(graphicState);
            } catch (ex) {
              // Avoid parsing a corrupt ExtGState more than once.
              processed.put(graphicState);

              info(`hasBlendModes - ignoring ExtGState: "${ex}".`);
              continue;
            }
          }
          if (!(graphicState instanceof Dict)) {
            continue;
          }
          if (graphicState.objId) {
            processed.put(graphicState.objId);
          }

          const bm = graphicState.get("BM");
          if (bm instanceof Name) {
            if (bm.name !== "Normal") {
              return true;
            }
            continue;
          }
          if (bm !== undefined && Array.isArray(bm)) {
            for (const element of bm) {
              if (element instanceof Name && element.name !== "Normal") {
                return true;
              }
            }
          }
        }
      }
      // Descend into the XObjects to look for more resources and blend modes.
      const xObjects = node.get("XObject");
      if (!(xObjects instanceof Dict)) {
        continue;
      }
      for (let xObject of xObjects.getRawValues()) {
        if (xObject instanceof Ref) {
          if (processed.has(xObject)) {
            // The XObject has already been processed, and by avoiding a
            // redundant `xref.fetch` we can *significantly* reduce the load
            // time for badly generated PDF files (fixes issue6961.pdf).
            continue;
          }
          try {
            xObject = xref.fetch(xObject);
          } catch (ex) {
            // Avoid parsing a corrupt XObject more than once.
            processed.put(xObject);

            info(`hasBlendModes - ignoring XObject: "${ex}".`);
            continue;
          }
        }
        if (!(xObject instanceof BaseStream)) {
          continue;
        }
        if (xObject.dict.objId) {
          processed.put(xObject.dict.objId);
        }
        const xResources = xObject.dict.get("Resources");
        if (!(xResources instanceof Dict)) {
          continue;
        }
        // Checking objId to detect an infinite loop.
        if (xResources.objId && processed.has(xResources.objId)) {
          continue;
        }

        nodes.push(xResources);
        if (xResources.objId) {
          processed.put(xResources.objId);
        }
      }
    }

    // When no blend modes exist, there's no need to re-fetch/re-parse any of
    // the processed `Ref`s again for subsequent pages. This helps reduce
    // redundant `XRef.fetch` calls for some documents (e.g. issue6961.pdf).
    for (const ref of processed) {
      nonBlendModesSet.put(ref);
    }
    return false;
  }
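
  /**
   * Fetches a built-in (binary) CMap, either directly on the worker-thread
   * when `cMapUrl` is provided or, as a fallback, via the main-thread. Given
   * their size, only compressed CMaps are cached.
   */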
  async fetchBuiltInCMap(name) {
    const cachedData = this.builtInCMapCache.get(name);
    if (cachedData) {
      return cachedData;
    }
    let data;

    if (this.options.cMapUrl !== null) {
      // Only compressed CMaps are (currently) supported here.
      const url = `${this.options.cMapUrl}${name}.bcmap`;
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(
          `fetchBuiltInCMap: failed to fetch file "${url}" with "${response.statusText}".`
        );
      }
      data = {
        cMapData: new Uint8Array(await response.arrayBuffer()),
        compressionType: CMapCompressionType.BINARY,
      };
    } else {
      // Get the data on the main-thread instead.
      data = await this.handler.sendWithPromise("FetchBuiltInCMap", { name });
    }

    if (data.compressionType !== CMapCompressionType.NONE) {
      // Given the size of uncompressed CMaps, only cache compressed ones.
      this.builtInCMapCache.set(name, data);
    }
    return data;
  }
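
  // The fetched data is consumed by `CMapFactory`, roughly as follows
  // (a sketch, mirroring how composite-font encodings are loaded):
  //
  //   const cMap = await CMapFactory.create({
  //     encoding: Name.get("Identity-H"),
  //     fetchBuiltInCMap: this._fetchBuiltInCMapBound,
  //     useCMap: null,
  //   });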
firefox | issue2504 | 8 | Overall | 20 | 2875 | 979 | -1896 | -65.95 | faster
firefox | issue2504 | 8 | Page Request | 20 | 1 | 2 | 0 | 11.11 |
firefox | issue2504 | 8 | Rendering | 20 | 2874 | 978 | -1896 | -65.99 | faster
firefox | issue2504 | 9 | Overall | 20 | 700 | 332 | -368 | -52.60 | faster
firefox | issue2504 | 9 | Page Request | 20 | 3 | 2 | 0 | -4.00 |
firefox | issue2504 | 9 | Rendering | 20 | 698 | 329 | -368 | -52.78 | faster
firefox | issue2504 | 10 | Overall | 20 | 3296 | 926 | -2370 | -71.91 | faster
firefox | issue2504 | 10 | Page Request | 20 | 2 | 2 | 0 | -18.75 |
firefox | issue2504 | 10 | Rendering | 20 | 3293 | 924 | -2370 | -71.96 | faster
firefox | issue2504 | 11 | Overall | 20 | 524 | 197 | -327 | -62.34 | faster
firefox | issue2504 | 11 | Page Request | 20 | 2 | 3 | 1 | 58.54 |
firefox | issue2504 | 11 | Rendering | 20 | 522 | 194 | -328 | -62.81 | faster
firefox | issue2504 | 12 | Overall | 20 | 752 | 369 | -384 | -50.98 | faster
firefox | issue2504 | 12 | Page Request | 20 | 3 | 2 | -1 | -36.51 | faster
firefox | issue2504 | 12 | Rendering | 20 | 749 | 367 | -382 | -51.05 | faster
firefox | issue2504 | 13 | Overall | 20 | 679 | 487 | -193 | -28.38 | faster
firefox | issue2504 | 13 | Page Request | 20 | 4 | 2 | -2 | -48.68 | faster
firefox | issue2504 | 13 | Rendering | 20 | 676 | 485 | -191 | -28.28 | faster
firefox | issue2504 | 14 | Overall | 20 | 474 | 283 | -191 | -40.26 | faster
firefox | issue2504 | 14 | Page Request | 20 | 2 | 4 | 2 | 78.57 |
firefox | issue2504 | 14 | Rendering | 20 | 471 | 279 | -192 | -40.79 | faster
firefox | issue2504 | 15 | Overall | 20 | 860 | 618 | -241 | -28.05 | faster
firefox | issue2504 | 15 | Page Request | 20 | 2 | 3 | 0 | 10.87 |
firefox | issue2504 | 15 | Rendering | 20 | 857 | 616 | -241 | -28.15 | faster
firefox | issue2504 | 16 | Overall | 20 | 389 | 243 | -147 | -37.71 | faster
firefox | issue2504 | 16 | Page Request | 20 | 2 | 2 | 0 | 2.33 |
firefox | issue2504 | 16 | Rendering | 20 | 387 | 240 | -147 | -37.94 | faster
firefox | issue2504 | 17 | Overall | 20 | 1484 | 672 | -812 | -54.70 | faster
firefox | issue2504 | 17 | Page Request | 20 | 2 | 3 | 1 | 37.21 |
firefox | issue2504 | 17 | Rendering | 20 | 1482 | 669 | -812 | -54.84 | faster
firefox | issue2504 | 18 | Overall | 20 | 575 | 252 | -323 | -56.12 | faster
firefox | issue2504 | 18 | Page Request | 20 | 2 | 2 | 0 | -16.22 |
firefox | issue2504 | 18 | Rendering | 20 | 573 | 251 | -322 | -56.24 | faster
firefox | issue2504 | 19 | Overall | 20 | 517 | 227 | -290 | -56.08 | faster
firefox | issue2504 | 19 | Page Request | 20 | 2 | 2 | 0 | 21.62 |
firefox | issue2504 | 19 | Rendering | 20 | 515 | 225 | -290 | -56.37 | faster
firefox | issue2504 | 20 | Overall | 20 | 668 | 670 | 2 | 0.31 |
firefox | issue2504 | 20 | Page Request | 20 | 4 | 2 | -1 | -34.29 |
firefox | issue2504 | 20 | Rendering | 20 | 664 | 667 | 3 | 0.49 |
firefox | issue2504 | 21 | Overall | 20 | 486 | 309 | -177 | -36.44 | faster
firefox | issue2504 | 21 | Page Request | 20 | 2 | 2 | 0 | 16.13 |
firefox | issue2504 | 21 | Rendering | 20 | 484 | 307 | -177 | -36.60 | faster
firefox | issue2504 | 22 | Overall | 20 | 543 | 267 | -276 | -50.85 | faster
firefox | issue2504 | 22 | Page Request | 20 | 2 | 2 | 0 | 10.26 |
firefox | issue2504 | 22 | Rendering | 20 | 541 | 265 | -276 | -51.07 | faster
firefox | issue2504 | 23 | Overall | 20 | 3246 | 871 | -2375 | -73.17 | faster
firefox | issue2504 | 23 | Page Request | 20 | 2 | 3 | 1 | 37.21 |
firefox | issue2504 | 23 | Rendering | 20 | 3243 | 868 | -2376 | -73.25 | faster
firefox | issue2504 | 24 | Overall | 20 | 379 | 156 | -223 | -58.83 | faster
firefox | issue2504 | 24 | Page Request | 20 | 2 | 2 | 0 | -2.86 |
firefox | issue2504 | 24 | Rendering | 20 | 378 | 154 | -223 | -59.10 | faster
firefox | issue2504 | 25 | Overall | 20 | 176 | 127 | -50 | -28.19 | faster
firefox | issue2504 | 25 | Page Request | 20 | 2 | 1 | 0 | -15.63 |
firefox | issue2504 | 25 | Rendering | 20 | 175 | 125 | -49 | -28.31 | faster
firefox | issue2504 | 26 | Overall | 20 | 181 | 108 | -74 | -40.67 | faster
firefox | issue2504 | 26 | Page Request | 20 | 3 | 2 | -1 | -39.13 | faster
firefox | issue2504 | 26 | Rendering | 20 | 178 | 105 | -72 | -40.69 | faster
firefox | issue2504 | 27 | Overall | 20 | 208 | 104 | -104 | -49.92 | faster
firefox | issue2504 | 27 | Page Request | 20 | 2 | 2 | 1 | 48.39 |
firefox | issue2504 | 27 | Rendering | 20 | 206 | 102 | -104 | -50.64 | faster
firefox | issue2504 | 28 | Overall | 20 | 241 | 111 | -131 | -54.16 | faster
firefox | issue2504 | 28 | Page Request | 20 | 2 | 2 | -1 | -33.33 |
firefox | issue2504 | 28 | Rendering | 20 | 239 | 109 | -130 | -54.39 | faster
firefox | issue2504 | 29 | Overall | 20 | 321 | 196 | -125 | -39.05 | faster
firefox | issue2504 | 29 | Page Request | 20 | 1 | 2 | 0 | 17.86 |
firefox | issue2504 | 29 | Rendering | 20 | 319 | 194 | -126 | -39.35 | faster
firefox | issue2504 | 30 | Overall | 20 | 651 | 271 | -380 | -58.41 | faster
firefox | issue2504 | 30 | Page Request | 20 | 1 | 2 | 1 | 50.00 |
firefox | issue2504 | 30 | Rendering | 20 | 649 | 269 | -381 | -58.60 | faster
firefox | issue2504 | 31 | Overall | 20 | 1635 | 647 | -988 | -60.42 | faster
firefox | issue2504 | 31 | Page Request | 20 | 1 | 2 | 0 | 30.43 |
firefox | issue2504 | 31 | Rendering | 20 | 1634 | 645 | -988 | -60.49 | faster
firefox | tracemonkey | 0 | Overall | 100 | 51 | 51 | 0 | 0.02 |
firefox | tracemonkey | 0 | Page Request | 100 | 1 | 1 | 0 | -4.76 |
firefox | tracemonkey | 0 | Rendering | 100 | 50 | 50 | 0 | 0.12 |
firefox | tracemonkey | 1 | Overall | 100 | 97 | 91 | -5 | -5.52 | faster
firefox | tracemonkey | 1 | Page Request | 100 | 3 | 3 | 0 | -1.32 |
firefox | tracemonkey | 1 | Rendering | 100 | 94 | 88 | -5 | -5.73 | faster
firefox | tracemonkey | 2 | Overall | 100 | 40 | 40 | 0 | 0.50 |
firefox | tracemonkey | 2 | Page Request | 100 | 1 | 1 | 0 | 3.16 |
firefox | tracemonkey | 2 | Rendering | 100 | 39 | 39 | 0 | 0.54 |
firefox | tracemonkey | 3 | Overall | 100 | 62 | 62 | -1 | -0.94 |
firefox | tracemonkey | 3 | Page Request | 100 | 1 | 1 | 0 | 17.05 |
firefox | tracemonkey | 3 | Rendering | 100 | 61 | 61 | -1 | -1.11 |
firefox | tracemonkey | 4 | Overall | 100 | 56 | 58 | 2 | 3.41 |
firefox | tracemonkey | 4 | Page Request | 100 | 1 | 1 | 0 | 15.31 |
firefox | tracemonkey | 4 | Rendering | 100 | 55 | 57 | 2 | 3.23 |
firefox | tracemonkey | 5 | Overall | 100 | 73 | 71 | -2 | -2.28 |
firefox | tracemonkey | 5 | Page Request | 100 | 2 | 2 | 0 | 12.20 |
firefox | tracemonkey | 5 | Rendering | 100 | 71 | 69 | -2 | -2.69 |
firefox | tracemonkey | 6 | Overall | 100 | 85 | 69 | -16 | -18.73 | faster
firefox | tracemonkey | 6 | Page Request | 100 | 2 | 2 | 0 | -9.90 |
firefox | tracemonkey | 6 | Rendering | 100 | 83 | 67 | -16 | -18.97 | faster
firefox | tracemonkey | 7 | Overall | 100 | 65 | 64 | 0 | -0.37 |
firefox | tracemonkey | 7 | Page Request | 100 | 1 | 1 | 0 | -11.94 |
firefox | tracemonkey | 7 | Rendering | 100 | 63 | 63 | 0 | -0.05 |
firefox | tracemonkey | 8 | Overall | 100 | 53 | 54 | 1 | 2.04 |
firefox | tracemonkey | 8 | Page Request | 100 | 1 | 1 | 0 | 17.02 |
firefox | tracemonkey | 8 | Rendering | 100 | 52 | 53 | 1 | 1.82 |
firefox | tracemonkey | 9 | Overall | 100 | 79 | 73 | -6 | -7.86 | faster
firefox | tracemonkey | 9 | Page Request | 100 | 2 | 2 | 0 | -15.14 |
firefox | tracemonkey | 9 | Rendering | 100 | 77 | 71 | -6 | -7.86 | faster
firefox | tracemonkey | 10 | Overall | 100 | 545 | 519 | -27 | -4.86 | faster
firefox | tracemonkey | 10 | Page Request | 100 | 14 | 13 | 0 | -3.56 |
firefox | tracemonkey | 10 | Rendering | 100 | 532 | 506 | -26 | -4.90 | faster
firefox | tracemonkey | 11 | Overall | 100 | 42 | 41 | -1 | -2.50 |
firefox | tracemonkey | 11 | Page Request | 100 | 1 | 1 | 0 | -27.42 | faster
firefox | tracemonkey | 11 | Rendering | 100 | 41 | 40 | -1 | -1.75 |
firefox | tracemonkey | 12 | Overall | 100 | 350 | 332 | -18 | -5.16 | faster
firefox | tracemonkey | 12 | Page Request | 100 | 3 | 3 | 0 | -5.17 |
firefox | tracemonkey | 12 | Rendering | 100 | 347 | 329 | -18 | -5.15 | faster
firefox | tracemonkey | 13 | Overall | 100 | 31 | 31 | 0 | 0.52 |
firefox | tracemonkey | 13 | Page Request | 100 | 1 | 1 | 0 | 4.95 |
firefox | tracemonkey | 13 | Rendering | 100 | 30 | 30 | 0 | 0.20 |
```
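The name-keyed caching described above can be sketched roughly as follows; this is a simplified stand-in for the actual `LocalColorSpaceCache`, whose interface may differ:
```js
// Minimal per-getOperatorList cache: parsed ColorSpaces are stored by
// name, so later lookups during OperatorList building are synchronous.
class SimpleLocalColorSpaceCache {
  constructor() {
    this._byName = new Map();
  }
  getByName(name) {
    return this._byName.get(name) || null;
  }
  set(name, colorSpace) {
    this._byName.set(name, colorSpace);
  }
}
```
Since the cache lives only for the duration of one `getOperatorList` invocation, it's discarded together with the rest of the parsing state.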
  async fetchStandardFontData(name) {
    const cachedData = this.standardFontDataCache.get(name);
    if (cachedData) {
      return new Stream(cachedData);
    }

    // The symbol fonts are not consistent across platforms, so always load
    // the standard font data for them.
    if (
      this.options.useSystemFonts &&
      name !== "Symbol" &&
      name !== "ZapfDingbats"
    ) {
      return null;
    }

    const standardFontNameToFileName = getFontNameToFileMap(),
      filename = standardFontNameToFileName[name];
    let data;

    if (this.options.standardFontDataUrl !== null) {
      const url = `${this.options.standardFontDataUrl}${filename}`;
      const response = await fetch(url);
      if (!response.ok) {
        warn(
          `fetchStandardFontData: failed to fetch file "${url}" with "${response.statusText}".`
        );
      } else {
        data = await response.arrayBuffer();
      }
    } else {
      // Get the data on the main-thread instead.
      try {
        data = await this.handler.sendWithPromise("FetchStandardFontData", {
          filename,
        });
      } catch (e) {
        warn(
          `fetchStandardFontData: failed to fetch file "${filename}" with "${e}".`
        );
      }
    }

    if (!data) {
      return null;
    }
    // Cache the "raw" standard font data, to avoid fetching it repeatedly
    // (see e.g. issue 11399).
    this.standardFontDataCache.set(name, data);

    return new Stream(data);
  }

  async buildFormXObject(
    resources,
    xobj,
    smask,
    operatorList,
    task,
    initialState,
    localColorSpaceCache
  ) {
    const dict = xobj.dict;
    const matrix = dict.getArray("Matrix");
    let bbox = dict.getArray("BBox");
    if (Array.isArray(bbox) && bbox.length === 4) {
      bbox = Util.normalizeRect(bbox);
    } else {
      bbox = null;
    }

    let optionalContent, groupOptions;
    if (dict.has("OC")) {
      optionalContent = await this.parseMarkedContentProps(
        dict.get("OC"),
        resources
      );
    }
    if (optionalContent !== undefined) {
      operatorList.addOp(OPS.beginMarkedContentProps, ["OC", optionalContent]);
    }
    const group = dict.get("Group");
    if (group) {
      groupOptions = {
        matrix,
        bbox,
        smask,
        isolated: false,
        knockout: false,
      };

      const groupSubtype = group.get("S");
      let colorSpace = null;
      if (isName(groupSubtype, "Transparency")) {
        groupOptions.isolated = group.get("I") || false;
        groupOptions.knockout = group.get("K") || false;
        if (group.has("CS")) {
          const cs = group.getRaw("CS");

          const cachedColorSpace = ColorSpace.getCached(
            cs,
            this.xref,
            localColorSpaceCache
          );
          if (cachedColorSpace) {
            colorSpace = cachedColorSpace;
          } else {
            colorSpace = await this.parseColorSpace({
              cs,
              resources,
              localColorSpaceCache,
            });
          }
        }
      }

      if (smask && smask.backdrop) {
        colorSpace = colorSpace || ColorSpace.singletons.rgb;
        smask.backdrop = colorSpace.getRgb(smask.backdrop, 0);
      }

      operatorList.addOp(OPS.beginGroup, [groupOptions]);
    }
Change the signatures of the `PartialEvaluator` "constructor" and its `getOperatorList`/`getTextContent` methods to take parameter objects
Currently these methods accept a large number of parameters, which creates quite unwieldy call-sites. When invoking them, you have to remember not only what arguments to supply, but also the correct order, to avoid runtime errors.
Furthermore, since some of the parameters are optional, you also have to remember to pass e.g. `null` or `undefined` for those ones.
Also, adding new parameters to these methods (which happens occasionally), often becomes unnecessarily tedious (based on personal experience).
Please note that I do *not* think that we need/should convert *every* single method in `evaluator.js` (or elsewhere in `/core` files) to take parameter objects. However, in my opinion, once a method starts relying on approximately five parameters (or even more), passing them in individually becomes quite cumbersome.
With these changes, I obviously needed to update the `evaluator_spec.js` unit-tests. The main change there, apart from the new method signatures[1], is that it's now re-using *one* `PartialEvaluator` instance, since I couldn't see any compelling reason for creating a new one in every single test.
*Note:* If this patch is accepted, my intention is to (time permitting) see if it makes sense to convert additional methods in `evaluator.js` (and other `/core` files) in a similar fashion, but I figured that it'd be a good idea to limit the initial scope somewhat.
---
[1] A fun fact here, note how the `PartialEvaluator` signature used in `evaluator_spec.js` wasn't even correct in the current `master`.
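As an illustration of the difference at a call-site (argument names here are only indicative, not the exact signatures):
```js
// Before: positional parameters; the order must be remembered, and
// optional ones still have to be supplied explicitly.
evaluator.getOperatorList(stream, task, resources, operatorList, null);

// After: a parameter object; order-independent, self-documenting, and
// optional entries can simply be omitted.
evaluator.getOperatorList({ stream, task, resources, operatorList });
```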
    // If it's a group, a new canvas will be created that is the size of the
    // bounding box and translated to the correct position so we don't need to
    // apply the bounding box to it.
    const args = group ? [matrix, null] : [matrix, bbox];
    operatorList.addOp(OPS.paintFormXObjectBegin, args);

    return this.getOperatorList({
      stream: xobj,
      task,
      resources: dict.get("Resources") || resources,
      operatorList,
      initialState,
    }).then(function () {
      operatorList.addOp(OPS.paintFormXObjectEnd, []);

      if (group) {
        operatorList.addOp(OPS.endGroup, [groupOptions]);
      }

      if (optionalContent !== undefined) {
        operatorList.addOp(OPS.endMarkedContent, []);
      }
    });
  }

  _sendImgData(objId, imgData, cacheGlobally = false) {
    const transfers = imgData ? [imgData.data.buffer] : null;

    if (this.parsingType3Font || cacheGlobally) {
      return this.handler.send(
        "commonobj",
        [objId, "Image", imgData],
        transfers
      );
    }
    return this.handler.send(
      "obj",
      [objId, this.pageIndex, "Image", imgData],
      transfers
    );
  }

  async buildPaintImageXObject({
    resources,
    image,
    isInline = false,
    operatorList,
    cacheKey,
    localImageCache,
    localColorSpaceCache,
  }) {
    const dict = image.dict;
    const imageRef = dict.objId;
    const w = dict.get("W", "Width");
    const h = dict.get("H", "Height");

    if (!(w && typeof w === "number") || !(h && typeof h === "number")) {
      warn("Image dimensions are missing, or not numbers.");
      return;
    }
    const maxImageSize = this.options.maxImageSize;
    if (maxImageSize !== -1 && w * h > maxImageSize) {
      const msg = "Image exceeded maximum allowed size and was removed.";

      if (this.options.ignoreErrors) {
        warn(msg);
        return;
      }
      throw new Error(msg);
    }

    let optionalContent;
    if (dict.has("OC")) {
      optionalContent = await this.parseMarkedContentProps(
        dict.get("OC"),
        resources
      );
    }
    if (optionalContent !== undefined) {
      operatorList.addOp(OPS.beginMarkedContentProps, ["OC", optionalContent]);
    }

    const imageMask = dict.get("IM", "ImageMask") || false;
    const interpolate = dict.get("I", "Interpolate");
    let imgData, args;
    if (imageMask) {
      // This depends on a tmpCanvas being filled with the
      // current fillStyle, such that processing the pixel
      // data can't be done here. Instead of creating a
      // complete PDFImage, only read the information needed
      // for later.
      const bitStrideLength = (w + 7) >> 3;
      const imgArray = image.getBytes(
        bitStrideLength * h,
        /* forceClamped = */ true
      );
      const decode = dict.getArray("D", "Decode");

      imgData = PDFImage.createMask({
        imgArray,
        width: w,
        height: h,
        imageIsFromDecodeStream: image instanceof DecodeStream,
        inverseDecode: !!decode && decode[0] > 0,
        interpolate,
      });
      imgData.cached = !!cacheKey;
      args = [imgData];

      operatorList.addOp(OPS.paintImageMaskXObject, args);
      if (cacheKey) {
        localImageCache.set(cacheKey, imageRef, {
          fn: OPS.paintImageMaskXObject,
          args,
        });
      }

      if (optionalContent !== undefined) {
        operatorList.addOp(OPS.endMarkedContent, []);
      }
      return;
    }

    const softMask = dict.get("SM", "SMask") || false;
    const mask = dict.get("Mask") || false;

    const SMALL_IMAGE_DIMENSIONS = 200;
    // Inlining small images into the queue as RGB data
    if (isInline && !softMask && !mask && w + h < SMALL_IMAGE_DIMENSIONS) {
      const imageObj = new PDFImage({
        xref: this.xref,
        res: resources,
        image,
        isInline,
        pdfFunctionFactory: this._pdfFunctionFactory,
        localColorSpaceCache,
      });
      // We force the use of RGBA_32BPP images here, because we can't handle
      // any other kind.
      imgData = imageObj.createImageData(/* forceRGBA = */ true);
      operatorList.addOp(OPS.paintInlineImageXObject, [imgData]);

      if (optionalContent !== undefined) {
        operatorList.addOp(OPS.endMarkedContent, []);
      }
      return;
    }

    // If there is no imageMask, create the PDFImage and a lot
    // of image processing can be done here.
    let objId = `img_${this.idFactory.createObjId()}`,
      cacheGlobally = false;

    if (this.parsingType3Font) {
Re-factor the `idFactory` functionality, used in the `core/`-code, and move the `fontID` generation into it
Note how the `getFontID`-method in `src/core/fonts.js` is *completely* global, rather than properly tied to the current document. This means that if you repeatedly open and parse/render, and then close, even the *same* PDF document the `fontID`s will still be incremented continuously.
For comparison the `createObjId` method, on `idFactory`, will always create a *consistent* id, assuming of course that the document and its pages are parsed/rendered in the same order.
In order to address this inconsistency, it thus seems reasonable to add a new `createFontId` method on the `idFactory` and use that when obtaining `fontID`s. (When the current `getFontID` method was added the `idFactory` didn't actually exist yet, which explains why the code looks the way it does.)
*Please note:* Since the document id is (still) part of the `loadedName`, it's thus not possible for different documents to have identical font names.
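A minimal sketch of the per-document factory idea (illustrative only; the real `idFactory` is defined elsewhere in the `/core` code and its id formats may differ):
```js
// Counters are tied to the document (and page), so repeatedly opening,
// parsing and closing the *same* document yields consistent ids.
function createIdFactory(docId, pageIndex) {
  let objIdCounter = 0;
  let fontIdCounter = 0;
  return {
    getDocId: () => docId,
    createObjId: () => `p${pageIndex}_${++objIdCounter}`,
    createFontId: () => `f${++fontIdCounter}`,
  };
}
```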
      objId = `${this.idFactory.getDocId()}_type3_${objId}`;
    } else if (imageRef) {
      cacheGlobally = this.globalImageCache.shouldCache(
        imageRef,
        this.pageIndex
      );

      if (cacheGlobally) {
        objId = `${this.idFactory.getDocId()}_${objId}`;
      }
    }

    // Ensure that the dependency is added before the image is decoded.
    operatorList.addDependency(objId);
    args = [objId, w, h];
Attempt to cache repeated images at the document, rather than the page, level (issue 11878)
Currently image resources, as opposed to e.g. font resources, are handled exclusively on a page-specific basis. Generally speaking this makes sense, since pages are separate from each other. However, there are PDF documents where many (or even all) pages actually reference exactly the same image resources (through the XRef table). Hence, in some cases, we're decoding the *same* images over and over for every page, which is obviously slow and wastes both CPU and memory resources better used elsewhere.[1]
Obviously we cannot simply treat all image resources as if they're used throughout the entire PDF document, since that would end up increasing memory usage too much.[2]
However, by introducing a `GlobalImageCache` in the worker we can track image resources that appear on more than one page. Hence we can switch image resources from being page-specific to being document-specific, once the image resource has been seen on more than a certain number of pages.
In many cases, such as e.g. the referenced issue, this patch will thus lead to reduced memory usage for image resources. Scrolling through all pages of the document, there are now only a few main-thread copies of the same image data, as opposed to one for each rendered page (i.e. there could theoretically be *twenty* copies of the image data).
While this obviously benefits both CPU and memory usage in this case, for *very* large image data this patch *may* possibly increase persistent main-thread memory usage a tiny bit. Thus, to avoid negatively affecting memory usage too much in general, particularly on the main-thread, the `GlobalImageCache` will *only* cache a certain number of image resources at the document level and simply fall back to the default behaviour.
Unfortunately the asynchronous nature of the code, with ranged/streamed loading of data, actually makes all of this much more complicated than if all data could be assumed to be immediately available.[3]
*Please note:* The patch will lead to *small* movement in some existing test-cases, since we're now using the built-in PDF.js JPEG decoder more. This was done in order to simplify the overall implementation, especially on the main-thread, by limiting it to only the `OPS.paintImageXObject` operator.
---
[1] There's e.g. PDF documents that use the same image as background on all pages.
[2] Given that data stored in the `commonObjs`, on the main-thread, are only cleared manually through `PDFDocumentProxy.cleanup`. This as opposed to data stored in the `objs` of each page, which is automatically removed when the page is cleaned-up e.g. by being evicted from the cache in the default viewer.
[3] If the latter case were true, we could simply check for repeat images *before* parsing started and thus avoid handling *any* duplicate image resources.
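The promotion threshold described above can be sketched roughly as follows (a hedged simplification; the real `GlobalImageCache` also enforces the caching limits discussed further below):
```js
// Illustrative sketch: an image Ref seen on at least NUM_PAGES_THRESHOLD
// pages is promoted from page-specific to document-specific caching.
const NUM_PAGES_THRESHOLD = 2;

class SimpleGlobalImageCache {
  constructor() {
    this._refPages = new Map(); // Ref string -> Set of page indexes.
  }
  addPageIndex(ref, pageIndex) {
    let pages = this._refPages.get(ref);
    if (!pages) {
      pages = new Set();
      this._refPages.set(ref, pages);
    }
    pages.add(pageIndex);
  }
  shouldCache(ref, pageIndex) {
    this.addPageIndex(ref, pageIndex);
    return this._refPages.get(ref).size >= NUM_PAGES_THRESHOLD;
  }
}
```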
Improve how Type3-fonts with dependencies are handled
While the `CharProcs` streams of Type3-fonts *usually* don't rely on dependencies, such as e.g. images, it does happen in some cases.
Currently any dependencies are simply appended to the parent operatorList, which in practice means *only* the operatorList of the *first* page where the Type3-font is being used.
However, there's one thing that's slightly unfortunate with that approach: Since fonts are global to the PDF document, we really ought to ensure that any Type3 dependencies are appended to the operatorList of *all* pages where the Type3-font is being used. Otherwise there's a theoretical risk that, if one page has its rendering paused, another page may try to use a Type3-font whose dependencies are not yet fully resolved. In that case there would be errors, since Type3 operatorLists are executed synchronously.
Hence this patch, which ensures that all relevant pages will have Type3 dependencies appended to the main operatorList. (Note here that the `OperatorList.addDependencies` method, via `OperatorList.addDependency`, ensures that a dependency is only added *once* to any operatorList.)
Finally, these changes also remove the need for the "waiting for the main-thread"-hack that was added to `PartialEvaluator.buildPaintImageXObject` as part of fixing issue 10717.
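The deduplication relied upon here is, in essence, the following (a simplified sketch of the `OperatorList` methods mentioned above):
```js
// A dependency is recorded at most once per operator list, so appending
// the same Type3 dependencies on every page that uses the font is safe.
class MiniOperatorList {
  constructor() {
    this.dependencies = new Set();
    this.ops = [];
  }
  addOp(fn, args) {
    this.ops.push({ fn, args });
  }
  addDependency(dependency) {
    if (this.dependencies.has(dependency)) {
      return;
    }
    this.dependencies.add(dependency);
    this.addOp("dependency", [dependency]);
  }
  addDependencies(dependencies) {
    for (const dependency of dependencies) {
      this.addDependency(dependency);
    }
  }
}
```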

    PDFImage.buildImage({
      xref: this.xref,
      res: resources,
      image,
      isInline,
      pdfFunctionFactory: this._pdfFunctionFactory,
      localColorSpaceCache,
    })
      .then(imageObj => {
        imgData = imageObj.createImageData(/* forceRGBA = */ false);
Improve global image caching for small images (PR 11912 follow-up, issue 12098)
When implementing the `GlobalImageCache` functionality I was mostly worried about the effect of *very large* images, hence the maximum number of cached images was purposely kept quite low[1].
However, there's one fairly obvious problem with that approach: In documents with hundreds, or even thousands, of *small* images the `GlobalImageCache` as implemented becomes essentially pointless.
Hence this patch, where the `GlobalImageCache`-implementation is changed in the following ways:
- We're still guaranteed to be able to cache a *minimum* number of images, set to `10` (similar as before).
- If the *total* size of all the cached image data is below a threshold[2], we're allowed to cache additional images.
This patch thus *improves*, but doesn't completely fix, issue 12098. Note that that document is created by a *very poor* PDF generator, since every single page contains the *entire* document (with all of its /Resources) and clipping is used to create the individual pages.[3]
---
[1] Currently set to `10` images; imagine what would happen to overall memory usage if we encountered e.g. 50 images each 10 MB in size.
[2] This value was chosen, somewhat randomly, to be `40` megabytes; basically five times the [maximum individual image size per page](https://github.com/mozilla/pdf.js/blob/6249ef517d3aaacc9aa6c9e1f5377acfaa4bc2a7/src/display/api.js#L2483-L2484).
[3] This surely has to be some kind of record w.r.t. how badly PDF generators can mess things up...
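The two-tier budget described above amounts to roughly the following check (a hedged sketch; the constants mirror the footnotes, while the names are illustrative):
```js
const MIN_IMAGES_TO_CACHE = 10; // Always allowed, as before this patch.
const MAX_BYTE_SIZE = 40e6; // ~40 MB total budget for additional images.

// May yet another image be added to the document-level cache?
function canCacheMoreImages(numCachedImages, totalCachedByteSize) {
  if (numCachedImages < MIN_IMAGES_TO_CACHE) {
    return true;
  }
  return totalCachedByteSize < MAX_BYTE_SIZE;
}
```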
        if (cacheKey && imageRef && cacheGlobally) {
          this.globalImageCache.addByteSize(imageRef, imgData.data.length);
        }
        return this._sendImgData(objId, imgData, cacheGlobally);
      })
      .catch(reason => {
        warn(`Unable to decode image "${objId}": "${reason}".`);

        return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
      });

    operatorList.addOp(OPS.paintImageXObject, args);
    if (cacheKey) {
      localImageCache.set(cacheKey, imageRef, {
        fn: OPS.paintImageXObject,
        args,
      });

      if (imageRef) {
        assert(!isInline, "Cannot cache an inline image globally.");
        this.globalImageCache.addPageIndex(imageRef, this.pageIndex);

        if (cacheGlobally) {
          this.globalImageCache.setData(imageRef, {
            objId,
            fn: OPS.paintImageXObject,
            args,
            byteSize: 0, // Temporary entry, note `addByteSize` above.
          });
        }
      }
    }

    if (optionalContent !== undefined) {
      operatorList.addOp(OPS.endMarkedContent, []);
    }
  }

  handleSMask(
    smask,
    resources,
    operatorList,
    task,
    stateManager,
    localColorSpaceCache
  ) {
    const smaskContent = smask.get("G");
    const smaskOptions = {
      subtype: smask.get("S").name,
      backdrop: smask.get("BC"),
    };

    // The SMask might have an alpha/luminosity value transfer function --
    // we will build a map of integer values in range 0..255 to be fast.
    const transferObj = smask.get("TR");
    if (isPDFFunction(transferObj)) {
      const transferFn = this._pdfFunctionFactory.create(transferObj);
      const transferMap = new Uint8Array(256);
      const tmp = new Float32Array(1);
      for (let i = 0; i < 256; i++) {
        tmp[0] = i / 255;
        transferFn(tmp, 0, tmp, 0);
        transferMap[i] = (tmp[0] * 255) | 0;
      }
      smaskOptions.transferMap = transferMap;
    }

    return this.buildFormXObject(
      resources,
      smaskContent,
      smaskOptions,
      operatorList,
      task,
      stateManager.state.clone(),
      localColorSpaceCache
    );
  }

  handleTransferFunction(tr) {
    let transferArray;
    if (Array.isArray(tr)) {
      transferArray = tr;
    } else if (isPDFFunction(tr)) {
      transferArray = [tr];
    } else {
      return null; // Not a valid transfer function entry.
    }

    const transferMaps = [];
    let numFns = 0,
      numEffectfulFns = 0;
    for (const entry of transferArray) {
      const transferObj = this.xref.fetchIfRef(entry);
      numFns++;

      if (isName(transferObj, "Identity")) {
        transferMaps.push(null);
        continue;
      } else if (!isPDFFunction(transferObj)) {
        return null; // Not a valid transfer function object.
      }

      const transferFn = this._pdfFunctionFactory.create(transferObj);
      const transferMap = new Uint8Array(256),
        tmp = new Float32Array(1);
      for (let j = 0; j < 256; j++) {
        tmp[0] = j / 255;
        transferFn(tmp, 0, tmp, 0);
        transferMap[j] = (tmp[0] * 255) | 0;
      }
      transferMaps.push(transferMap);
      numEffectfulFns++;
    }

    if (!(numFns === 1 || numFns === 4)) {
      return null; // Only 1 or 4 functions are supported, by the specification.
    }
    if (numEffectfulFns === 0) {
      return null; // Only /Identity transfer functions found, which are no-ops.
    }
    return transferMaps;
  }

  handleTilingType(
    fn,
    color,
    resources,
    pattern,
    patternDict,
    operatorList,
    task,
    localTilingPatternCache
  ) {
    // Create an IR of the pattern code.
    const tilingOpList = new OperatorList();
    // Merge the available resources, to prevent issues when the patternDict
    // is missing some /Resources entries (fixes issue6541.pdf).
    const patternResources = Dict.merge({
      xref: this.xref,
      dictArray: [patternDict.get("Resources"), resources],
    });

    return this.getOperatorList({
      stream: pattern,
      task,
      resources: patternResources,
      operatorList: tilingOpList,
    })
      .then(function () {
        const operatorListIR = tilingOpList.getIR();
        const tilingPatternIR = getTilingPatternIR(
          operatorListIR,
          patternDict,
          color
        );
        // Add the dependencies to the parent operator list so they are
        // resolved before the sub operator list is executed synchronously.
        operatorList.addDependencies(tilingOpList.dependencies);
        operatorList.addOp(fn, tilingPatternIR);

        if (patternDict.objId) {
          localTilingPatternCache.set(/* name = */ null, patternDict.objId, {
            operatorListIR,
            dict: patternDict,
          });
        }
      })
      .catch(reason => {
        if (reason instanceof AbortException) {
          return;
        }
        if (this.options.ignoreErrors) {
          // Error(s) in the TilingPattern -- sending unsupported feature
          // notification and allow rendering to continue.
          this.handler.send("UnsupportedFeature", {
            featureId: UNSUPPORTED_FEATURES.errorTilingPattern,
          });
          warn(`handleTilingType - ignoring pattern: "${reason}".`);
          return;
        }
        throw reason;
      });
  }

  handleSetFont(
    resources,
    fontArgs,
    fontRef,
    operatorList,
    task,
    state,
    fallbackFontDict = null,
    cssFontInfo = null
  ) {
    const fontName =
      fontArgs && fontArgs[0] instanceof Name ? fontArgs[0].name : null;

    return this.loadFont(
      fontName,
      fontRef,
      resources,
      fallbackFontDict,
      cssFontInfo
    )
      .then(translated => {
        if (!translated.font.isType3Font) {
          return translated;
        }
        return translated
          .loadType3Data(this, resources, task)
          .then(function () {
            // Add the dependencies to the parent operatorList so they are
            // resolved before Type3 operatorLists are executed synchronously.
            operatorList.addDependencies(translated.type3Dependencies);

            return translated;
          })
          .catch(reason => {
            // Error in the font data -- sending unsupported feature
            // notification.
            this.handler.send("UnsupportedFeature", {
              featureId: UNSUPPORTED_FEATURES.errorFontLoadType3,
            });
            return new TranslatedFont({
              loadedName: "g_font_error",
              font: new ErrorFont(`Type3 font load error: ${reason}`),
              dict: translated.font,
              evaluatorOptions: this.options,
            });
          });
      })
      .then(translated => {
        state.font = translated.font;
        translated.send(this.handler);
        return translated.loadedName;
      });
  }
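
  // Example (illustrative): the `Tf` operator, e.g. `/F1 12 Tf`, ends up
  // here roughly as
  //
  //   this.handleSetFont(resources, [Name.get("F1"), 12],
  //                      /* fontRef = */ null, operatorList, task,
  //                      stateManager.state);
  //
  // which resolves /F1 through the /Font resource dictionary, loads and
  // translates the font, and reports its `loadedName` back to the caller
  // so it can be registered as an operator-list dependency.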

  handleText(chars, state) {
    const font = state.font;
    const glyphs = font.charsToGlyphs(chars);

    if (font.data) {
      const isAddToPathSet = !!(
        state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG
      );
      if (
        isAddToPathSet ||
        state.fillColorSpace.name === "Pattern" ||
        font.disableFontFace ||
        this.options.disableFontFace
      ) {
        PartialEvaluator.buildFontPaths(
          font,
          glyphs,
          this.handler,
          this.options
        );
      }
    }
    return glyphs;
  }
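
  // Note: `TextRenderingMode.ADD_TO_PATH_FLAG` is a bit-flag, hence the
  // bitwise test above; e.g. render mode 7 (`7 Tr`, add text to the
  // clipping path) has the flag set, while plain fill mode 0 does not.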

  ensureStateFont(state) {
    if (state.font) {
      return;
    }
    const reason = new FormatError(
      "Missing setFont (Tf) operator before text rendering operator."
    );

    if (this.options.ignoreErrors) {
      // Missing setFont operator before text rendering operator -- sending
      // unsupported feature notification and allow rendering to continue.
      this.handler.send("UnsupportedFeature", {
        featureId: UNSUPPORTED_FEATURES.errorFontState,
      });
      warn(`ensureStateFont: "${reason}".`);
      return;
    }
    throw reason;
  }
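
  // Example (illustrative): a corrupt content stream such as
  //
  //   BT (Hello) Tj ET
  //
  // reaches a text-showing operator without any preceding `Tf`. With
  // `ignoreErrors` set this method only warns and returns, allowing
  // rendering to continue; otherwise the FormatError above is thrown.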

  async setGState({
    resources,
    gState,
    operatorList,
    cacheKey,
    task,
    stateManager,
    localGStateCache,
    localColorSpaceCache,
  }) {
    const gStateRef = gState.objId;
    let isSimpleGState = true;
    // This array holds the converted/processed state data.
    const gStateObj = [];
    const gStateKeys = gState.getKeys();
    let promise = Promise.resolve();
    for (let i = 0, ii = gStateKeys.length; i < ii; i++) {
      const key = gStateKeys[i];
      const value = gState.get(key);
      switch (key) {
        case "Type":
          break;
        case "LW":
        case "LC":
        case "LJ":
        case "ML":
        case "D":
        case "RI":
        case "FL":
        case "CA":
        case "ca":
          gStateObj.push([key, value]);
          break;
        case "Font":
          isSimpleGState = false;

          promise = promise.then(() => {
            return this.handleSetFont(
              resources,
              null,
              value[0],
              operatorList,
              task,
              stateManager.state
            ).then(function (loadedName) {
              operatorList.addDependency(loadedName);
              gStateObj.push([key, [loadedName, value[1]]]);
            });
          });
          break;
        case "BM":
          gStateObj.push([key, normalizeBlendMode(value)]);
          break;
        case "SMask":
          if (isName(value, "None")) {
            gStateObj.push([key, false]);
            break;
          }
          if (value instanceof Dict) {
            isSimpleGState = false;

            promise = promise.then(() => {
              return this.handleSMask(
                value,
                resources,
                operatorList,
                task,
                stateManager,
                localColorSpaceCache
              );
            });
            gStateObj.push([key, true]);
          } else {
            warn("Unsupported SMask type");
          }
          break;
        case "TR":
          const transferMaps = this.handleTransferFunction(value);
          gStateObj.push([key, transferMaps]);
          break;
        // Only generate info log messages for the following since
        // they are unlikely to have a big impact on the rendering.
        case "OP":
        case "op":
        case "OPM":
        case "BG":
        case "BG2":
        case "UCR":
        case "UCR2":
        case "TR2":
        case "HT":
        case "SM":
        case "SA":
        case "AIS":
        case "TK":
          // TODO implement these operators.
          info("graphic state operator " + key);
          break;
        default:
          info("Unknown graphic state operator " + key);
          break;
      }
    }
    return promise.then(function () {
      if (gStateObj.length > 0) {
        operatorList.addOp(OPS.setGState, [gStateObj]);
      }

      if (isSimpleGState) {
        localGStateCache.set(cacheKey, gStateRef, gStateObj);
      }
    });
  }
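
  // Example (illustrative): an ExtGState dictionary such as
  //
  //   << /Type /ExtGState /LW 2 /ca 0.5 >>
  //
  // is converted above into `[["LW", 2], ["ca", 0.5]]` and emitted as a
  // single OPS.setGState operation; only such "simple" states (no /Font,
  // no /SMask dictionary) are eligible for the local gState cache.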

  loadFont(
    fontName,
    font,
    resources,
    fallbackFontDict = null,
    cssFontInfo = null
  ) {
    const errorFont = async () => {
      return new TranslatedFont({
        loadedName: "g_font_error",
        font: new ErrorFont(`Font "${fontName}" is not available.`),
        dict: font,
        evaluatorOptions: this.options,
      });
    };

    const xref = this.xref;
    let fontRef;
    if (font) {
      // Loading by ref.
      if (!(font instanceof Ref)) {
        throw new FormatError('The "font" object should be a reference.');
      }
      fontRef = font;
    } else {
      // Loading by name.
      const fontRes = resources.get("Font");
      if (fontRes) {
        fontRef = fontRes.getRaw(fontName);
      }
    }
    if (!fontRef) {
      const partialMsg = `Font "${
        fontName || (font && font.toString())
      }" is not available`;

      if (!this.options.ignoreErrors && !this.parsingType3Font) {
        warn(`${partialMsg}.`);
        return errorFont();
      }
      // Font not found -- sending unsupported feature notification.
      this.handler.send("UnsupportedFeature", {
        featureId: UNSUPPORTED_FEATURES.errorFontMissing,
      });
      warn(`${partialMsg} -- attempting to fallback to a default font.`);

      // Falling back to a default font to avoid completely broken rendering,
      // but note that there're no guarantees that things will look "correct".
      if (fallbackFontDict) {
        fontRef = fallbackFontDict;
      } else {
        fontRef = PartialEvaluator.fallbackFontDict;
      }
    }

    if (this.parsingType3Font && this.type3FontRefs.has(fontRef)) {
      return errorFont();
    }

    if (this.fontCache.has(fontRef)) {
      return this.fontCache.get(fontRef);
    }

    font = xref.fetchIfRef(fontRef);
    if (!(font instanceof Dict)) {
      return errorFont();
    }

    // We are holding `font.cacheKey` references only for `fontRef`s that
    // are not actually `Ref`s, but rather `Dict`s. See explanation below.
    if (font.cacheKey && this.fontCache.has(font.cacheKey)) {
      return this.fontCache.get(font.cacheKey);
    }

    const fontCapability = createPromiseCapability();

    let preEvaluatedFont;
    try {
      preEvaluatedFont = this.preEvaluateFont(font);
      preEvaluatedFont.cssFontInfo = cssFontInfo;
    } catch (reason) {
      warn(`loadFont - preEvaluateFont failed: "${reason}".`);
      return errorFont();
    }
    const { descriptor, hash } = preEvaluatedFont;

    const fontRefIsRef = fontRef instanceof Ref;
    let fontID;
    if (fontRefIsRef) {
      fontID = `f${fontRef.toString()}`;
    }

    if (hash && descriptor instanceof Dict) {
      if (!descriptor.fontAliases) {
        descriptor.fontAliases = Object.create(null);
      }
      const fontAliases = descriptor.fontAliases;

      if (fontAliases[hash]) {
        const aliasFontRef = fontAliases[hash].aliasRef;
        if (fontRefIsRef && aliasFontRef && this.fontCache.has(aliasFontRef)) {
          this.fontCache.putAlias(fontRef, aliasFontRef);
          return this.fontCache.get(fontRef);
        }
      } else {
        fontAliases[hash] = {
          fontID: this.idFactory.createFontId(),
        };
      }

      if (fontRefIsRef) {
        fontAliases[hash].aliasRef = fontRef;
      }
      fontID = fontAliases[hash].fontID;
    }

    // Workaround for bad PDF generators that reference fonts incorrectly,
    // where `fontRef` is a `Dict` rather than a `Ref` (fixes bug946506.pdf).
    // In this case we cannot put the font into `this.fontCache` (which is
    // a `RefSetCache`), since it's not possible to use a `Dict` as a key.
    //
    // However, if we don't cache the font it's not possible to remove it
    // when `cleanup` is triggered from the API, which causes issues on
    // subsequent rendering operations (see issue7403.pdf) and would force us
    // to unnecessarily load the same fonts over and over.
    //
    // Instead, we cheat a bit by using a modified `fontID` as a key in
    // `this.fontCache`, to allow the font to be cached.
    // NOTE: This works because `RefSetCache` calls `toString()` on provided
    //       keys. Also, since `fontRef` is used when getting cached fonts,
    //       we'll not accidentally match fonts cached with the `fontID`.
    if (fontRefIsRef) {
      this.fontCache.put(fontRef, fontCapability.promise);
    } else {
      if (!fontID) {
        fontID = this.idFactory.createFontId();
      }
      font.cacheKey = `cacheKey_${fontID}`;
      this.fontCache.put(font.cacheKey, fontCapability.promise);
    }
    assert(
      fontID && fontID.startsWith("f"),
      'The "fontID" must be (correctly) defined.'
    );

    // Keep track of each font we translated so the caller can
    // load them asynchronously before calling display on a page.
    font.loadedName = `${this.idFactory.getDocId()}_${fontID}`;

    this.translateFont(preEvaluatedFont)
      .then(translatedFont => {
        if (translatedFont.fontType !== undefined) {
          xref.stats.addFontType(translatedFont.fontType);
        }

        fontCapability.resolve(
          new TranslatedFont({
            loadedName: font.loadedName,
            font: translatedFont,
            dict: font,
            evaluatorOptions: this.options,
          })
        );
      })
      .catch(reason => {
        // TODO fontCapability.reject?
        // Error in the font data -- sending unsupported feature notification.
        this.handler.send("UnsupportedFeature", {
          featureId: UNSUPPORTED_FEATURES.errorFontTranslate,
        });
        warn(`loadFont - translateFont failed: "${reason}".`);

        try {
          // error, but it's still nice to have font type reported
          const fontFile3 = descriptor && descriptor.get("FontFile3");
          const subtype = fontFile3 && fontFile3.get("Subtype");
          const fontType = getFontType(
            preEvaluatedFont.type,
            subtype && subtype.name
          );
          if (fontType !== undefined) {
            xref.stats.addFontType(fontType);
          }
        } catch (ex) {}

        fontCapability.resolve(
          new TranslatedFont({
            loadedName: font.loadedName,
            font: new ErrorFont(
              reason instanceof Error ? reason.message : reason
            ),
            dict: font,
            evaluatorOptions: this.options,
          })
        );
      });
    return fontCapability.promise;
  }
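
  // Example (illustrative): for a well-formed `fontRef` of Ref(42, 0) the
  // font is cached under the Ref itself and `fontID` becomes "f42R". For
  // the corrupt Dict-as-reference case described above, the promise is
  // instead stored under a synthetic string key, e.g. "cacheKey_f1"
  // (assuming the idFactory hands out ids of the form "f1", "f2", ...).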

  buildPath(operatorList, fn, args, parsingText = false) {
    const lastIndex = operatorList.length - 1;
    if (!args) {
      args = [];
    }
    if (
      lastIndex < 0 ||
      operatorList.fnArray[lastIndex] !== OPS.constructPath
    ) {
      // Handle corrupt PDF documents that contain path operators inside of
      // text objects, which may shift subsequent text, by enclosing the path
      // operator in save/restore operators (fixes issue10542_reduced.pdf).
      //
      // Note that this will effectively disable the optimization in the
      // `else` branch below, but given that this type of corruption is
      // *extremely* rare that shouldn't really matter much in practice.
      if (parsingText) {
        warn(`Encountered path operator "${fn}" inside of a text object.`);
        operatorList.addOp(OPS.save, null);
      }

      operatorList.addOp(OPS.constructPath, [[fn], args]);

      if (parsingText) {
        operatorList.addOp(OPS.restore, null);
      }
    } else {
      const opArgs = operatorList.argsArray[lastIndex];
      opArgs[0].push(fn);
      Array.prototype.push.apply(opArgs[1], args);
    }
  }
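
  // Example (illustrative): consecutive path operators such as
  //
  //   0 0 m  10 0 l  10 10 l
  //
  // collapse into one OPS.constructPath operation with arguments
  // `[[OPS.moveTo, OPS.lineTo, OPS.lineTo], [0, 0, 10, 0, 10, 10]]`,
  // thanks to the `else` branch above appending to the previous op.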

  parseColorSpace({ cs, resources, localColorSpaceCache }) {
    return ColorSpace.parseAsync({
      cs,
      xref: this.xref,
      resources,
      pdfFunctionFactory: this._pdfFunctionFactory,
      localColorSpaceCache,
    }).catch(reason => {
      if (reason instanceof AbortException) {
        return null;
      }
      if (this.options.ignoreErrors) {
        // Error(s) in the ColorSpace -- sending unsupported feature
        // notification and allow rendering to continue.
        this.handler.send("UnsupportedFeature", {
          featureId: UNSUPPORTED_FEATURES.errorColorSpace,
        });
        warn(`parseColorSpace - ignoring ColorSpace: "${reason}".`);
        return null;
      }
      throw reason;
    });
  }
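
  // Note: the rejection handler above resolves to `null` both when parsing
  // is aborted and when `ignoreErrors` is set, so callers have to treat a
  // `null` color space as "skip this operation" rather than as a failure.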

  parseShading({
    shading,
    resources,
    localColorSpaceCache,
    localShadingPatternCache,
  }) {
    // Shadings and patterns may be referenced by the same name but the
    // resource dictionary could be different so we can't use the name for
    // the cache key.
    let id = localShadingPatternCache.get(shading);
    if (!id) {
      const shadingFill = Pattern.parseShading(
        shading,
        this.xref,
        resources,
        this.handler,
        this._pdfFunctionFactory,
        localColorSpaceCache
      );
      const patternIR = shadingFill.getIR();
      id = `pattern_${this.idFactory.createObjId()}`;
      localShadingPatternCache.set(shading, id);
      this.handler.send("obj", [id, this.pageIndex, "Pattern", patternIR]);
    }
    return id;
  }
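
  // Note: the cache above is a plain `Map` keyed on the shading object
  // itself (by identity), not on its name -- two resource entries that
  // resolve to the same /Shading dictionary share one "pattern_*" objId,
  // while identically named shadings from different resource dictionaries
  // do not collide.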

  handleColorN(
    operatorList,
    fn,
    args,
    cs,
    patterns,
    resources,
    task,
    localColorSpaceCache,
    localTilingPatternCache,
    localShadingPatternCache
  ) {
    // compile tiling patterns
    const patternName = args.pop();
    // SCN/scn applies patterns along with normal colors
    if (patternName instanceof Name) {
      const rawPattern = patterns.getRaw(patternName.name);

      const localTilingPattern =
        rawPattern instanceof Ref &&
        localTilingPatternCache.getByRef(rawPattern);
      if (localTilingPattern) {
        try {
          const color = cs.base ? cs.base.getRgb(args, 0) : null;
          const tilingPatternIR = getTilingPatternIR(
            localTilingPattern.operatorListIR,
            localTilingPattern.dict,
            color
          );
          operatorList.addOp(fn, tilingPatternIR);
          return undefined;
        } catch (ex) {
          // Handle any errors during normal TilingPattern parsing.
        }
      }

      const pattern = this.xref.fetchIfRef(rawPattern);
      if (pattern) {
        const dict = pattern instanceof BaseStream ? pattern.dict : pattern;
        const typeNum = dict.get("PatternType");

        if (typeNum === PatternType.TILING) {
          const color = cs.base ? cs.base.getRgb(args, 0) : null;
          return this.handleTilingType(
            fn,
            color,
            resources,
            pattern,
            dict,
            operatorList,
            task,
            localTilingPatternCache
          );
        } else if (typeNum === PatternType.SHADING) {
          const shading = dict.get("Shading");
          const matrix = dict.getArray("Matrix");
          const objId = this.parseShading({
            shading,
            resources,
            localColorSpaceCache,
            localShadingPatternCache,
          });
          operatorList.addOp(fn, ["Shading", objId, matrix]);
          return undefined;
        }
        throw new FormatError(`Unknown PatternType: ${typeNum}`);
      }
    }
    throw new FormatError(`Unknown PatternName: ${patternName}`);
  }
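
  // Example (illustrative): for `0.2 0.4 0.6 /P1 scn` with an uncolored
  // tiling pattern (PaintType 2), `args.pop()` above removes the /P1 Name
  // so that only the numeric components `[0.2, 0.4, 0.6]` are passed to
  // `cs.base.getRgb(args, 0)`.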

  _parseVisibilityExpression(array, nestingCounter, currentResult) {
    const MAX_NESTING = 10;
    if (++nestingCounter > MAX_NESTING) {
      warn("Visibility expression is too deeply nested");
      return;
    }
    const length = array.length;
    const operator = this.xref.fetchIfRef(array[0]);
    if (length < 2 || !(operator instanceof Name)) {
      warn("Invalid visibility expression");
      return;
    }
    switch (operator.name) {
      case "And":
      case "Or":
      case "Not":
        currentResult.push(operator.name);
        break;
      default:
        warn(`Invalid operator ${operator.name} in visibility expression`);
        return;
    }
    for (let i = 1; i < length; i++) {
      const raw = array[i];
      const object = this.xref.fetchIfRef(raw);
      if (Array.isArray(object)) {
        const nestedResult = [];
        currentResult.push(nestedResult);
        // Recursively parse a subarray.
        this._parseVisibilityExpression(object, nestingCounter, nestedResult);
      } else if (raw instanceof Ref) {
        // Reference to an OCG dictionary.
        currentResult.push(raw.toString());
      }
    }
  }
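
  // Example (illustrative): a /VE entry such as
  //
  //   [/Or 10 0 R [/Not 11 0 R]]
  //
  // parses into `["Or", "10R", ["Not", "11R"]]`, with OCG Refs flattened
  // to strings and nesting capped at MAX_NESTING levels.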

  async parseMarkedContentProps(contentProperties, resources) {
    let optionalContent;
    if (contentProperties instanceof Name) {
      const properties = resources.get("Properties");
      optionalContent = properties.get(contentProperties.name);
    } else if (contentProperties instanceof Dict) {
      optionalContent = contentProperties;
    } else {
      throw new FormatError("Optional content properties malformed.");
    }

    const optionalContentType = optionalContent.get("Type").name;
    if (optionalContentType === "OCG") {
      return {
        type: optionalContentType,
        id: optionalContent.objId,
      };
    } else if (optionalContentType === "OCMD") {
      const expression = optionalContent.get("VE");
      if (Array.isArray(expression)) {
        const result = [];
        this._parseVisibilityExpression(expression, 0, result);
        if (result.length > 0) {
          return {
            type: "OCMD",
            expression: result,
          };
        }
      }

      const optionalContentGroups = optionalContent.get("OCGs");
      if (
        Array.isArray(optionalContentGroups) ||
        optionalContentGroups instanceof Dict
      ) {
        const groupIds = [];
        if (Array.isArray(optionalContentGroups)) {
          for (const ocg of optionalContentGroups) {
            groupIds.push(ocg.toString());
          }
        } else {
          // Dictionary, just use the obj id.
          groupIds.push(optionalContentGroups.objId);
        }

        return {
          type: optionalContentType,
          ids: groupIds,
          policy:
            optionalContent.get("P") instanceof Name
              ? optionalContent.get("P").name
              : null,
          expression: null,
        };
      } else if (optionalContentGroups instanceof Ref) {
        return {
          type: optionalContentType,
          id: optionalContentGroups.toString(),
        };
      }
    }
    return null;
  }
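
  // Example (illustrative): a marked-content sequence
  //
  //   /OC /MC0 BDC ... EMC
  //
  // resolves /MC0 through the /Properties resource dictionary; an OCG
  // yields `{ type: "OCG", id }`, while an OCMD is reduced to either a
  // visibility expression or a list of group ids plus the /P policy.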
|
|
|
|
|
|
2020-07-05 19:20:10 +09:00
|
|
|
|
getOperatorList({
|
|
|
|
|
stream,
|
|
|
|
|
task,
|
|
|
|
|
resources,
|
|
|
|
|
operatorList,
|
|
|
|
|
initialState = null,
|
2020-10-22 00:21:33 +09:00
|
|
|
|
fallbackFontDict = null,
|
2020-07-05 19:20:10 +09:00
|
|
|
|
}) {
|
|
|
|
|
// Ensure that `resources`/`initialState` is correctly initialized,
|
|
|
|
|
// even if the provided parameter is e.g. `null`.
|
|
|
|
|
resources = resources || Dict.empty;
|
|
|
|
|
initialState = initialState || new EvalState();
|
2014-03-23 03:15:51 +09:00
|
|
|
|
|
2020-07-05 19:20:10 +09:00
|
|
|
|
if (!operatorList) {
|
|
|
|
|
throw new Error('getOperatorList: missing "operatorList" parameter');
|
|
|
|
|
}
|
2017-09-17 20:35:18 +09:00
|
|
|
|
|
2021-05-06 16:39:21 +09:00
|
|
|
|
const self = this;
|
|
|
|
|
const xref = this.xref;
|
2020-07-05 19:20:10 +09:00
|
|
|
|
let parsingText = false;
|
|
|
|
|
const localImageCache = new LocalImageCache();
|
|
|
|
|
const localColorSpaceCache = new LocalColorSpaceCache();
|
2020-07-11 20:52:11 +09:00
|
|
|
|
const localGStateCache = new LocalGStateCache();
|
2020-10-09 00:33:23 +09:00
|
|
|
|
const localTilingPatternCache = new LocalTilingPatternCache();
|
2021-07-22 04:27:39 +09:00
|
|
|
|
const localShadingPatternCache = new Map();

    const xobjs = resources.get("XObject") || Dict.empty;
    const patterns = resources.get("Pattern") || Dict.empty;
    const stateManager = new StateManager(initialState);
    const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
    const timeSlotManager = new TimeSlotManager();

    function closePendingRestoreOPS(argument) {
      for (let i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
        operatorList.addOp(OPS.restore, []);
      }
    }
|
Attempt to cache repeated images at the document, rather than the page, level (issue 11878)
Currently image resources, as opposed to e.g. font resources, are handled exclusively on a page-specific basis. Generally speaking this makes sense, since pages are separate from each other, however there's PDF documents where many (or even all) pages actually references exactly the same image resources (through the XRef table). Hence, in some cases, we're decoding the *same* images over and over for every page which is obviously slow and wasting both CPU and memory resources better used elsewhere.[1]
Obviously we cannot simply treat all image resources as-if they're used throughout the entire PDF document, since that would end up increasing memory usage too much.[2]
However, by introducing a `GlobalImageCache` in the worker we can track image resources that appear on more than one page. Hence we can switch image resources from being page-specific to being document-specific, once the image resource has been seen on more than a certain number of pages.
In many cases, such as e.g. the referenced issue, this patch will thus lead to reduced memory usage for image resources. Scrolling through all pages of the document, there's now only a few main-thread copies of the same image data, as opposed to one for each rendered page (i.e. there could theoretically be *twenty* copies of the image data).
While this obviously benefit both CPU and memory usage in this case, for *very* large image data this patch *may* possibly increase persistent main-thread memory usage a tiny bit. Thus to avoid negatively affecting memory usage too much in general, particularly on the main-thread, the `GlobalImageCache` will *only* cache a certain number of image resources at the document level and simply fallback to the default behaviour.
Unfortunately the asynchronous nature of the code, with ranged/streamed loading of data, actually makes all of this much more complicated than if all data could be assumed to be immediately available.[3]
*Please note:* The patch will lead to *small* movement in some existing test-cases, since we're now using the built-in PDF.js JPEG decoder more. This was done in order to simplify the overall implementation, especially on the main-thread, by limiting it to only the `OPS.paintImageXObject` operator.
---
[1] There's e.g. PDF documents that use the same image as background on all pages.
[2] Given that data stored in the `commonObjs`, on the main-thread, are only cleared manually through `PDFDocumentProxy.cleanup`. This as opposed to data stored in the `objs` of each page, which is automatically removed when the page is cleaned-up e.g. by being evicted from the cache in the default viewer.
[3] If the latter case were true, we could simply check for repeat images *before* parsing started and thus avoid handling *any* duplicate image resources.
2020-05-18 21:17:56 +09:00
|
|
|
|
|
2020-07-05 19:20:10 +09:00
|
|
|
|
    return new Promise(function promiseBody(resolve, reject) {
      const next = function (promise) {
        Promise.all([promise, operatorList.ready]).then(function () {
          try {
            promiseBody(resolve, reject);
          } catch (ex) {
            reject(ex);
          }
        }, reject);
      };
      task.ensureNotTerminated();
      timeSlotManager.reset();
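
      // `next()` implements a cooperative trampoline: once the pending
      // sub-task *and* `operatorList.ready` have both resolved, `promiseBody`
      // is re-entered to continue reading operators. Together with
      // `timeSlotManager`, which bounds each synchronous slice of the loop
      // below, this keeps long content streams from monopolizing the worker.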

      const operation = {};
      let stop, i, ii, cs, name, isValidName;
      while (!(stop = timeSlotManager.check())) {
        // The arguments parsed by read() are used beyond this loop, so we
        // cannot reuse the same array on each iteration. Therefore we pass
        // in |null| as the initial value (see the comment on
        // EvaluatorPreprocessor_read() for why).
        operation.args = null;
        if (!preprocessor.read(operation)) {
          break;
        }
        let args = operation.args;
        let fn = operation.fn;

        switch (fn | 0) {
|
2021-05-13 17:40:08 +09:00
|
|
|
|
case OPS.paintXObject:
|
2020-07-05 19:20:10 +09:00
|
|
|
|
// eagerly compile XForm objects
|
2021-07-15 04:38:19 +09:00
|
|
|
|
isValidName = args[0] instanceof Name;
|
2021-05-13 17:40:08 +09:00
|
|
|
|
name = args[0].name;
|
2021-07-15 04:38:19 +09:00
|
|
|
|
|
|
|
|
|
if (isValidName) {
|
2020-07-05 19:20:10 +09:00
|
|
|
|
const localImage = localImageCache.getByName(name);
|
|
|
|
|
if (localImage) {
|
|
|
|
|
operatorList.addOp(localImage.fn, localImage.args);
|
|
|
|
|
args = null;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
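
            // The local image cache is keyed both by /XObject name and, where
            // possible, by `Ref`, since some documents alias hundreds of
            // distinct names ("Im0", "Im1", ...) to a handful of actual image
            // objects in the XRef table. The by-name probe above is the fast
            // path; the by-`Ref` probe below catches such aliases.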

            next(
              new Promise(function (resolveXObject, rejectXObject) {
                if (!isValidName) {
                  throw new FormatError("XObject must be referred to by name.");
                }

                let xobj = xobjs.getRaw(name);
                if (xobj instanceof Ref) {
                  const localImage = localImageCache.getByRef(xobj);
                  if (localImage) {
                    operatorList.addOp(localImage.fn, localImage.args);
                    resolveXObject();
                    return;
                  }

                  const globalImage = self.globalImageCache.getData(
                    xobj,
                    self.pageIndex
                  );
                  if (globalImage) {
                    operatorList.addDependency(globalImage.objId);
                    operatorList.addOp(globalImage.fn, globalImage.args);

                    resolveXObject();
                    return;
                  }

                  xobj = xref.fetch(xobj);
                }
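
                // Images that have been seen on multiple pages may have been
                // promoted to the document-level `globalImageCache`; in that
                // case a dependency on the shared decoded data is recorded,
                // so the operator list waits for it instead of re-decoding.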

                if (!(xobj instanceof BaseStream)) {
                  throw new FormatError("XObject should be a stream");
                }

                const type = xobj.dict.get("Subtype");
                if (!(type instanceof Name)) {
                  throw new FormatError("XObject should have a Name subtype");
                }

                if (type.name === "Form") {
                  stateManager.save();
                  self
                    .buildFormXObject(
                      resources,
                      xobj,
                      null,
                      operatorList,
                      task,
                      stateManager.state.clone(),
                      localColorSpaceCache
                    )
                    .then(function () {
                      stateManager.restore();
                      resolveXObject();
                    }, rejectXObject);
                  return;
                } else if (type.name === "Image") {
                  self
                    .buildPaintImageXObject({
                      resources,
                      image: xobj,
                      operatorList,
                      cacheKey: name,
                      localImageCache,
                      localColorSpaceCache,
                    })
                    .then(resolveXObject, rejectXObject);
                  return;
                } else if (type.name === "PS") {
                  // PostScript XObjects are unused when viewing documents.
                  // See section 4.7.1 of Adobe's PDF reference.
                  info("Ignored XObject subtype PS");
                } else {
                  throw new FormatError(
                    `Unhandled XObject subtype ${type.name}`
                  );
                }
                resolveXObject();
              }).catch(function (reason) {
                if (reason instanceof AbortException) {
                  return;
                }
                if (self.options.ignoreErrors) {
                  // Error(s) in the XObject -- sending unsupported feature
                  // notification and allowing rendering to continue.
                  self.handler.send("UnsupportedFeature", {
                    featureId: UNSUPPORTED_FEATURES.errorXObject,
                  });
                  warn(`getOperatorList - ignoring XObject: "${reason}".`);
                  return;
                }
                throw reason;
              })
            );
            return;
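
          // Note: the entire XObject lookup above runs inside a dedicated
          // Promise so that synchronous `throw`s and asynchronous failures
          // funnel into the same `.catch` handler; an `AbortException` ends
          // the task quietly, and with `options.ignoreErrors` set a broken
          // XObject is reported and skipped rather than aborting rendering.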
          case OPS.setFont:
            var fontSize = args[1];
            // eagerly collect all fonts
            next(
              self
                .handleSetFont(
                  resources,
                  args,
                  null,
                  operatorList,
                  task,
                  stateManager.state,
                  fallbackFontDict
                )
                .then(function (loadedName) {
                  operatorList.addDependency(loadedName);
                  operatorList.addOp(OPS.setFont, [loadedName, fontSize]);
                })
            );
            return;
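
          // `beginText`/`endText` merely toggle `parsingText`, tracking
          // whether the preprocessor is currently inside a BT/ET text object.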
          case OPS.beginText:
            parsingText = true;
            break;
          case OPS.endText:
            parsingText = false;
            break;
          case OPS.endInlineImage:
            var cacheKey = args[0].cacheKey;
            if (cacheKey) {
              const localImage = localImageCache.getByName(cacheKey);
              if (localImage) {
                operatorList.addOp(localImage.fn, localImage.args);
                args = null;
                continue;
              }
            }
            next(
              self.buildPaintImageXObject({
                resources,
                image: args[0],
                isInline: true,
                operatorList,
                cacheKey,
                localImageCache,
                localColorSpaceCache,
              })
            );
            return;
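
          // Note: inline images (BI ... EI) above are cached under the
          // `cacheKey` that the parser derives from the raw image bytes, so
          // byte-identical repeats replay the cached paint operation instead
          // of being decoded again.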
          case OPS.showText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            args[0] = self.handleText(args[0], stateManager.state);
            break;
          case OPS.showSpacedText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            var arr = args[0];
            var combinedGlyphs = [];
            var arrLength = arr.length;
            var state = stateManager.state;
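
            // Flatten the TJ array: string entries become glyph arrays via
            // `handleText`, numeric entries are kept as-is (they encode
            // positioning adjustments), and the operation is rewritten to a
            // plain `showText`.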
            for (i = 0; i < arrLength; ++i) {
              const arrItem = arr[i];
              if (typeof arrItem === "string") {
                Array.prototype.push.apply(
                  combinedGlyphs,
                  self.handleText(arrItem, state)
                );
              } else if (typeof arrItem === "number") {
                combinedGlyphs.push(arrItem);
              }
            }
            args[0] = combinedGlyphs;
            fn = OPS.showText;
            break;
          case OPS.nextLineShowText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            operatorList.addOp(OPS.nextLine);
            args[0] = self.handleText(args[0], stateManager.state);
            fn = OPS.showText;
            break;
          case OPS.nextLineSetSpacingShowText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            operatorList.addOp(OPS.nextLine);
            operatorList.addOp(OPS.setWordSpacing, [args.shift()]);
            operatorList.addOp(OPS.setCharSpacing, [args.shift()]);
            args[0] = self.handleText(args[0], stateManager.state);
            fn = OPS.showText;
            break;
          case OPS.setTextRenderingMode:
            stateManager.state.textRenderingMode = args[0];
            break;
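
          // The two color-space cases below first probe the name-keyed
          // `localColorSpaceCache` synchronously (a hit lets the loop
          // `continue` immediately); only on a miss do they fall back to the
          // asynchronous `parseColorSpace`, which must be routed through
          // `next()` and therefore ends the current slice with `return`.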
          case OPS.setFillColorSpace: {
            const cachedColorSpace = ColorSpace.getCached(
              args[0],
              xref,
              localColorSpaceCache
            );
            if (cachedColorSpace) {
              stateManager.state.fillColorSpace = cachedColorSpace;
              continue;
            }

            next(
              self
                .parseColorSpace({
                  cs: args[0],
                  resources,
                  localColorSpaceCache,
                })
                .then(function (colorSpace) {
                  if (colorSpace) {
                    stateManager.state.fillColorSpace = colorSpace;
                  }
                })
            );
            return;
          }
|
|
|
|
|
case OPS.setStrokeColorSpace: {
|
|
|
|
|
const cachedColorSpace = ColorSpace.getCached(
|
|
|
|
|
args[0],
|
|
|
|
|
xref,
|
|
|
|
|
localColorSpaceCache
|
|
|
|
|
);
|
|
|
|
|
if (cachedColorSpace) {
|
|
|
|
|
stateManager.state.strokeColorSpace = cachedColorSpace;
|
|
|
|
|
continue;
|
            }

            next(
              self
                .parseColorSpace({
                  cs: args[0],
                  resources,
                  localColorSpaceCache,
                })
                .then(function (colorSpace) {
                  if (colorSpace) {
                    stateManager.state.strokeColorSpace = colorSpace;
                  }
                })
            );
            return;
          }
|
|
|
|
|
case OPS.setFillColor:
|
|
|
|
|
cs = stateManager.state.fillColorSpace;
|
|
|
|
|
args = cs.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setFillRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setStrokeColor:
|
|
|
|
|
cs = stateManager.state.strokeColorSpace;
|
|
|
|
|
args = cs.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setStrokeRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setFillGray:
|
|
|
|
|
stateManager.state.fillColorSpace = ColorSpace.singletons.gray;
|
|
|
|
|
args = ColorSpace.singletons.gray.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setFillRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setStrokeGray:
|
|
|
|
|
stateManager.state.strokeColorSpace = ColorSpace.singletons.gray;
|
|
|
|
|
args = ColorSpace.singletons.gray.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setStrokeRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setFillCMYKColor:
|
|
|
|
|
stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk;
|
|
|
|
|
args = ColorSpace.singletons.cmyk.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setFillRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setStrokeCMYKColor:
|
|
|
|
|
stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk;
|
|
|
|
|
args = ColorSpace.singletons.cmyk.getRgb(args, 0);
|
|
|
|
|
fn = OPS.setStrokeRGBColor;
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setFillRGBColor:
|
|
|
|
|
stateManager.state.fillColorSpace = ColorSpace.singletons.rgb;
|
|
|
|
|
args = ColorSpace.singletons.rgb.getRgb(args, 0);
|
|
|
|
|
break;
|
|
|
|
|
case OPS.setStrokeRGBColor:
|
|
|
|
|
stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb;
|
|
|
|
|
args = ColorSpace.singletons.rgb.getRgb(args, 0);
|
|
|
|
|
break;
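
          // Every explicit color operator above is normalized to an RGB
          // triple via `getRgb`, so the rendering side only ever has to
          // handle the `setFillRGBColor`/`setStrokeRGBColor` operations.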
          case OPS.setFillColorN:
            cs = stateManager.state.fillColorSpace;
            if (cs.name === "Pattern") {
Enable auto-formatting of the entire code-base using Prettier (issue 11444)
Note that Prettier, purposely, has only limited [configuration options](https://prettier.io/docs/en/options.html). The configuration file is based on [the one in `mozilla central`](https://searchfox.org/mozilla-central/source/.prettierrc) with just a few additions (to avoid future breakage if the defaults ever changes).
Prettier is being used for a couple of reasons:
- To be consistent with `mozilla-central`, where Prettier is already in use across the tree.
- To ensure a *consistent* coding style everywhere, which is automatically enforced during linting (since Prettier is used as an ESLint plugin). This thus ends "all" formatting disussions once and for all, removing the need for review comments on most stylistic matters.
Many ESLint options are now redundant, and I've tried my best to remove all the now unnecessary options (but I may have missed some).
Note also that since Prettier considers the `printWidth` option as a guide, rather than a hard rule, this patch resorts to a small hack in the ESLint config to ensure that *comments* won't become too long.
*Please note:* This patch is generated automatically, by appending the `--fix` argument to the ESLint call used in the `gulp lint` task. It will thus require some additional clean-up, which will be done in a *separate* commit.
(On a more personal note, I'll readily admit that some of the changes Prettier makes are *extremely* ugly. However, in the name of consistency we'll probably have to live with that.)
2019-12-25 23:59:37 +09:00
              next(
                self.handleColorN(
                  operatorList,
                  OPS.setFillColorN,
                  args,
                  cs,
                  patterns,
                  resources,
                  task,
                  localColorSpaceCache,
                  localTilingPatternCache,
                  localShadingPatternCache
                )
              );
              return;
            }
            args = cs.getRgb(args, 0);
            fn = OPS.setFillRGBColor;
            break;
          case OPS.setStrokeColorN:
            cs = stateManager.state.strokeColorSpace;
            if (cs.name === "Pattern") {
              next(
                self.handleColorN(
                  operatorList,
                  OPS.setStrokeColorN,
                  args,
                  cs,
                  patterns,
                  resources,
                  task,
                  localColorSpaceCache,
                  localTilingPatternCache,
                  localShadingPatternCache
                )
              );
              return;
            }
            args = cs.getRgb(args, 0);
            fn = OPS.setStrokeRGBColor;
            break;
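
          // `shadingFill` is rewritten so that only a pattern id travels to
          // the main thread; the (potentially large) shading dictionary is
          // parsed here by `parseShading`, with `localShadingPatternCache`
          // available so repeated shadings can be re-used.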
          case OPS.shadingFill:
            var shadingRes = resources.get("Shading");
            if (!shadingRes) {
              throw new FormatError("No shading resource found");
            }

            var shading = shadingRes.get(args[0].name);
            if (!shading) {
              throw new FormatError("No shading object found");
            }
            const patternId = self.parseShading({
              shading,
              resources,
              localColorSpaceCache,
              localShadingPatternCache,
            });
            args = [patternId];
            fn = OPS.shadingFill;
            break;
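
          // Graphics states are cached by name: a previously parsed
          // /ExtGState entry is replayed synchronously from
          // `localGStateCache`, and only cache misses fall through to the
          // asynchronous parsing below.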
          case OPS.setGState:
            isValidName = args[0] instanceof Name;
            name = args[0].name;

            if (isValidName) {
              const localGStateObj = localGStateCache.getByName(name);
              if (localGStateObj) {
                if (localGStateObj.length > 0) {
                  operatorList.addOp(OPS.setGState, [localGStateObj]);
                }
                args = null;
                continue;
              }
            }

            next(
              new Promise(function (resolveGState, rejectGState) {
                if (!isValidName) {
                  throw new FormatError("GState must be referred to by name.");
                }

                const extGState = resources.get("ExtGState");
                if (!(extGState instanceof Dict)) {
                  throw new FormatError("ExtGState should be a dictionary.");
                }

                const gState = extGState.get(name);
                // TODO: Attempt to lookup cached GStates by reference as well,
                //       if and only if there are PDF documents where doing so
                //       would significantly improve performance.
                if (!(gState instanceof Dict)) {
                  throw new FormatError("GState should be a dictionary.");
                }

                self
                  .setGState({
                    resources,
                    gState,
                    operatorList,
                    cacheKey: name,
                    task,
                    stateManager,
                    localGStateCache,
                    localColorSpaceCache,
                  })
                  .then(resolveGState, rejectGState);
              }).catch(function (reason) {
                if (reason instanceof AbortException) {
                  return;
                }
                if (self.options.ignoreErrors) {
                  // Error(s) in the ExtGState -- sending unsupported feature
                  // notification and allow parsing/rendering to continue.
                  self.handler.send("UnsupportedFeature", {
                    featureId: UNSUPPORTED_FEATURES.errorExtGState,
                  });
                  warn(`getOperatorList - ignoring ExtGState: "${reason}".`);
                  return;
                }
                throw reason;
              })
            );
            return;
          case OPS.moveTo:
          case OPS.lineTo:
          case OPS.curveTo:
          case OPS.curveTo2:
          case OPS.curveTo3:
          case OPS.closePath:
          case OPS.rectangle:
            self.buildPath(operatorList, fn, args, parsingText);
            continue;
          case OPS.markPoint:
          case OPS.markPointProps:
          case OPS.beginCompat:
          case OPS.endCompat:
            // Ignore operators where the corresponding handlers are known to
            // be no-op in CanvasGraphics (display/canvas.js). This prevents
            // serialization errors and is also a bit more efficient.
            // We could also try to serialize all objects in a general way,
            // e.g. as done in https://github.com/mozilla/pdf.js/pull/6266,
            // but doing so is meaningless without knowing the semantics.
            continue;
          case OPS.beginMarkedContentProps:
            if (!(args[0] instanceof Name)) {
              warn(`Expected name for beginMarkedContentProps arg0=${args[0]}`);
              continue;
            }
            if (args[0].name === "OC") {
              next(
                self
                  .parseMarkedContentProps(args[1], resources)
                  .then(data => {
                    operatorList.addOp(OPS.beginMarkedContentProps, [
                      "OC",
                      data,
                    ]);
                  })
                  .catch(reason => {
                    if (reason instanceof AbortException) {
                      return;
                    }
                    if (self.options.ignoreErrors) {
                      self.handler.send("UnsupportedFeature", {
                        featureId: UNSUPPORTED_FEATURES.errorMarkedContent,
                      });
                      warn(
                        `getOperatorList - ignoring beginMarkedContentProps: "${reason}".`
                      );
                      return;
                    }
                    throw reason;
                  })
              );
              return;
            }
            // Other marked content types aren't supported yet.
            args = [
              args[0].name,
              args[1] instanceof Dict ? args[1].get("MCID") : null,
            ];

            break;
          case OPS.beginMarkedContent:
          case OPS.endMarkedContent:
          default:
            // Note: Ignore the operator if it has `Dict` arguments, since
            // those are non-serializable, otherwise postMessage will throw
            // "An object could not be cloned.".
            if (args !== null) {
              for (i = 0, ii = args.length; i < ii; i++) {
                if (args[i] instanceof Dict) {
                  break;
                }
              }
              if (i < ii) {
                warn("getOperatorList - ignoring operator: " + fn);
                continue;
              }
            }
        }
        operatorList.addOp(fn, args);
      }
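
      // `stop` is set in the loop condition (outside of this excerpt) once
      // the current time slot is used up; `next(deferred)` then yields to
      // the event loop before parsing of the content stream resumes.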
      if (stop) {
        next(deferred);
        return;
      }
      // Some PDFs don't close all restores inside object/form.
      // Closing those for them.
      closePendingRestoreOPS();
      resolve();
    }).catch(reason => {
      if (reason instanceof AbortException) {
        return;
      }
      if (this.options.ignoreErrors) {
        // Error(s) in the OperatorList -- sending unsupported feature
        // notification and allow rendering to continue.
        this.handler.send("UnsupportedFeature", {
          featureId: UNSUPPORTED_FEATURES.errorOperatorList,
        });
        warn(
          `getOperatorList - ignoring errors during "${task.name}" ` +
            `task: "${reason}".`
        );

        closePendingRestoreOPS();
        return;
      }
      throw reason;
    });
  }
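
  // Extracts the text content from a content stream: glyphs are accumulated
  // into chunks together with their transforms and font styles, and the
  // result eventually reaches the caller through `sink`. This is the data
  // behind the viewer's text layer.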
  getTextContent({
    stream,
    task,
    resources,
    stateManager = null,
    combineTextItems = false,
    includeMarkedContent = false,
    sink,
    seenStyles = new Set(),
    viewBox,
  }) {
    // Ensure that `resources`/`stateManager` is correctly initialized,
    // even if the provided parameter is e.g. `null`.
    resources = resources || Dict.empty;
    stateManager = stateManager || new StateManager(new TextState());

    const NormalizedUnicodes = getNormalizedUnicodes();

    const textContent = {
      items: [],
      styles: Object.create(null),
    };
    const textContentItem = {
      initialized: false,
      str: [],
      totalWidth: 0,
      totalHeight: 0,
      width: 0,
      height: 0,
      vertical: false,
      prevTransform: null,
      textAdvanceScale: 0,
      spaceInFlowMin: 0,
      spaceInFlowMax: 0,
      trackingSpaceMin: Infinity,
      negativeSpaceMax: -Infinity,
      transform: null,
      fontName: null,
      hasEOL: false,
    };

    // Used in addFakeSpaces.

    // A whitespace gap <= fontSize * TRACKING_SPACE_FACTOR is considered
    // tracking space, so it doesn't count as a space.
    const TRACKING_SPACE_FACTOR = 0.1;

    // A negative gap < fontSize * NEGATIVE_SPACE_FACTOR induces a break
    // (a new chunk of text is created).
    // It doesn't change anything when the text is copied, but it reduces
    // potential mismatches between the text layer and the canvas.
    const NEGATIVE_SPACE_FACTOR = -0.2;

    // A gap with a width in [fontSize * MIN_FACTOR; fontSize * MAX_FACTOR]
    // is a space which will be inserted in the current flow of words.
    // If the width is outside of this range then the flow is broken
    // (which means a new span in the text layer).
    // This helps to fit the spans in the text layer as closely as possible
    // to what is displayed in the canvas.
    const SPACE_IN_FLOW_MIN_FACTOR = 0.1;
    const SPACE_IN_FLOW_MAX_FACTOR = 0.6;

    const self = this;
    const xref = this.xref;
    const showSpacedTextBuffer = [];

    // The XObject is parsed only if it's needed, e.g. if there is a `Do` cmd.
    let xobjs = null;
    const emptyXObjectCache = new LocalImageCache();
    const emptyGStateCache = new LocalGStateCache();

    const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);

    let textState;
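
    // The helper below computes the glyph rendering matrix of the
    // "Text Space Details" section of the PDF specification, i.e.
    //   trm = transform(ctm, transform(textMatrix, tsm)),
    // where tsm = [fontSize * textHScale, 0, 0, fontSize, 0, textRise]
    // collects the relevant text-state parameters.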
    function getCurrentTextTransform() {
      // 9.4.4 Text Space Details
      const font = textState.font;
      const tsm = [
        textState.fontSize * textState.textHScale,
        0,
        0,
        textState.fontSize,
        0,
        textState.textRise,
      ];

      if (
        font.isType3Font &&
        (textState.fontSize <= 1 || font.isCharBBox) &&
        !isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX)
      ) {
        const glyphHeight = font.bbox[3] - font.bbox[1];
        if (glyphHeight > 0) {
          tsm[3] *= glyphHeight * textState.fontMatrix[3];
        }
      }

      return Util.transform(
        textState.ctm,
        Util.transform(textState.textMatrix, tsm)
      );
    }
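
    // Lazily (re-)initializes the current text chunk: the font style is
    // registered once per `loadedName`, the current transform is recorded,
    // and the whitespace thresholds are derived from the active font size.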
    function ensureTextContentItem() {
      if (textContentItem.initialized) {
        return textContentItem;
      }
      const font = textState.font,
        loadedName = font.loadedName;
      if (!seenStyles.has(loadedName)) {
        seenStyles.add(loadedName);

        textContent.styles[loadedName] = {
          fontFamily: font.fallbackName,
          ascent: font.ascent,
          descent: font.descent,
          vertical: font.vertical,
        };
      }
      textContentItem.fontName = loadedName;

      const trm = (textContentItem.transform = getCurrentTextTransform());
      if (!font.vertical) {
        textContentItem.width = textContentItem.totalWidth = 0;
        textContentItem.height = textContentItem.totalHeight = Math.hypot(
          trm[2],
          trm[3]
        );
        textContentItem.vertical = false;
      } else {
        textContentItem.width = textContentItem.totalWidth = Math.hypot(
          trm[0],
          trm[1]
        );
        textContentItem.height = textContentItem.totalHeight = 0;
        textContentItem.vertical = true;
      }

      const scaleLineX = Math.hypot(
        textState.textLineMatrix[0],
        textState.textLineMatrix[1]
      );
      const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
      textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;

      textContentItem.trackingSpaceMin =
        textState.fontSize * TRACKING_SPACE_FACTOR;
      textContentItem.negativeSpaceMax =
        textState.fontSize * NEGATIVE_SPACE_FACTOR;
      textContentItem.spaceInFlowMin =
        textState.fontSize * SPACE_IN_FLOW_MIN_FACTOR;
      textContentItem.spaceInFlowMax =
        textState.fontSize * SPACE_IN_FLOW_MAX_FACTOR;

      textContentItem.hasEOL = false;

      textContentItem.initialized = true;
      return textContentItem;
    }

    function updateAdvanceScale() {
      if (!textContentItem.initialized) {
        return;
      }

      const scaleLineX = Math.hypot(
        textState.textLineMatrix[0],
        textState.textLineMatrix[1]
      );
      const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
      const scaleFactor = scaleCtmX * scaleLineX;
      if (scaleFactor === textContentItem.textAdvanceScale) {
        return;
      }

      if (!textContentItem.vertical) {
        textContentItem.totalWidth +=
          textContentItem.width * textContentItem.textAdvanceScale;
        textContentItem.width = 0;
      } else {
        textContentItem.totalHeight +=
          textContentItem.height * textContentItem.textAdvanceScale;
        textContentItem.height = 0;
      }

      textContentItem.textAdvanceScale = scaleFactor;
    }
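
    // Joins the accumulated glyphs, runs the bidi algorithm on the result
    // and shapes the final text item that is exposed through the API.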
    function runBidiTransform(textChunk) {
      const text = textChunk.str.join("");
      const bidiResult = bidi(text, -1, textChunk.vertical);
      return {
        str: bidiResult.str,
        dir: bidiResult.dir,
        width: Math.abs(textChunk.totalWidth),
        height: Math.abs(textChunk.totalHeight),
        transform: textChunk.transform,
        fontName: textChunk.fontName,
        hasEOL: textChunk.hasEOL,
      };
    }

    function handleSetFont(fontName, fontRef) {
      return self
        .loadFont(fontName, fontRef, resources)
        .then(function (translated) {
          if (!translated.font.isType3Font) {
            return translated;
          }
          return translated
            .loadType3Data(self, resources, task)
            .catch(function () {
              // Ignore Type3-parsing errors, since we only use `loadType3Data`
              // here to ensure that we'll always obtain a useful /FontBBox.
            })
            .then(function () {
              return translated;
            });
        })
        .then(function (translated) {
          textState.font = translated.font;
          textState.fontMatrix =
            translated.font.fontMatrix || FONT_IDENTITY_MATRIX;
        });
    }

    function applyInverseRotation(x, y, matrix) {
      const scale = Math.hypot(matrix[0], matrix[1]);
      return [
        (matrix[0] * x + matrix[1] * y) / scale,
        (matrix[2] * x + matrix[3] * y) / scale,
      ];
    }
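
    // Decides, from the distance between the previous glyph and the next
    // one, whether the current chunk must be flushed (EOL or column/line
    // break), whether a fake space should be inserted, or whether the
    // advance can simply be folded into the chunk. Returns false only when
    // the glyph lies outside of the viewBox.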
    function compareWithLastPosition() {
      const currentTransform = getCurrentTextTransform();
      let posX = currentTransform[4];
      let posY = currentTransform[5];

      const shiftedX = posX - viewBox[0];
      const shiftedY = posY - viewBox[1];

      if (
        shiftedX < 0 ||
        shiftedX > viewBox[2] ||
        shiftedY < 0 ||
        shiftedY > viewBox[3]
      ) {
        return false;
      }

      if (
        !combineTextItems ||
        !textState.font ||
        !textContentItem.prevTransform
      ) {
        return true;
      }

      let lastPosX = textContentItem.prevTransform[4];
      let lastPosY = textContentItem.prevTransform[5];

      if (lastPosX === posX && lastPosY === posY) {
        return true;
      }

      let rotate = -1;
      // Take into account the rotation in the current transform.
      if (
        currentTransform[0] &&
        currentTransform[1] === 0 &&
        currentTransform[2] === 0
      ) {
        rotate = currentTransform[0] > 0 ? 0 : 180;
      } else if (
        currentTransform[1] &&
        currentTransform[0] === 0 &&
        currentTransform[3] === 0
      ) {
        rotate = currentTransform[1] > 0 ? 90 : 270;
      }

      switch (rotate) {
        case 0:
          break;
        case 90:
          [posX, posY] = [posY, posX];
          [lastPosX, lastPosY] = [lastPosY, lastPosX];
          break;
        case 180:
          [posX, posY, lastPosX, lastPosY] = [
            -posX,
            -posY,
            -lastPosX,
            -lastPosY,
          ];
          break;
        case 270:
          [posX, posY] = [-posY, -posX];
          [lastPosX, lastPosY] = [-lastPosY, -lastPosX];
          break;
        default:
          // This is not a 0, 90, 180 or 270 degree rotation, so:
          //  - remove the scale factor from the matrix to get a rotation
          //    matrix;
          //  - apply the inverse (which is the transpose) to the positions.
          // We can then compare the positions of the glyphs to detect
          // a whitespace.
          [posX, posY] = applyInverseRotation(posX, posY, currentTransform);
          [lastPosX, lastPosY] = applyInverseRotation(
            lastPosX,
            lastPosY,
            textContentItem.prevTransform
          );
      }

      if (textState.font.vertical) {
        const advanceY = (lastPosY - posY) / textContentItem.textAdvanceScale;
        const advanceX = posX - lastPosX;

        // When the total height of the current chunk is negative
        // then we're writing from bottom to top.
        const textOrientation = Math.sign(textContentItem.height);
        if (advanceY < textOrientation * textContentItem.negativeSpaceMax) {
          if (
            Math.abs(advanceX) >
            0.5 * textContentItem.width /* not the same column */
          ) {
            appendEOL();
            return true;
          }

          flushTextContentItem();
          return true;
        }

        if (Math.abs(advanceX) > textContentItem.width) {
          appendEOL();
          return true;
        }
        if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
          textContentItem.height += advanceY;
        } else if (
          !addFakeSpaces(
            advanceY,
            textContentItem.prevTransform,
            textOrientation
          )
        ) {
          if (textContentItem.str.length === 0) {
            textContent.items.push({
              str: " ",
              dir: "ltr",
              width: 0,
              height: Math.abs(advanceY),
              transform: textContentItem.prevTransform,
              fontName: textContentItem.fontName,
              hasEOL: false,
            });
          } else {
            textContentItem.height += advanceY;
          }
        }

        return true;
      }

      const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
      const advanceY = posY - lastPosY;

      // When the total width of the current chunk is negative
      // then we're writing from right to left.
      const textOrientation = Math.sign(textContentItem.width);
      if (advanceX < textOrientation * textContentItem.negativeSpaceMax) {
        if (
          Math.abs(advanceY) >
          0.5 * textContentItem.height /* not the same line */
        ) {
          appendEOL();
          return true;
        }
        flushTextContentItem();
        return true;
      }

      if (Math.abs(advanceY) > textContentItem.height) {
        appendEOL();
        return true;
      }

      if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
        textContentItem.width += advanceX;
      } else if (
        !addFakeSpaces(advanceX, textContentItem.prevTransform, textOrientation)
      ) {
        if (textContentItem.str.length === 0) {
          textContent.items.push({
            str: " ",
            dir: "ltr",
            width: Math.abs(advanceX),
            height: 0,
            transform: textContentItem.prevTransform,
            fontName: textContentItem.fontName,
            hasEOL: false,
          });
        } else {
          textContentItem.width += advanceX;
        }
      }

      return true;
    }
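
    // Converts one shown string into glyphs and feeds them, one at a time,
    // through the position heuristics above; whitespace glyphs are folded
    // into the cursor movement instead of being pushed directly.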
    function buildTextContentItem({ chars, extraSpacing }) {
      const font = textState.font;
      if (!chars) {
        // Just move according to the space we have.
        const charSpacing = textState.charSpacing + extraSpacing;
        if (charSpacing) {
          if (!font.vertical) {
            textState.translateTextMatrix(
              charSpacing * textState.textHScale,
              0
            );
          } else {
            textState.translateTextMatrix(0, -charSpacing);
          }
        }

        return;
      }

      const glyphs = font.charsToGlyphs(chars);
      const scale = textState.fontMatrix[0] * textState.fontSize;

      for (let i = 0, ii = glyphs.length; i < ii; i++) {
        const glyph = glyphs[i];
        if (glyph.isInvisibleFormatMark) {
          continue;
        }
        let charSpacing =
          textState.charSpacing + (i + 1 === ii ? extraSpacing : 0);

        let glyphWidth = glyph.width;
        if (font.vertical) {
          glyphWidth = glyph.vmetric ? glyph.vmetric[0] : -glyphWidth;
        }
        let scaledDim = glyphWidth * scale;

        if (
          glyph.isWhitespace &&
          (i === 0 ||
            i + 1 === ii ||
            glyphs[i - 1].isWhitespace ||
            glyphs[i + 1].isWhitespace ||
            extraSpacing)
        ) {
          // Don't push a " " in the textContentItem
          // (except when it's between two non-space chars),
          // it will be done (if required) in the next call to
          // compareWithLastPosition.
          // This way we can merge real spaces and spaces due to cursor moves.
          if (!font.vertical) {
            charSpacing += scaledDim + textState.wordSpacing;
            textState.translateTextMatrix(
              charSpacing * textState.textHScale,
              0
            );
          } else {
            charSpacing += -scaledDim + textState.wordSpacing;
            textState.translateTextMatrix(0, -charSpacing);
          }
          continue;
        }

        if (!compareWithLastPosition()) {
          // The glyph is not in the page, so just skip it.
          continue;
        }

        // Must be called after compareWithLastPosition because
        // the textContentItem could have been flushed.
        const textChunk = ensureTextContentItem();
        if (glyph.isZeroWidthDiacritic) {
          scaledDim = 0;
        }

        if (!font.vertical) {
          scaledDim *= textState.textHScale;
          textState.translateTextMatrix(scaledDim, 0);
          textChunk.width += scaledDim;
        } else {
          textState.translateTextMatrix(0, scaledDim);
          scaledDim = Math.abs(scaledDim);
          textChunk.height += scaledDim;
        }

        if (scaledDim) {
          // Save the position of the last visible character.
          textChunk.prevTransform = getCurrentTextTransform();
        }

        if (glyph.isWhitespace) {
          // Replaces all whitespaces with standard spaces (0x20), to avoid
          // alignment issues between the textLayer and the canvas if the text
          // contains e.g. tabs (fixes issue6612.pdf).
          textChunk.str.push(" ");
        } else {
          let glyphUnicode = glyph.unicode;
          glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
          glyphUnicode = reverseIfRtl(glyphUnicode);
          textChunk.str.push(glyphUnicode);
        }

        if (charSpacing) {
          if (!font.vertical) {
            textState.translateTextMatrix(
              charSpacing * textState.textHScale,
              0
            );
          } else {
            textState.translateTextMatrix(0, -charSpacing);
          }
        }
      }
    }
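
    // Marks the end of a line: either flags and flushes the pending chunk,
    // or emits an empty item carrying only `hasEOL` when nothing is pending.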
    function appendEOL() {
      if (textContentItem.initialized) {
        textContentItem.hasEOL = true;
        flushTextContentItem();
      } else {
        textContent.items.push({
          str: "",
          dir: "ltr",
          width: 0,
          height: 0,
          transform: getCurrentTextTransform(),
          fontName: textState.font.loadedName,
          hasEOL: true,
        });
      }
    }
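
    // Translates a cursor move into text: a gap within the "in flow" range
    // becomes a " " appended to the current chunk (returning false), while
    // a larger gap flushes the chunk and emits a standalone space item.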
    function addFakeSpaces(width, transf, textOrientation) {
      if (
        textOrientation * textContentItem.spaceInFlowMin <= width &&
        width <= textOrientation * textContentItem.spaceInFlowMax
      ) {
        if (textContentItem.initialized) {
          textContentItem.str.push(" ");
        }
        return false;
      }

      const fontName = textContentItem.fontName;

      let height = 0;
      if (textContentItem.vertical) {
        height = width;
        width = 0;
      }

      flushTextContentItem();
      textContent.items.push({
        str: " ",
        // TODO: check if using the orientation from the last chunk is
        // better or not.
        dir: "ltr",
        width: Math.abs(width),
        height: Math.abs(height),
        transform: transf || getCurrentTextTransform(),
        fontName,
        hasEOL: false,
      });

      return true;
    }

    function flushTextContentItem() {
      if (!textContentItem.initialized || !textContentItem.str) {
        return;
      }

      // Do final text scaling.
      if (!textContentItem.vertical) {
        textContentItem.totalWidth +=
          textContentItem.width * textContentItem.textAdvanceScale;
      } else {
        textContentItem.totalHeight +=
          textContentItem.height * textContentItem.textAdvanceScale;
      }

      textContent.items.push(runBidiTransform(textContentItem));
      textContentItem.initialized = false;
      textContentItem.str.length = 0;
    }
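
    // Send the accumulated textContent to the main-thread in batches, to
    // reduce `postMessage`-overhead; with `batch = true` small chunks are
    // held back until at least TEXT_CHUNK_BATCH_SIZE items have accumulated.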
    function enqueueChunk(batch = false) {
      const length = textContent.items.length;
      if (length === 0) {
        return;
      }
      if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
        return;
      }
      sink.enqueue(textContent, length);
      textContent.items = [];
      textContent.styles = Object.create(null);
    }
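
    // Parse the content stream in time slices, so that a long stream cannot
    // block the worker-thread; `next` re-schedules `promiseBody` once both
    // the pending operation and the sink are ready.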
    const timeSlotManager = new TimeSlotManager();

    return new Promise(function promiseBody(resolve, reject) {
      const next = function (promise) {
        enqueueChunk(/* batch = */ true);
        Promise.all([promise, sink.ready]).then(function () {
          try {
            promiseBody(resolve, reject);
          } catch (ex) {
            reject(ex);
          }
        }, reject);
      };
      task.ensureNotTerminated();
      timeSlotManager.reset();

      const operation = {};
      let stop,
        args = [];
      while (!(stop = timeSlotManager.check())) {
        // The arguments parsed by read() are not used beyond this loop, so
        // we can reuse the same array on every iteration, thus avoiding
        // unnecessary allocations.
        args.length = 0;
        operation.args = args;
        if (!preprocessor.read(operation)) {
          break;
        }
        textState = stateManager.state;
        const fn = operation.fn;
        args = operation.args;

        switch (fn | 0) {
          case OPS.setFont:
            // Optimization to ignore multiple identical Tf commands.
            var fontNameArg = args[0].name,
              fontSizeArg = args[1];
            if (
              textState.font &&
              fontNameArg === textState.fontName &&
              fontSizeArg === textState.fontSize
            ) {
              break;
            }

            flushTextContentItem();
            textState.fontName = fontNameArg;
            textState.fontSize = fontSizeArg;
            next(handleSetFont(fontNameArg, null));
            return;
          case OPS.setTextRise:
            textState.textRise = args[0];
            break;
          case OPS.setHScale:
            textState.textHScale = args[0] / 100;
            break;
          case OPS.setLeading:
            textState.leading = args[0];
            break;
          case OPS.moveText:
            textState.translateTextLineMatrix(args[0], args[1]);
            textState.textMatrix = textState.textLineMatrix.slice();
            break;
          case OPS.setLeadingMoveText:
            textState.leading = -args[1];
            textState.translateTextLineMatrix(args[0], args[1]);
            textState.textMatrix = textState.textLineMatrix.slice();
            break;
          case OPS.nextLine:
            textState.carriageReturn();
            break;
          case OPS.setTextMatrix:
            textState.setTextMatrix(
              args[0],
              args[1],
              args[2],
              args[3],
              args[4],
              args[5]
            );
            textState.setTextLineMatrix(
              args[0],
              args[1],
              args[2],
              args[3],
              args[4],
              args[5]
            );
            updateAdvanceScale();
            break;
          case OPS.setCharSpacing:
            textState.charSpacing = args[0];
            break;
          case OPS.setWordSpacing:
            textState.wordSpacing = args[0];
            break;
          case OPS.beginText:
            textState.textMatrix = IDENTITY_MATRIX.slice();
            textState.textLineMatrix = IDENTITY_MATRIX.slice();
            break;
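          // TJ (showSpacedText): strings in the array are buffered and only
          // flushed into a textContentItem once a non-zero numeric
          // adjustment, or the end of the array, is reached.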
          case OPS.showSpacedText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }

            const spaceFactor =
              ((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
            const elements = args[0];
            for (let i = 0, ii = elements.length; i < ii - 1; i++) {
              const item = elements[i];
              if (typeof item === "string") {
                showSpacedTextBuffer.push(item);
              } else if (typeof item === "number" && item !== 0) {
                // PDF Specification 5.3.2 states:
                // The number is expressed in thousandths of a unit of text
                // space.
                // This amount is subtracted from the current horizontal or
                // vertical coordinate, depending on the writing mode.
                // In the default coordinate system, a positive adjustment
                // has the effect of moving the next glyph painted either to
                // the left or down by the given amount.
                const str = showSpacedTextBuffer.join("");
                showSpacedTextBuffer.length = 0;
                buildTextContentItem({
                  chars: str,
                  extraSpacing: item * spaceFactor,
                });
              }
            }

            const item = elements[elements.length - 1];
            if (typeof item === "string") {
              showSpacedTextBuffer.push(item);
            }

            if (showSpacedTextBuffer.length > 0) {
              const str = showSpacedTextBuffer.join("");
              showSpacedTextBuffer.length = 0;
              buildTextContentItem({
                chars: str,
                extraSpacing: 0,
              });
            }
            break;
          case OPS.showText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            buildTextContentItem({
              chars: args[0],
              extraSpacing: 0,
            });
            break;
          case OPS.nextLineShowText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            textState.carriageReturn();
            buildTextContentItem({
              chars: args[0],
              extraSpacing: 0,
            });
            break;
          case OPS.nextLineSetSpacingShowText:
            if (!stateManager.state.font) {
              self.ensureStateFont(stateManager.state);
              continue;
            }
            textState.wordSpacing = args[0];
            textState.charSpacing = args[1];
            textState.carriageReturn();
            buildTextContentItem({
              chars: args[2],
              extraSpacing: 0,
            });
            break;
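          // Form XObjects are parsed recursively, with their own
          // StateManager, so that any text inside them is extracted as
          // well; XObjects that produce no text are cached as empty.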
          case OPS.paintXObject:
            flushTextContentItem();
            if (!xobjs) {
              xobjs = resources.get("XObject") || Dict.empty;
            }

            var isValidName = args[0] instanceof Name;
            var name = args[0].name;

            if (isValidName && emptyXObjectCache.getByName(name)) {
              break;
            }

            next(
              new Promise(function (resolveXObject, rejectXObject) {
                if (!isValidName) {
                  throw new FormatError("XObject must be referred to by name.");
                }

                let xobj = xobjs.getRaw(name);
                if (xobj instanceof Ref) {
                  if (emptyXObjectCache.getByRef(xobj)) {
                    resolveXObject();
                    return;
                  }

                  const globalImage = self.globalImageCache.getData(
                    xobj,
                    self.pageIndex
                  );
                  if (globalImage) {
                    resolveXObject();
                    return;
                  }

                  xobj = xref.fetch(xobj);
                }

                if (!(xobj instanceof BaseStream)) {
                  throw new FormatError("XObject should be a stream");
                }

                const type = xobj.dict.get("Subtype");
                if (!(type instanceof Name)) {
                  throw new FormatError("XObject should have a Name subtype");
                }

                if (type.name !== "Form") {
                  emptyXObjectCache.set(name, xobj.dict.objId, true);

                  resolveXObject();
                  return;
                }

                // Use a new `StateManager` to prevent incorrect positioning
                // of textItems *after* the Form XObject, since errors in the
                // data can otherwise prevent `restore` operators from
                // executing.
                // NOTE: Only an issue when `options.ignoreErrors === true`.
                const currentState = stateManager.state.clone();
                const xObjStateManager = new StateManager(currentState);

                const matrix = xobj.dict.getArray("Matrix");
                if (Array.isArray(matrix) && matrix.length === 6) {
                  xObjStateManager.transform(matrix);
                }

                // Enqueue the `textContent` chunk before parsing the /Form
                // XObject.
                enqueueChunk();
                const sinkWrapper = {
                  enqueueInvoked: false,

                  enqueue(chunk, size) {
                    this.enqueueInvoked = true;
                    sink.enqueue(chunk, size);
                  },

                  get desiredSize() {
                    return sink.desiredSize;
                  },

                  get ready() {
                    return sink.ready;
                  },
                };

                self
                  .getTextContent({
                    stream: xobj,
                    task,
                    resources: xobj.dict.get("Resources") || resources,
                    stateManager: xObjStateManager,
                    combineTextItems,
                    includeMarkedContent,
                    sink: sinkWrapper,
                    seenStyles,
                    viewBox,
                  })
                  .then(function () {
                    if (!sinkWrapper.enqueueInvoked) {
                      emptyXObjectCache.set(name, xobj.dict.objId, true);
                    }
                    resolveXObject();
                  }, rejectXObject);
              }).catch(function (reason) {
                if (reason instanceof AbortException) {
                  return;
                }
                if (self.options.ignoreErrors) {
                  // Error(s) in the XObject -- allow text-extraction to
                  // continue.
                  warn(`getTextContent - ignoring XObject: "${reason}".`);
                  return;
                }
                throw reason;
              })
            );
            return;
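          // Only ExtGState entries that set a font are relevant for
          // text-extraction; graphics states without a Font entry are
          // cached as empty.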
          case OPS.setGState:
            isValidName = args[0] instanceof Name;
            name = args[0].name;

            if (isValidName && emptyGStateCache.getByName(name)) {
              break;
            }

            next(
              new Promise(function (resolveGState, rejectGState) {
                if (!isValidName) {
                  throw new FormatError("GState must be referred to by name.");
                }

                const extGState = resources.get("ExtGState");
                if (!(extGState instanceof Dict)) {
                  throw new FormatError("ExtGState should be a dictionary.");
                }

                const gState = extGState.get(name);
                // TODO: Attempt to lookup cached GStates by reference as well,
                //       if and only if there are PDF documents where doing so
                //       would significantly improve performance.
                if (!(gState instanceof Dict)) {
                  throw new FormatError("GState should be a dictionary.");
                }

                const gStateFont = gState.get("Font");
                if (!gStateFont) {
                  emptyGStateCache.set(name, gState.objId, true);

                  resolveGState();
                  return;
                }
                flushTextContentItem();

                textState.fontName = null;
                textState.fontSize = gStateFont[1];
                handleSetFont(null, gStateFont[0]).then(
                  resolveGState,
                  rejectGState
                );
              }).catch(function (reason) {
                if (reason instanceof AbortException) {
                  return;
                }
                if (self.options.ignoreErrors) {
                  // Error(s) in the ExtGState -- allow text-extraction to
                  // continue.
                  warn(`getTextContent - ignoring ExtGState: "${reason}".`);
                  return;
                }
                throw reason;
              })
            );
            return;
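          // Marked-content operators are only emitted when the API caller
          // explicitly opted in via `includeMarkedContent`.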
          case OPS.beginMarkedContent:
            if (includeMarkedContent) {
              textContent.items.push({
                type: "beginMarkedContent",
                tag: args[0] instanceof Name ? args[0].name : null,
              });
            }
            break;
          case OPS.beginMarkedContentProps:
            if (includeMarkedContent) {
              flushTextContentItem();
              let mcid = null;
              if (args[1] instanceof Dict) {
                mcid = args[1].get("MCID");
              }
              textContent.items.push({
                type: "beginMarkedContentProps",
                id: Number.isInteger(mcid)
                  ? `${self.idFactory.getPageObjId()}_mcid${mcid}`
                  : null,
                tag: args[0] instanceof Name ? args[0].name : null,
              });
            }
            break;
          case OPS.endMarkedContent:
            if (includeMarkedContent) {
              flushTextContentItem();
              textContent.items.push({
                type: "endMarkedContent",
              });
            }
            break;
        } // switch
        if (textContent.items.length >= sink.desiredSize) {
          // Wait for ready, if we reach highWaterMark.
          stop = true;
          break;
        }
      } // while
      if (stop) {
        next(deferred);
        return;
      }
      flushTextContentItem();
      enqueueChunk();
      resolve();
    }).catch(reason => {
      if (reason instanceof AbortException) {
        return;
      }
      if (this.options.ignoreErrors) {
        // Error(s) in the TextContent -- allow text-extraction to continue.
        warn(
          `getTextContent - ignoring errors during "${task.name}" ` +
            `task: "${reason}".`
        );

        flushTextContentItem();
        enqueueChunk();
        return;
      }
      throw reason;
    });
  }
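
  // Gather the encoding- and ToUnicode-related data for a font dictionary;
  // see sections 9.6.6 (character encoding) and 9.10.2 (ToUnicode CMaps)
  // of the PDF specification.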
  extractDataStructures(dict, baseDict, properties) {
    const xref = this.xref;
    let cidToGidBytes;
    // 9.10.2
    const toUnicodePromise = this.readToUnicode(
      properties.toUnicode || dict.get("ToUnicode") || baseDict.get("ToUnicode")
    );

    if (properties.composite) {
      // CIDSystemInfo helps to match CID to glyphs
      const cidSystemInfo = dict.get("CIDSystemInfo");
      if (cidSystemInfo instanceof Dict) {
        properties.cidSystemInfo = {
          registry: stringToPDFString(cidSystemInfo.get("Registry")),
          ordering: stringToPDFString(cidSystemInfo.get("Ordering")),
          supplement: cidSystemInfo.get("Supplement"),
        };
      }

      const cidToGidMap = dict.get("CIDToGIDMap");
      if (cidToGidMap instanceof BaseStream) {
        cidToGidBytes = cidToGidMap.getBytes();
      }
    }

    // Based on 9.6.6 of the spec, the encoding can come from multiple places
    // and depends on the font type. The base encoding and differences are
    // read here, but the encoding that is actually used is chosen during
    // glyph mapping in the font.
    // TODO: Loading the built-in encoding in the font would allow the
    // differences to be merged in here and not require us to hold on to it.
    const differences = [];
    let baseEncodingName = null;
    let encoding;
    if (dict.has("Encoding")) {
      encoding = dict.get("Encoding");
      if (encoding instanceof Dict) {
        baseEncodingName = encoding.get("BaseEncoding");
        baseEncodingName =
          baseEncodingName instanceof Name ? baseEncodingName.name : null;
        // Load the differences between the base and original
        if (encoding.has("Differences")) {
          const diffEncoding = encoding.get("Differences");
          let index = 0;
          for (let j = 0, jj = diffEncoding.length; j < jj; j++) {
            const data = xref.fetchIfRef(diffEncoding[j]);
            if (typeof data === "number") {
              index = data;
            } else if (data instanceof Name) {
              differences[index++] = data.name;
            } else {
              throw new FormatError(
                `Invalid entry in 'Differences' array: ${data}`
              );
            }
          }
        }
      } else if (encoding instanceof Name) {
        baseEncodingName = encoding.name;
      } else {
        throw new FormatError("Encoding is not a Name nor a Dict");
      }
      // According to table 114 if the encoding is a named encoding it must be
      // one of these predefined encodings.
      if (
        baseEncodingName !== "MacRomanEncoding" &&
        baseEncodingName !== "MacExpertEncoding" &&
        baseEncodingName !== "WinAnsiEncoding"
      ) {
        baseEncodingName = null;
      }
    }

    if (baseEncodingName) {
      properties.defaultEncoding = getEncoding(baseEncodingName);
    } else {
      const isSymbolicFont = !!(properties.flags & FontFlags.Symbolic);
      const isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic);
      // According to "Table 114" in section "9.6.6.1 General" (under
      // "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic
      // font should use the `StandardEncoding` if no encoding is specified.
      encoding = StandardEncoding;
      if (properties.type === "TrueType" && !isNonsymbolicFont) {
        encoding = WinAnsiEncoding;
      }
      // The Symbolic attribute can be misused for regular fonts.
      // Heuristic: we also have to check if the font is a standard one.
      if (isSymbolicFont) {
        encoding = MacRomanEncoding;
        if (!properties.file || properties.isInternalFont) {
          if (/Symbol/i.test(properties.name)) {
            encoding = SymbolSetEncoding;
          } else if (/Dingbats|Wingdings/i.test(properties.name)) {
            encoding = ZapfDingbatsEncoding;
          }
        }
      }
      properties.defaultEncoding = encoding;
    }

    properties.differences = differences;
    properties.baseEncodingName = baseEncodingName;
    properties.hasEncoding = !!baseEncodingName || differences.length > 0;
    properties.dict = dict;
    return toUnicodePromise
      .then(readToUnicode => {
        properties.toUnicode = readToUnicode;
        return this.buildToUnicode(properties);
      })
      .then(builtToUnicode => {
        properties.toUnicode = builtToUnicode;
        if (cidToGidBytes) {
          properties.cidToGidMap = this.readCidToGidMap(
            cidToGidBytes,
            builtToUnicode
          );
        }
        return properties;
      });
  }

  /**
   * @returns {Array}
   * @private
   */
  _simpleFontToUnicode(properties, forceGlyphs = false) {
    assert(!properties.composite, "Must be a simple font.");

    const toUnicode = [];
    const encoding = properties.defaultEncoding.slice();
    const baseEncodingName = properties.baseEncodingName;
    // Merge in the differences array.
    const differences = properties.differences;
    for (const charcode in differences) {
      const glyphName = differences[charcode];
      if (glyphName === ".notdef") {
        // Skip .notdef to prevent rendering errors, e.g. boxes appearing
        // where there should be spaces (fixes issue5256.pdf).
        continue;
      }
      encoding[charcode] = glyphName;
    }
    const glyphsUnicodeMap = getGlyphsUnicode();
    for (const charcode in encoding) {
      // a) Map the character code to a character name.
      let glyphName = encoding[charcode];
      // b) Look up the character name in the Adobe Glyph List (see the
      //    Bibliography) to obtain the corresponding Unicode value.
      if (glyphName === "") {
        continue;
      } else if (glyphsUnicodeMap[glyphName] === undefined) {
        // (undocumented) c) A few heuristics to recognize unknown glyphs.
        // NOTE: Adobe Reader does not do this step, but OSX Preview does.
        let code = 0;
        switch (glyphName[0]) {
          case "G": // Gxx glyph
            if (glyphName.length === 3) {
              code = parseInt(glyphName.substring(1), 16);
            }
            break;
          case "g": // g00xx glyph
            if (glyphName.length === 5) {
              code = parseInt(glyphName.substring(1), 16);
            }
            break;
          case "C": // Cdd{d} glyph
          case "c": // cdd{d} glyph
            if (glyphName.length >= 3 && glyphName.length <= 4) {
              const codeStr = glyphName.substring(1);

              if (forceGlyphs) {
                code = parseInt(codeStr, 16);
                break;
              }
              // Normally the Cdd{d}/cdd{d} glyphName format will contain
              // regular, i.e. base 10, charCodes (see issue4550.pdf)...
              code = +codeStr;

              // ... however some PDF generators violate that assumption by
              // containing glyph, i.e. base 16, codes instead.
              // In that case we need to re-parse the *entire* encoding to
              // prevent broken text-selection (fixes issue9655_reduced.pdf).
              if (
                Number.isNaN(code) &&
                Number.isInteger(parseInt(codeStr, 16))
              ) {
                return this._simpleFontToUnicode(
                  properties,
                  /* forceGlyphs */ true
                );
              }
            }
            break;
          default: // 'uniXXXX'/'uXXXX{XX}' glyphs
            const unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
            if (unicode !== -1) {
              code = unicode;
            }
        }
        if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
          // If `baseEncodingName` is one of the predefined encodings, and
          // `code` equals `charcode`, using the glyph defined in the
          // baseEncoding seems to yield a better `toUnicode` mapping
          // (fixes issue 5070).
          if (baseEncodingName && code === +charcode) {
            const baseEncoding = getEncoding(baseEncodingName);
            if (baseEncoding && (glyphName = baseEncoding[charcode])) {
              toUnicode[charcode] = String.fromCharCode(
                glyphsUnicodeMap[glyphName]
              );
              continue;
            }
          }
          toUnicode[charcode] = String.fromCodePoint(code);
        }
        continue;
      }
      toUnicode[charcode] = String.fromCharCode(glyphsUnicodeMap[glyphName]);
    }
    return toUnicode;
  }
2017-11-26 20:53:06 +09:00
|
|
|
|
|
2020-07-05 19:20:10 +09:00
|
|
|
|
/**
|
|
|
|
|
* Builds a char code to unicode map based on section 9.10 of the spec.
|
|
|
|
|
* @param {Object} properties Font properties object.
|
|
|
|
|
* @returns {Promise} A Promise that is resolved with a
|
|
|
|
|
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
|
|
|
|
*/
|
2021-05-17 22:40:23 +09:00
|
|
|
|
async buildToUnicode(properties) {
|
2020-07-05 19:20:10 +09:00
|
|
|
|
properties.hasIncludedToUnicodeMap =
|
|
|
|
|
!!properties.toUnicode && properties.toUnicode.length > 0;
|
|
|
|
|
|
|
|
|
|
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
|
|
|
|
if (properties.hasIncludedToUnicodeMap) {
|
|
|
|
|
// Some fonts contain incomplete ToUnicode data, causing issues with
|
|
|
|
|
// text-extraction. For simple fonts, containing encoding information,
|
|
|
|
|
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
|
|
|
|
|
if (!properties.composite && properties.hasEncoding) {
|
2021-05-18 20:45:19 +09:00
|
|
|
|
properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
|
2016-02-29 01:20:29 +09:00
|
|
|
|
}
|
2021-05-17 22:40:23 +09:00
|
|
|
|
return properties.toUnicode;
|
2020-07-05 19:20:10 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// According to the spec if the font is a simple font we should only map
|
|
|
|
|
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
|
|
|
|
// the differences array only contains adobe standard or symbol set names,
|
|
|
|
|
// in pratice it seems better to always try to create a toUnicode map
|
|
|
|
|
// based of the default encoding.
|
|
|
|
|
if (!properties.composite /* is simple font */) {
|
2021-05-18 20:45:19 +09:00
|
|
|
|
return new ToUnicodeMap(this._simpleFontToUnicode(properties));
|
2020-07-05 19:20:10 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If the font is a composite font that uses one of the predefined CMaps
|
|
|
|
|
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
|
|
|
|
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
|
|
|
|
// Adobe-Korea1 character collection:
|
|
|
|
|
if (
|
|
|
|
|
properties.composite &&
|
|
|
|
|
((properties.cMap.builtInCMap &&
|
|
|
|
|
!(properties.cMap instanceof IdentityCMap)) ||
|
|
|
|
|
(properties.cidSystemInfo.registry === "Adobe" &&
|
|
|
|
|
(properties.cidSystemInfo.ordering === "GB1" ||
|
|
|
|
|
properties.cidSystemInfo.ordering === "CNS1" ||
|
|
|
|
|
properties.cidSystemInfo.ordering === "Japan1" ||
|
|
|
|
|
properties.cidSystemInfo.ordering === "Korea1")))
|
|
|
|
|
) {
|
|
|
|
|
// Then:
|
|
|
|
|
// a) Map the character code to a character identifier (CID) according
|
|
|
|
|
// to the font’s CMap.
|
|
|
|
|
// b) Obtain the registry and ordering of the character collection used
|
|
|
|
|
// by the font’s CMap (for example, Adobe and Japan1) from its
|
|
|
|
|
// CIDSystemInfo dictionary.
|
2021-05-17 22:40:23 +09:00
|
|
|
|
const { registry, ordering } = properties.cidSystemInfo;
|
2020-07-05 19:20:10 +09:00
|
|
|
|
// c) Construct a second CMap name by concatenating the registry and
|
|
|
|
|
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
|
|
|
|
// (for example, Adobe–Japan1–UCS2).
|
2021-05-17 22:40:23 +09:00
|
|
|
|
const ucs2CMapName = Name.get(`${registry}-${ordering}-UCS2`);
|
2020-07-05 19:20:10 +09:00
|
|
|
|
// d) Obtain the CMap with the name constructed in step (c) (available
|
|
|
|
|
// from the ASN Web site; see the Bibliography).
|
2021-05-17 22:40:23 +09:00
|
|
|
|
const ucs2CMap = await CMapFactory.create({
|
2020-07-05 19:20:10 +09:00
|
|
|
|
encoding: ucs2CMapName,
|
|
|
|
|
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
|
|
|
|
useCMap: null,
|
|
|
|
|
});
|
2021-05-17 22:40:23 +09:00
|
|
|
|
const toUnicode = [];
|
|
|
|
|
properties.cMap.forEach(function (charcode, cid) {
|
|
|
|
|
if (cid > 0xffff) {
|
|
|
|
|
throw new FormatError("Max size of CID is 65,535");
|
|
|
|
|
}
|
|
|
|
|
// e) Map the CID obtained in step (a) according to the CMap
|
|
|
|
|
// obtained in step (d), producing a Unicode value.
|
|
|
|
|
const ucs2 = ucs2CMap.lookup(cid);
|
|
|
|
|
if (ucs2) {
|
|
|
|
|
toUnicode[charcode] = String.fromCharCode(
|
|
|
|
|
(ucs2.charCodeAt(0) << 8) + ucs2.charCodeAt(1)
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
return new ToUnicodeMap(toUnicode);
|
2020-07-05 19:20:10 +09:00
|
|
|
|
}

    // The viewer's choice, just use an identity map.
    return new IdentityToUnicodeMap(properties.firstChar, properties.lastChar);
  }

  readToUnicode(cmapObj) {
    if (!cmapObj) {
      return Promise.resolve(null);
    }
    if (cmapObj instanceof Name) {
      return CMapFactory.create({
        encoding: cmapObj,
        fetchBuiltInCMap: this._fetchBuiltInCMapBound,
        useCMap: null,
      }).then(function (cmap) {
        if (cmap instanceof IdentityCMap) {
          return new IdentityToUnicodeMap(0, 0xffff);
        }
        return new ToUnicodeMap(cmap.getMap());
      });
    } else if (cmapObj instanceof BaseStream) {
      return CMapFactory.create({
        encoding: cmapObj,
        fetchBuiltInCMap: this._fetchBuiltInCMapBound,
        useCMap: null,
      }).then(
        function (cmap) {
          if (cmap instanceof IdentityCMap) {
            return new IdentityToUnicodeMap(0, 0xffff);
          }
          const map = new Array(cmap.length);
          // Convert UTF-16BE
          // NOTE: cmap can be a sparse array, so use forEach instead of
          // `for(;;)` to iterate over all keys.
          cmap.forEach(function (charCode, token) {
            // Some cmaps contain *only* CID characters (fixes issue9367.pdf).
            if (typeof token === "number") {
              map[charCode] = String.fromCodePoint(token);
              return;
            }
            const str = [];
            for (let k = 0; k < token.length; k += 2) {
              const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
              if ((w1 & 0xf800) !== 0xd800) {
                // w1 < 0xD800 || w1 > 0xDFFF
                str.push(w1);
                continue;
              }
              k += 2;
              const w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
              str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000);
            }
            map[charCode] = String.fromCodePoint.apply(String, str);
          });
          return new ToUnicodeMap(map);
        },
        reason => {
          if (reason instanceof AbortException) {
            return null;
          }
          if (this.options.ignoreErrors) {
            // Error in the ToUnicode data -- sending unsupported feature
            // notification and allow font parsing to continue.
            this.handler.send("UnsupportedFeature", {
              featureId: UNSUPPORTED_FEATURES.errorFontToUnicode,
            });
            warn(`readToUnicode - ignoring ToUnicode data: "${reason}".`);
            return null;
          }
          throw reason;
        }
      );
    }
    return Promise.resolve(null);
  }
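
  // Worked example of the UTF-16BE decoding above (token value illustrative):
  // a ToUnicode token "\xD8\x3D\xDE\x00" gives w1 = 0xd83d and w2 = 0xde00,
  // so ((0xd83d & 0x3ff) << 10) + (0xde00 & 0x3ff) + 0x10000 === 0x1f600,
  // i.e. the single code point U+1F600 (GRINNING FACE), which
  // String.fromCodePoint turns back into one two-code-unit string.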

  readCidToGidMap(glyphsData, toUnicode) {
    // Extract the encoding from the CIDToGIDMap

    // Set encoding 0 to later verify the font has an encoding
    const result = [];
    for (let j = 0, jj = glyphsData.length; j < jj; j++) {
      const glyphID = (glyphsData[j++] << 8) | glyphsData[j];
      const code = j >> 1;
      if (glyphID === 0 && !toUnicode.has(code)) {
        continue;
      }
      result[code] = glyphID;
    }
    return result;
  }
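
  // For example (illustrative bytes, not from a real font): the CIDToGIDMap
  // bytes [0x00, 0x00, 0x00, 0x05] hold two big-endian 16-bit glyph IDs.
  // CID 0 (glyph ID 0) is skipped unless toUnicode.has(0), while CID 1 maps
  // to glyph ID 5, so the method returns a sparse array with result[1] === 5.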

  extractWidths(dict, descriptor, properties) {
    const xref = this.xref;
    let glyphsWidths = [];
    let defaultWidth = 0;
    const glyphsVMetrics = [];
    let defaultVMetrics;
    let i, ii, j, jj, start, code, widths;
    if (properties.composite) {
      defaultWidth = dict.has("DW") ? dict.get("DW") : 1000;

      widths = dict.get("W");
      if (widths) {
        for (i = 0, ii = widths.length; i < ii; i++) {
          start = xref.fetchIfRef(widths[i++]);
          code = xref.fetchIfRef(widths[i]);
          if (Array.isArray(code)) {
            for (j = 0, jj = code.length; j < jj; j++) {
              glyphsWidths[start++] = xref.fetchIfRef(code[j]);
            }
          } else {
            const width = xref.fetchIfRef(widths[++i]);
            for (j = start; j <= code; j++) {
              glyphsWidths[j] = width;
            }
          }
        }
      }

      if (properties.vertical) {
        let vmetrics = dict.getArray("DW2") || [880, -1000];
        defaultVMetrics = [vmetrics[1], defaultWidth * 0.5, vmetrics[0]];
        vmetrics = dict.get("W2");
        if (vmetrics) {
          for (i = 0, ii = vmetrics.length; i < ii; i++) {
            start = xref.fetchIfRef(vmetrics[i++]);
            code = xref.fetchIfRef(vmetrics[i]);
            if (Array.isArray(code)) {
              for (j = 0, jj = code.length; j < jj; j++) {
                glyphsVMetrics[start++] = [
                  xref.fetchIfRef(code[j++]),
                  xref.fetchIfRef(code[j++]),
                  xref.fetchIfRef(code[j]),
                ];
              }
            } else {
              const vmetric = [
                xref.fetchIfRef(vmetrics[++i]),
                xref.fetchIfRef(vmetrics[++i]),
                xref.fetchIfRef(vmetrics[++i]),
              ];
              for (j = start; j <= code; j++) {
                glyphsVMetrics[j] = vmetric;
              }
            }
          }
        }
      }
    } else {
      const firstChar = properties.firstChar;
      widths = dict.get("Widths");
      if (widths) {
        j = firstChar;
        for (i = 0, ii = widths.length; i < ii; i++) {
          glyphsWidths[j++] = xref.fetchIfRef(widths[i]);
        }
        defaultWidth = parseFloat(descriptor.get("MissingWidth")) || 0;
      } else {
        // Trying to get the BaseFont metrics (see comment above).
        const baseFontName = dict.get("BaseFont");
        if (baseFontName instanceof Name) {
          const metrics = this.getBaseFontMetrics(baseFontName.name);

          glyphsWidths = this.buildCharCodeToWidth(metrics.widths, properties);
          defaultWidth = metrics.defaultWidth;
        }
      }
    }

    // Heuristic: detect monospace fonts by checking all non-zero widths.
    let isMonospace = true;
    let firstWidth = defaultWidth;
    for (const glyph in glyphsWidths) {
      const glyphWidth = glyphsWidths[glyph];
      if (!glyphWidth) {
        continue;
      }
      if (!firstWidth) {
        firstWidth = glyphWidth;
        continue;
      }
      if (firstWidth !== glyphWidth) {
        isMonospace = false;
        break;
      }
    }
    if (isMonospace) {
      properties.flags |= FontFlags.FixedPitch;
    }

    properties.defaultWidth = defaultWidth;
    properties.widths = glyphsWidths;
    properties.defaultVMetrics = defaultVMetrics;
    properties.vmetrics = glyphsVMetrics;
  }
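
  // As an illustration (a sketch, not part of the original code), the /W
  // array of a composite font mixes both forms parsed above:
  //   /W [ 1 [ 500 600 ] 10 20 400 ]
  // assigns width 500 to CID 1, width 600 to CID 2, and width 400 to every
  // CID from 10 through 20.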

  isSerifFont(baseFontName) {
    // Simulating descriptor flags attribute
    const fontNameWoStyle = baseFontName.split("-")[0];
    return (
      fontNameWoStyle in getSerifFonts() ||
      fontNameWoStyle.search(/serif/gi) !== -1
    );
  }

  getBaseFontMetrics(name) {
    let defaultWidth = 0;
    let widths = Object.create(null);
    let monospace = false;
    const stdFontMap = getStdFontMap();
    let lookupName = stdFontMap[name] || name;
    const Metrics = getMetrics();

    if (!(lookupName in Metrics)) {
      // Use default fonts for looking up font metrics if the passed
      // font is not a base font.
      if (this.isSerifFont(name)) {
        lookupName = "Times-Roman";
      } else {
        lookupName = "Helvetica";
      }
    }
    const glyphWidths = Metrics[lookupName];

    if (typeof glyphWidths === "number") {
      defaultWidth = glyphWidths;
      monospace = true;
    } else {
      widths = glyphWidths(); // expand lazy widths array
    }

    return {
      defaultWidth,
      monospace,
      widths,
    };
  }
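
  // Hypothetical usage (the `evaluator` receiver name is assumed):
  //   const metrics = evaluator.getBaseFontMetrics("Courier");
  // Courier's metrics entry is a plain number, so this returns
  // { defaultWidth: 600, monospace: true, widths: {} }, i.e. one fixed
  // width shared by every glyph.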

  buildCharCodeToWidth(widthsByGlyphName, properties) {
    const widths = Object.create(null);
    const differences = properties.differences;
    const encoding = properties.defaultEncoding;
    for (let charCode = 0; charCode < 256; charCode++) {
      if (charCode in differences && widthsByGlyphName[differences[charCode]]) {
        widths[charCode] = widthsByGlyphName[differences[charCode]];
        continue;
      }
      if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) {
        widths[charCode] = widthsByGlyphName[encoding[charCode]];
        continue;
      }
    }
    return widths;
  }
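
  // A small illustrative sketch (all values assumed): with
  //   properties.differences = { 0x41: "Aacute" },
  //   properties.defaultEncoding[0x42] === "B", and
  //   widthsByGlyphName = { Aacute: 722, B: 667 },
  // the Differences entry wins for char code 0x41 and the base encoding
  // supplies 0x42, giving widths[0x41] === 722 and widths[0x42] === 667.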

  preEvaluateFont(dict) {
    const baseDict = dict;
    let type = dict.get("Subtype");
    if (!(type instanceof Name)) {
      throw new FormatError("invalid font Subtype");
    }

    let composite = false;
    let hash, toUnicode;
    if (type.name === "Type0") {
      // If the font is composite:
      //  - get the descendant font
      //  - set the type according to the descendant font
      //  - get the FontDescriptor from the descendant font
      const df = dict.get("DescendantFonts");
      if (!df) {
        throw new FormatError("Descendant fonts are not specified");
      }
      dict = Array.isArray(df) ? this.xref.fetchIfRef(df[0]) : df;

      if (!(dict instanceof Dict)) {
        throw new FormatError("Descendant font is not a dictionary.");
      }
      type = dict.get("Subtype");
      if (!(type instanceof Name)) {
        throw new FormatError("invalid font Subtype");
      }
      composite = true;
    }

    const firstChar = dict.get("FirstChar") || 0,
      lastChar = dict.get("LastChar") || (composite ? 0xffff : 0xff);
    const descriptor = dict.get("FontDescriptor");
    if (descriptor) {
      hash = new MurmurHash3_64();

      const encoding = baseDict.getRaw("Encoding");
      if (encoding instanceof Name) {
        hash.update(encoding.name);
      } else if (encoding instanceof Ref) {
        hash.update(encoding.toString());
      } else if (encoding instanceof Dict) {
        for (const entry of encoding.getRawValues()) {
          if (entry instanceof Name) {
            hash.update(entry.name);
          } else if (entry instanceof Ref) {
            hash.update(entry.toString());
          } else if (Array.isArray(entry)) {
            // 'Differences' array (fixes bug1157493.pdf).
            const diffLength = entry.length,
              diffBuf = new Array(diffLength);

            for (let j = 0; j < diffLength; j++) {
              const diffEntry = entry[j];
              if (diffEntry instanceof Name) {
                diffBuf[j] = diffEntry.name;
              } else if (
                typeof diffEntry === "number" ||
                diffEntry instanceof Ref
              ) {
                diffBuf[j] = diffEntry.toString();
              }
            }
            hash.update(diffBuf.join());
          }
        }
      }

      hash.update(`${firstChar}-${lastChar}`); // Fixes issue10665_reduced.pdf

      toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode");
      if (toUnicode instanceof BaseStream) {
        const stream = toUnicode.str || toUnicode;
        const uint8array = stream.buffer
          ? new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength)
          : new Uint8Array(
              stream.bytes.buffer,
              stream.start,
              stream.end - stream.start
            );
        hash.update(uint8array);
      } else if (toUnicode instanceof Name) {
        hash.update(toUnicode.name);
      }

      const widths = dict.get("Widths") || baseDict.get("Widths");
      if (Array.isArray(widths)) {
        const widthsBuf = [];
        for (const entry of widths) {
          if (typeof entry === "number" || entry instanceof Ref) {
            widthsBuf.push(entry.toString());
          }
        }
        hash.update(widthsBuf.join());
      }

      if (composite) {
        hash.update("compositeFont");

        const compositeWidths = dict.get("W") || baseDict.get("W");
        if (Array.isArray(compositeWidths)) {
          const widthsBuf = [];
          for (const entry of compositeWidths) {
            if (typeof entry === "number" || entry instanceof Ref) {
              widthsBuf.push(entry.toString());
            } else if (Array.isArray(entry)) {
              const subWidthsBuf = [];
              for (const element of entry) {
                if (typeof element === "number" || element instanceof Ref) {
                  subWidthsBuf.push(element.toString());
                }
              }
              widthsBuf.push(`[${subWidthsBuf.join()}]`);
            }
          }
          hash.update(widthsBuf.join());
        }

        const cidToGidMap =
          dict.getRaw("CIDToGIDMap") || baseDict.getRaw("CIDToGIDMap");
        if (cidToGidMap instanceof Name) {
          hash.update(cidToGidMap.name);
        } else if (cidToGidMap instanceof Ref) {
          hash.update(cidToGidMap.toString());
        } else if (cidToGidMap instanceof BaseStream) {
          hash.update(cidToGidMap.peekBytes());
        }
      }
    }

    return {
      descriptor,
      dict,
      baseDict,
      composite,
      type: type.name,
      firstChar,
      lastChar,
      toUnicode,
      hash: hash ? hash.hexdigest() : "",
    };
  }
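
  // Sketch of why the hash matters (caller shape assumed, not shown here):
  // two font dictionaries that hash the same Encoding, FirstChar/LastChar,
  // ToUnicode bytes, Widths and CIDToGIDMap produce identical hexdigests,
  // which lets the evaluator reuse one translated font instead of parsing
  // the same font data twice.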

  async translateFont({
    descriptor,
    dict,
    baseDict,
    composite,
    type,
    firstChar,
    lastChar,
    toUnicode,
    cssFontInfo,
  }) {
    const isType3Font = type === "Type3";
    let properties;

    if (!descriptor) {
      if (isType3Font) {
        // FontDescriptor is only required for Type3 fonts when the document
        // is a tagged PDF. Create a barebones one to get by.
        descriptor = new Dict(null);
        descriptor.set("FontName", Name.get(type));
        descriptor.set("FontBBox", dict.getArray("FontBBox") || [0, 0, 0, 0]);
      } else {
        // Before PDF 1.5 if the font was one of the base 14 fonts, having a
        // FontDescriptor was not required.
        // This case is here for compatibility.
        let baseFontName = dict.get("BaseFont");
        if (!(baseFontName instanceof Name)) {
          throw new FormatError("Base font is not specified");
        }

        // Using base font name as a font name.
        baseFontName = baseFontName.name.replace(/[,_]/g, "-");
        const metrics = this.getBaseFontMetrics(baseFontName);

        // Simulating descriptor flags attribute
        const fontNameWoStyle = baseFontName.split("-")[0];
        const flags =
          (this.isSerifFont(fontNameWoStyle) ? FontFlags.Serif : 0) |
          (metrics.monospace ? FontFlags.FixedPitch : 0) |
          (getSymbolsFonts()[fontNameWoStyle]
            ? FontFlags.Symbolic
            : FontFlags.Nonsymbolic);

        properties = {
          type,
          name: baseFontName,
          loadedName: baseDict.loadedName,
          widths: metrics.widths,
          defaultWidth: metrics.defaultWidth,
          isSimulatedFlags: true,
          flags,
          firstChar,
          lastChar,
          toUnicode,
          xHeight: 0,
          capHeight: 0,
          italicAngle: 0,
          isType3Font,
        };
        const widths = dict.get("Widths");

        const standardFontName = getStandardFontName(baseFontName);
        let file = null;
        if (standardFontName) {
          properties.isStandardFont = true;
          file = await this.fetchStandardFontData(standardFontName);
          properties.isInternalFont = !!file;
        }
        return this.extractDataStructures(dict, dict, properties).then(
          newProperties => {
            if (widths) {
              const glyphWidths = [];
              let j = firstChar;
              for (let i = 0, ii = widths.length; i < ii; i++) {
                glyphWidths[j++] = this.xref.fetchIfRef(widths[i]);
              }
              newProperties.widths = glyphWidths;
            } else {
              newProperties.widths = this.buildCharCodeToWidth(
                metrics.widths,
                newProperties
              );
            }
            return new Font(baseFontName, file, newProperties);
          }
        );
      }
    }

    // According to the spec if 'FontDescriptor' is declared, 'FirstChar',
    // 'LastChar' and 'Widths' should exist too, but some PDF encoders seem
    // to ignore this rule when a variant of a standard font is used.
    // TODO Fill the width array depending on which of the base fonts this
    // is a variant of.

    let fontName = descriptor.get("FontName");
    let baseFont = dict.get("BaseFont");
    // Some bad PDFs have a string as the font name.
    if (typeof fontName === "string") {
      fontName = Name.get(fontName);
    }
    if (typeof baseFont === "string") {
      baseFont = Name.get(baseFont);
    }

    if (!isType3Font) {
      const fontNameStr = fontName && fontName.name;
      const baseFontStr = baseFont && baseFont.name;
      if (fontNameStr !== baseFontStr) {
        info(
          `The FontDescriptor's FontName is "${fontNameStr}" but ` +
            `should be the same as the Font's BaseFont "${baseFontStr}".`
        );
        // Workaround for cases where e.g. fontNameStr = 'Arial' and
        // baseFontStr = 'Arial,Bold' (needed when no font file is embedded).
        if (fontNameStr && baseFontStr && baseFontStr.startsWith(fontNameStr)) {
          fontName = baseFont;
        }
      }
    }
    fontName = fontName || baseFont;

    if (!(fontName instanceof Name)) {
      throw new FormatError("invalid font name");
    }

    let fontFile, subtype, length1, length2, length3;
    try {
      fontFile = descriptor.get("FontFile", "FontFile2", "FontFile3");
    } catch (ex) {
      if (!this.options.ignoreErrors) {
        throw ex;
      }
      warn(`translateFont - fetching "${fontName.name}" font file: "${ex}".`);
      fontFile = new NullStream();
    }
    let isStandardFont = false;
    let isInternalFont = false;
    let glyphScaleFactors = null;
    if (fontFile) {
      if (fontFile.dict) {
        const subtypeEntry = fontFile.dict.get("Subtype");
        if (subtypeEntry instanceof Name) {
          subtype = subtypeEntry.name;
        }
        length1 = fontFile.dict.get("Length1");
        length2 = fontFile.dict.get("Length2");
        length3 = fontFile.dict.get("Length3");
      }
    } else if (cssFontInfo) {
      // We have a missing XFA font.
      const standardFontName = getXfaFontName(fontName.name);
      if (standardFontName) {
        cssFontInfo.fontFamily = `${cssFontInfo.fontFamily}-PdfJS-XFA`;
        cssFontInfo.metrics = standardFontName.metrics || null;
        glyphScaleFactors = standardFontName.factors || null;
        fontFile = await this.fetchStandardFontData(standardFontName.name);
        isInternalFont = !!fontFile;

        // We're using a substitution font, but any widths (if present) are
        // related to the glyph positions in the original font, so we
        // overwrite everything here to be sure that the widths are correct.
        baseDict = dict = getXfaFontDict(fontName.name);
        composite = true;
      }
    } else if (!isType3Font) {
      const standardFontName = getStandardFontName(fontName.name);
      if (standardFontName) {
        isStandardFont = true;
        fontFile = await this.fetchStandardFontData(standardFontName);
        isInternalFont = !!fontFile;
      }
    }

    properties = {
      type,
      name: fontName.name,
      subtype,
      file: fontFile,
      length1,
      length2,
      length3,
      isStandardFont,
      isInternalFont,
      loadedName: baseDict.loadedName,
      composite,
      fixedPitch: false,
      fontMatrix: dict.getArray("FontMatrix") || FONT_IDENTITY_MATRIX,
      firstChar,
      lastChar,
      toUnicode,
      bbox: descriptor.getArray("FontBBox") || dict.getArray("FontBBox"),
      ascent: descriptor.get("Ascent"),
      descent: descriptor.get("Descent"),
      xHeight: descriptor.get("XHeight") || 0,
      capHeight: descriptor.get("CapHeight") || 0,
      flags: descriptor.get("Flags"),
      italicAngle: descriptor.get("ItalicAngle") || 0,
      isType3Font,
      cssFontInfo,
      scaleFactors: glyphScaleFactors,
    };

    if (composite) {
      const cidEncoding = baseDict.get("Encoding");
      if (cidEncoding instanceof Name) {
        properties.cidEncoding = cidEncoding.name;
      }
      const cMap = await CMapFactory.create({
        encoding: cidEncoding,
        fetchBuiltInCMap: this._fetchBuiltInCMapBound,
        useCMap: null,
      });
      properties.cMap = cMap;
      properties.vertical = properties.cMap.vertical;
    }

    return this.extractDataStructures(dict, baseDict, properties).then(
      newProperties => {
        this.extractWidths(dict, descriptor, newProperties);

        return new Font(fontName.name, fontFile, newProperties);
      }
    );
  }
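
  // A hedged sketch of the expected call flow (variable names assumed, not
  // from this file):
  //   const preEvaluated = evaluator.preEvaluateFont(fontDict);
  //   const font = await evaluator.translateFont({
  //     ...preEvaluated,
  //     cssFontInfo: null,
  //   });
  // i.e. the descriptor/dict/baseDict triple computed by preEvaluateFont is
  // passed straight through to translateFont.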

  static buildFontPaths(font, glyphs, handler, evaluatorOptions) {
    function buildPath(fontChar) {
      const glyphName = `${font.loadedName}_path_${fontChar}`;
      try {
        if (font.renderer.hasBuiltPath(fontChar)) {
          return;
        }
        handler.send("commonobj", [
          glyphName,
          "FontPath",
          font.renderer.getPathJs(fontChar),
        ]);
      } catch (reason) {
        if (evaluatorOptions.ignoreErrors) {
          // Error in the font data -- sending unsupported feature
          // notification and allow glyph path building to continue.
          handler.send("UnsupportedFeature", {
            featureId: UNSUPPORTED_FEATURES.errorFontBuildPath,
          });
          warn(`buildFontPaths - ignoring ${glyphName} glyph: "${reason}".`);
          return;
        }
        throw reason;
      }
    }

    for (const glyph of glyphs) {
      buildPath(glyph.fontChar);

      // If the glyph has an accent we need to build a path for its
      // fontChar too, otherwise CanvasGraphics_paintChar will fail.
      const accent = glyph.accent;
      if (accent && accent.fontChar) {
        buildPath(accent.fontChar);
      }
    }
  }
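
  // Illustrative note (main-thread handling assumed, not shown in this
  // file): each "commonobj" message above carries
  //   [`${font.loadedName}_path_${fontChar}`, "FontPath", pathJs]
  // so the rendering side can cache and replay the glyph outline without
  // re-parsing the font file.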

  static get fallbackFontDict() {
    const dict = new Dict();
    dict.set("BaseFont", Name.get("PDFJS-FallbackFont"));
    dict.set("Type", Name.get("FallbackType"));
    dict.set("Subtype", Name.get("FallbackType"));
    dict.set("Encoding", Name.get("WinAnsiEncoding"));

    return shadow(this, "fallbackFontDict", dict);
  }
}
|
2011-10-25 08:55:23 +09:00
|
|
|
|
|
2020-04-03 17:19:02 +09:00
|
|
|
|

class TranslatedFont {
  constructor({ loadedName, font, dict, evaluatorOptions }) {
    this.loadedName = loadedName;
    this.font = font;
    this.dict = dict;
    this._evaluatorOptions = evaluatorOptions || DefaultPartialEvaluatorOptions;
    this.type3Loaded = null;
    this.type3Dependencies = font.isType3Font ? new Set() : null;
    this.sent = false;
  }

  send(handler) {
    if (this.sent) {
      return;
    }
    this.sent = true;

    handler.send("commonobj", [
      this.loadedName,
      "Font",
      this.font.exportData(this._evaluatorOptions.fontExtraProperties),
    ]);
  }

  fallback(handler) {
    if (!this.font.data) {
      return;
    }
    // When font loading failed, fall back to the built-in font renderer.
    this.font.disableFontFace = true;

    // An arbitrary number of text rendering operators could have been
    // encountered between the point in time when the 'Font' message was sent
    // to the main-thread, and the point in time when the 'FontFallback'
    // message was received on the worker-thread.
    // To ensure that all 'FontPath's are available on the main-thread, when
    // font loading failed, attempt to resend *all* previously parsed glyphs.
    PartialEvaluator.buildFontPaths(
      this.font,
      /* glyphs = */ this.font.glyphCacheValues,
      handler,
      this._evaluatorOptions
    );
  }
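
  // A rough sketch of the failure path handled above, using the message
  // names from the comments (the exact main-thread plumbing is assumed):
  //
  //   1. worker -> main:  "commonobj" [loadedName, "Font", ...]; see send().
  //   2. main:            loading the font file fails, e.g. because the
  //                       sanitizer rejects it (Font Loading API only).
  //   3. main -> worker:  "FontFallback", upon which fallback(handler) runs,
  //                       disabling `fontFace` usage and resending all
  //                       previously parsed glyph paths.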

  loadType3Data(evaluator, resources, task) {
    if (this.type3Loaded) {
      return this.type3Loaded;
    }
    if (!this.font.isType3Font) {
      throw new Error("Must be a Type3 font.");
    }
    // When parsing Type3 glyphs, always ignore them if there are errors.
    // Compared to the parsing of e.g. an entire page, it doesn't really
    // make sense to only be able to render a Type3 glyph partially.
    const type3Evaluator = evaluator.clone({ ignoreErrors: false });
    type3Evaluator.parsingType3Font = true;
    // Prevent circular references in Type3 fonts.
    const type3FontRefs = new RefSet(evaluator.type3FontRefs);
    if (this.dict.objId && !type3FontRefs.has(this.dict.objId)) {
      type3FontRefs.put(this.dict.objId);
    }
    type3Evaluator.type3FontRefs = type3FontRefs;

    const translatedFont = this.font,
      type3Dependencies = this.type3Dependencies;
    let loadCharProcsPromise = Promise.resolve();
    const charProcs = this.dict.get("CharProcs");
    const fontResources = this.dict.get("Resources") || resources;
    const charProcOperatorList = Object.create(null);

    const isEmptyBBox =
      !translatedFont.bbox || isArrayEqual(translatedFont.bbox, [0, 0, 0, 0]);

    for (const key of charProcs.getKeys()) {
      loadCharProcsPromise = loadCharProcsPromise.then(() => {
        const glyphStream = charProcs.get(key);
        const operatorList = new OperatorList();
        return type3Evaluator
          .getOperatorList({
            stream: glyphStream,
            task,
            resources: fontResources,
            operatorList,
          })
          .then(() => {
            // According to the PDF specification, section "9.6.5 Type 3 Fonts"
            // and "Table 113":
            //  "A glyph description that begins with the d1 operator should
            //   not execute any operators that set the colour (or other
            //   colour-related parameters) in the graphics state;
            //   any use of such operators shall be ignored."
            if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) {
              this._removeType3ColorOperators(operatorList, isEmptyBBox);
            }
            charProcOperatorList[key] = operatorList.getIR();

            for (const dependency of operatorList.dependencies) {
              type3Dependencies.add(dependency);
            }
          })
          .catch(function (reason) {
            warn(`Type3 font resource "${key}" is not available.`);
            const dummyOperatorList = new OperatorList();
            charProcOperatorList[key] = dummyOperatorList.getIR();
          });
      });
    }
    this.type3Loaded = loadCharProcsPromise.then(() => {
      translatedFont.charProcOperatorList = charProcOperatorList;
      if (this._bbox) {
        translatedFont.isCharBBox = true;
        translatedFont.bbox = this._bbox;
      }
    });
    return this.type3Loaded;
  }
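
  // The CharProcs parsing above relies on sequential promise chaining:
  // each glyph stream is parsed only after the previous one has settled,
  // and a failing glyph is replaced by an empty operatorList rather than
  // rejecting the entire chain. A minimal sketch of the pattern, with a
  // hypothetical `parse` helper:
  //
  //   let chain = Promise.resolve();
  //   for (const key of keys) {
  //     chain = chain.then(() =>
  //       parse(key).catch(() => {
  //         results[key] = emptyResult; // Render nothing for this glyph.
  //       })
  //     );
  //   }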

  /**
   * @private
   */
  _removeType3ColorOperators(operatorList, isEmptyBBox = false) {
    if (
      typeof PDFJSDev === "undefined" ||
      PDFJSDev.test("!PRODUCTION || TESTING")
    ) {
      assert(
        operatorList.fnArray[0] === OPS.setCharWidthAndBounds,
        "Type3 glyph shall start with the d1 operator."
      );
    }
    if (isEmptyBBox) {
      if (!this._bbox) {
        this._bbox = [Infinity, Infinity, -Infinity, -Infinity];
      }
      const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2));

      this._bbox[0] = Math.min(this._bbox[0], charBBox[0]);
      this._bbox[1] = Math.min(this._bbox[1], charBBox[1]);
      this._bbox[2] = Math.max(this._bbox[2], charBBox[2]);
      this._bbox[3] = Math.max(this._bbox[3], charBBox[3]);
    }
    let i = 1,
      ii = operatorList.length;
    while (i < ii) {
      switch (operatorList.fnArray[i]) {
        case OPS.setStrokeColorSpace:
        case OPS.setFillColorSpace:
        case OPS.setStrokeColor:
        case OPS.setStrokeColorN:
        case OPS.setFillColor:
        case OPS.setFillColorN:
        case OPS.setStrokeGray:
        case OPS.setFillGray:
        case OPS.setStrokeRGBColor:
        case OPS.setFillRGBColor:
        case OPS.setStrokeCMYKColor:
        case OPS.setFillCMYKColor:
        case OPS.shadingFill:
        case OPS.setRenderingIntent:
          operatorList.fnArray.splice(i, 1);
          operatorList.argsArray.splice(i, 1);
          ii--;
          continue;

        case OPS.setGState:
          const [gStateObj] = operatorList.argsArray[i];
          let j = 0,
            jj = gStateObj.length;
          while (j < jj) {
            const [gStateKey] = gStateObj[j];
            switch (gStateKey) {
              case "TR":
              case "TR2":
              case "HT":
              case "BG":
              case "BG2":
              case "UCR":
              case "UCR2":
                gStateObj.splice(j, 1);
                jj--;
                continue;
            }
            j++;
          }
          break;
      }
      i++;
    }
  }
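
  // Note the in-place filtering pattern used above: when an entry is
  // spliced out, the index is *not* advanced, since the next element has
  // just shifted into the current slot. A minimal standalone sketch:
  //
  //   let i = 0, ii = arr.length;
  //   while (i < ii) {
  //     if (shouldRemove(arr[i])) {
  //       arr.splice(i, 1);
  //       ii--;
  //       continue; // Re-check the element now occupying position `i`.
  //     }
  //     i++;
  //   }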
}

class StateManager {
  constructor(initialState = new EvalState()) {
    this.state = initialState;
    this.stateStack = [];
  }

  save() {
    const old = this.state;
    this.stateStack.push(this.state);
    this.state = old.clone();
  }

  restore() {
    const prev = this.stateStack.pop();
    if (prev) {
      this.state = prev;
    }
  }

  transform(args) {
    this.state.ctm = Util.transform(this.state.ctm, args);
  }
}
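
// Example (illustrative) of the save/restore contract, mirroring the PDF
// `q`/`Q` operators:
//
//   const stateManager = new StateManager();
//   stateManager.save();                        // `q`: push, work on a clone
//   stateManager.transform([2, 0, 0, 2, 0, 0]); // `cm`: affects the clone only
//   stateManager.restore();                     // `Q`: back to the saved CTM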

class TextState {
  constructor() {
    this.ctm = new Float32Array(IDENTITY_MATRIX);
    this.fontName = null;
    this.fontSize = 0;
    this.font = null;
    this.fontMatrix = FONT_IDENTITY_MATRIX;
    this.textMatrix = IDENTITY_MATRIX.slice();
    this.textLineMatrix = IDENTITY_MATRIX.slice();
    this.charSpacing = 0;
    this.wordSpacing = 0;
    this.leading = 0;
    this.textHScale = 1;
    this.textRise = 0;
  }

  setTextMatrix(a, b, c, d, e, f) {
    const m = this.textMatrix;
    m[0] = a;
    m[1] = b;
    m[2] = c;
    m[3] = d;
    m[4] = e;
    m[5] = f;
  }

  setTextLineMatrix(a, b, c, d, e, f) {
    const m = this.textLineMatrix;
    m[0] = a;
    m[1] = b;
    m[2] = c;
    m[3] = d;
    m[4] = e;
    m[5] = f;
  }

  translateTextMatrix(x, y) {
    const m = this.textMatrix;
    m[4] = m[0] * x + m[2] * y + m[4];
    m[5] = m[1] * x + m[3] * y + m[5];
  }
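
  // Worked example: with textMatrix m = [2, 0, 0, 2, 10, 10], i.e. a
  // uniform scale of 2 plus a translation, translateTextMatrix(3, 4) gives
  //   m[4] = 2 * 3 + 0 * 4 + 10 = 16,
  //   m[5] = 0 * 3 + 2 * 4 + 10 = 18,
  // i.e. the offset is applied in text space, transformed by the matrix,
  // rather than simply added to the translation components.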

  translateTextLineMatrix(x, y) {
    const m = this.textLineMatrix;
    m[4] = m[0] * x + m[2] * y + m[4];
    m[5] = m[1] * x + m[3] * y + m[5];
  }

  carriageReturn() {
    this.translateTextLineMatrix(0, -this.leading);
    this.textMatrix = this.textLineMatrix.slice();
  }

  clone() {
    const clone = Object.create(this);
    clone.textMatrix = this.textMatrix.slice();
    clone.textLineMatrix = this.textLineMatrix.slice();
    clone.fontMatrix = this.fontMatrix.slice();
    return clone;
  }
}

class EvalState {
  constructor() {
    this.ctm = new Float32Array(IDENTITY_MATRIX);
    this.font = null;
    this.textRenderingMode = TextRenderingMode.FILL;
    this.fillColorSpace = ColorSpace.singletons.gray;
    this.strokeColorSpace = ColorSpace.singletons.gray;
  }

  clone() {
    return Object.create(this);
  }
}
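
// `clone()` returns Object.create(this), a cheap copy-on-write clone: the
// clone has no own properties initially, reads fall through the prototype
// chain to the original, and writes merely shadow it. For example:
//
//   const state = new EvalState();
//   const copy = state.clone();
//   copy.font = loadedFont;  // Own property; `state.font` is unchanged.
//   copy.textRenderingMode;  // Falls through to `state`'s value.
//
// Only the references are shadowed, though; mutating a shared object (such
// as a matrix) through the clone would also affect the original, which is
// why TextState.clone() above additionally slices its matrices.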

class EvaluatorPreprocessor {
  static get opMap() {
    // Specifies properties for each command
    //
    // If variableArgs === true: [0, `numArgs`] expected
    // If variableArgs === false: exactly `numArgs` expected
    const getOPMap = getLookupTableFactory(function (t) {
      // Graphic state
      t.w = { id: OPS.setLineWidth, numArgs: 1, variableArgs: false };
      t.J = { id: OPS.setLineCap, numArgs: 1, variableArgs: false };
      t.j = { id: OPS.setLineJoin, numArgs: 1, variableArgs: false };
      t.M = { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false };
      t.d = { id: OPS.setDash, numArgs: 2, variableArgs: false };
      t.ri = { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false };
      t.i = { id: OPS.setFlatness, numArgs: 1, variableArgs: false };
      t.gs = { id: OPS.setGState, numArgs: 1, variableArgs: false };
      t.q = { id: OPS.save, numArgs: 0, variableArgs: false };
      t.Q = { id: OPS.restore, numArgs: 0, variableArgs: false };
      t.cm = { id: OPS.transform, numArgs: 6, variableArgs: false };

      // Path
      t.m = { id: OPS.moveTo, numArgs: 2, variableArgs: false };
      t.l = { id: OPS.lineTo, numArgs: 2, variableArgs: false };
      t.c = { id: OPS.curveTo, numArgs: 6, variableArgs: false };
      t.v = { id: OPS.curveTo2, numArgs: 4, variableArgs: false };
      t.y = { id: OPS.curveTo3, numArgs: 4, variableArgs: false };
      t.h = { id: OPS.closePath, numArgs: 0, variableArgs: false };
      t.re = { id: OPS.rectangle, numArgs: 4, variableArgs: false };
      t.S = { id: OPS.stroke, numArgs: 0, variableArgs: false };
      t.s = { id: OPS.closeStroke, numArgs: 0, variableArgs: false };
      t.f = { id: OPS.fill, numArgs: 0, variableArgs: false };
      t.F = { id: OPS.fill, numArgs: 0, variableArgs: false };
      t["f*"] = { id: OPS.eoFill, numArgs: 0, variableArgs: false };
      t.B = { id: OPS.fillStroke, numArgs: 0, variableArgs: false };
      t["B*"] = { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false };
      t.b = { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false };
      t["b*"] = { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false };
      t.n = { id: OPS.endPath, numArgs: 0, variableArgs: false };

      // Clipping
      t.W = { id: OPS.clip, numArgs: 0, variableArgs: false };
      t["W*"] = { id: OPS.eoClip, numArgs: 0, variableArgs: false };

      // Text
      t.BT = { id: OPS.beginText, numArgs: 0, variableArgs: false };
      t.ET = { id: OPS.endText, numArgs: 0, variableArgs: false };
      t.Tc = { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false };
      t.Tw = { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false };
      t.Tz = { id: OPS.setHScale, numArgs: 1, variableArgs: false };
      t.TL = { id: OPS.setLeading, numArgs: 1, variableArgs: false };
      t.Tf = { id: OPS.setFont, numArgs: 2, variableArgs: false };
      t.Tr = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false };
      t.Ts = { id: OPS.setTextRise, numArgs: 1, variableArgs: false };
      t.Td = { id: OPS.moveText, numArgs: 2, variableArgs: false };
      t.TD = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false };
      t.Tm = { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false };
      t["T*"] = { id: OPS.nextLine, numArgs: 0, variableArgs: false };
      t.Tj = { id: OPS.showText, numArgs: 1, variableArgs: false };
      t.TJ = { id: OPS.showSpacedText, numArgs: 1, variableArgs: false };
      t["'"] = { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false };
      t['"'] = {
        id: OPS.nextLineSetSpacingShowText,
        numArgs: 3,
        variableArgs: false,
      };

      // Type3 fonts
      t.d0 = { id: OPS.setCharWidth, numArgs: 2, variableArgs: false };
      t.d1 = {
        id: OPS.setCharWidthAndBounds,
        numArgs: 6,
        variableArgs: false,
      };

      // Color
      t.CS = { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false };
      t.cs = { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false };
      t.SC = { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true };
      t.SCN = { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true };
      t.sc = { id: OPS.setFillColor, numArgs: 4, variableArgs: true };
      t.scn = { id: OPS.setFillColorN, numArgs: 33, variableArgs: true };
      t.G = { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false };
      t.g = { id: OPS.setFillGray, numArgs: 1, variableArgs: false };
      t.RG = { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false };
      t.rg = { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false };
      t.K = { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false };
      t.k = { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false };

      // Shading
      t.sh = { id: OPS.shadingFill, numArgs: 1, variableArgs: false };

      // Images
      t.BI = { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false };
      t.ID = { id: OPS.beginImageData, numArgs: 0, variableArgs: false };
      t.EI = { id: OPS.endInlineImage, numArgs: 1, variableArgs: false };

      // XObjects
      t.Do = { id: OPS.paintXObject, numArgs: 1, variableArgs: false };
      t.MP = { id: OPS.markPoint, numArgs: 1, variableArgs: false };
      t.DP = { id: OPS.markPointProps, numArgs: 2, variableArgs: false };
      t.BMC = { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false };
      t.BDC = {
        id: OPS.beginMarkedContentProps,
        numArgs: 2,
        variableArgs: false,
      };
      t.EMC = { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false };

      // Compatibility
      t.BX = { id: OPS.beginCompat, numArgs: 0, variableArgs: false };
      t.EX = { id: OPS.endCompat, numArgs: 0, variableArgs: false };

      // (reserved partial commands for the lexer)
      t.BM = null;
      t.BD = null;
      t.true = null;
      t.fa = null;
      t.fal = null;
      t.fals = null;
      t.false = null;
      t.nu = null;
      t.nul = null;
      t.null = null;
    });

    return shadow(this, "opMap", getOPMap());
  }
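
  // Example (illustrative): looking up two entries of the table above:
  //
  //   EvaluatorPreprocessor.opMap.Tf;
  //   // -> { id: OPS.setFont, numArgs: 2, variableArgs: false }
  //   //    `Tf` requires exactly two operands (font name and size).
  //
  //   EvaluatorPreprocessor.opMap.SCN;
  //   // -> { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true }
  //   //    `SCN` accepts anywhere from 0 up to 33 operands.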

  static get MAX_INVALID_PATH_OPS() {
    return shadow(this, "MAX_INVALID_PATH_OPS", 10);
  }

  constructor(stream, xref, stateManager = new StateManager()) {
    // TODO(mduan): pass array of knownCommands rather than this.opMap
    // dictionary
    this.parser = new Parser({
      lexer: new Lexer(stream, EvaluatorPreprocessor.opMap),
      xref,
    });
    this.stateManager = stateManager;
    this.nonProcessedArgs = [];
    this._numInvalidPathOPS = 0;
  }

  get savedStatesDepth() {
    return this.stateManager.stateStack.length;
  }

  // |operation| is an object with two fields:
  //
  // - |fn| is an out param.
  //
  // - |args| is an inout param. On entry, it should have one of two values.
  //
  //   - An empty array. This indicates that the caller is providing the
  //     array in which the args will be stored. The caller should use
  //     this value if it can reuse a single array for each call to read().
  //
  //   - |null|. This indicates that the caller needs this function to create
  //     the array in which any args are stored. If there are zero args,
  //     this function will leave |operation.args| as |null| (thus avoiding
  //     allocations that would occur if we used an empty array to represent
  //     zero arguments). Otherwise, it will replace |null| with a new array
  //     containing the arguments. The caller should use this value if it
  //     cannot reuse an array for each call to read().
  //
  // These two modes are present because this function is very hot and so
  // avoiding allocations where possible is worthwhile.
  //
  read(operation) {
    let args = operation.args;
    while (true) {
      const obj = this.parser.getObj();
      if (obj instanceof Cmd) {
        const cmd = obj.cmd;
        // Check that the command is valid
        const opSpec = EvaluatorPreprocessor.opMap[cmd];
        if (!opSpec) {
          warn(`Unknown command "${cmd}".`);
          continue;
        }

        const fn = opSpec.id;
        const numArgs = opSpec.numArgs;
        let argsLength = args !== null ? args.length : 0;

        if (!opSpec.variableArgs) {
          // PostScript commands can be nested, e.g. /F2 /GS2 gs 5.711 Tf
          if (argsLength !== numArgs) {
            const nonProcessedArgs = this.nonProcessedArgs;
            while (argsLength > numArgs) {
              nonProcessedArgs.push(args.shift());
              argsLength--;
            }
            while (argsLength < numArgs && nonProcessedArgs.length !== 0) {
              if (args === null) {
                args = [];
              }
              args.unshift(nonProcessedArgs.pop());
              argsLength++;
            }
          }

          if (argsLength < numArgs) {
            const partialMsg =
              `command ${cmd}: expected ${numArgs} args, ` +
              `but received ${argsLength} args.`;

            // Incomplete path operators, in particular, can result in fairly
            // chaotic rendering artifacts. Hence the following heuristic is
            // used to error, rather than just warn, once a number of invalid
            // path operators have been encountered (fixes bug1443140.pdf).
            if (
              fn >= OPS.moveTo &&
              fn <= OPS.endPath && // Path operator
              ++this._numInvalidPathOPS >
                EvaluatorPreprocessor.MAX_INVALID_PATH_OPS
            ) {
              throw new FormatError(`Invalid ${partialMsg}`);
            }
            // If we receive too few arguments, it's not possible to execute
            // the command, hence we skip the command.
            warn(`Skipping ${partialMsg}`);
            if (args !== null) {
              args.length = 0;
            }
            continue;
          }
        } else if (argsLength > numArgs) {
          info(
            `Command ${cmd}: expected [0, ${numArgs}] args, ` +
              `but received ${argsLength} args.`
          );
        }

        // TODO figure out how to type-check vararg functions
        this.preprocessCommand(fn, args);

        operation.fn = fn;
        operation.args = args;
        return true;
      }
      if (obj === EOF) {
        return false; // no more commands
      }
      // argument
      if (obj !== null) {
        if (args === null) {
          args = [];
        }
        args.push(obj);
        if (args.length > 33) {
          throw new FormatError("Too many arguments");
        }
      }
    }
  }
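
  // A minimal sketch of the allocation-free calling convention described
  // above, reusing a single `args` array across read() calls (with the
  // |null| mode, `operation.args = null` would be set before every call):
  //
  //   const preprocessor = new EvaluatorPreprocessor(stream, xref);
  //   const operation = { fn: null, args: [] };
  //   while (preprocessor.read(operation)) {
  //     // `operation.fn` is an OPS value, `operation.args` its operands.
  //     operation.args.length = 0; // Reset before the next read() call.
  //   }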

  preprocessCommand(fn, args) {
    switch (fn | 0) {
      case OPS.save:
        this.stateManager.save();
        break;
      case OPS.restore:
        this.stateManager.restore();
        break;
      case OPS.transform:
        this.stateManager.transform(args);
        break;
    }
  }
}

export { EvaluatorPreprocessor, PartialEvaluator };