709d89420e
- Re-factor the existing `fetchData` helper function such that it can fetch more types of data, and it now supports "arraybuffer", "json", and "text". This only needed minor adjustments in the `DOMCMapReaderFactory` and `DOMStandardFontDataFactory` classes.[1] - Expose the `fetchData` helper function in the API, such that the viewer is able to access it. - Use the `fetchData` helper function in the `GenericL10n` class, since this should allow fetching of localization-data even if the default viewer is run in an environment without support for the Fetch API. --- [1] While testing this I also noticed a minor inconsistency when handling standard font-data on the worker-thread.
5011 lines
158 KiB
JavaScript
5011 lines
158 KiB
JavaScript
/* Copyright 2012 Mozilla Foundation
|
||
*
|
||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
* you may not use this file except in compliance with the License.
|
||
* You may obtain a copy of the License at
|
||
*
|
||
* http://www.apache.org/licenses/LICENSE-2.0
|
||
*
|
||
* Unless required by applicable law or agreed to in writing, software
|
||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
* See the License for the specific language governing permissions and
|
||
* limitations under the License.
|
||
*/
|
||
/* eslint-disable no-var */
|
||
|
||
import {
|
||
AbortException,
|
||
assert,
|
||
CMapCompressionType,
|
||
FONT_IDENTITY_MATRIX,
|
||
FormatError,
|
||
IDENTITY_MATRIX,
|
||
info,
|
||
isArrayEqual,
|
||
normalizeUnicode,
|
||
OPS,
|
||
PromiseCapability,
|
||
shadow,
|
||
stringToPDFString,
|
||
TextRenderingMode,
|
||
Util,
|
||
warn,
|
||
} from "../shared/util.js";
|
||
import { CMapFactory, IdentityCMap } from "./cmap.js";
|
||
import { Cmd, Dict, EOF, isName, Name, Ref, RefSet } from "./primitives.js";
|
||
import { ErrorFont, Font } from "./fonts.js";
|
||
import {
|
||
getEncoding,
|
||
MacRomanEncoding,
|
||
StandardEncoding,
|
||
SymbolSetEncoding,
|
||
WinAnsiEncoding,
|
||
ZapfDingbatsEncoding,
|
||
} from "./encodings.js";
|
||
import {
|
||
getFontNameToFileMap,
|
||
getSerifFonts,
|
||
getStandardFontName,
|
||
getStdFontMap,
|
||
getSymbolsFonts,
|
||
isKnownFontName,
|
||
} from "./standard_fonts.js";
|
||
import { getTilingPatternIR, Pattern } from "./pattern.js";
|
||
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
|
||
import { IdentityToUnicodeMap, ToUnicodeMap } from "./to_unicode_map.js";
|
||
import { isPDFFunction, PDFFunctionFactory } from "./function.js";
|
||
import { Lexer, Parser } from "./parser.js";
|
||
import {
|
||
LocalColorSpaceCache,
|
||
LocalGStateCache,
|
||
LocalImageCache,
|
||
LocalTilingPatternCache,
|
||
RegionalImageCache,
|
||
} from "./image_utils.js";
|
||
import { NullStream, Stream } from "./stream.js";
|
||
import { BaseStream } from "./base_stream.js";
|
||
import { bidi } from "./bidi.js";
|
||
import { ColorSpace } from "./colorspace.js";
|
||
import { DecodeStream } from "./decode_stream.js";
|
||
import { FontFlags } from "./fonts_utils.js";
|
||
import { getFontSubstitution } from "./font_substitutions.js";
|
||
import { getGlyphsUnicode } from "./glyphlist.js";
|
||
import { getMetrics } from "./metrics.js";
|
||
import { getUnicodeForGlyph } from "./unicode.js";
|
||
import { ImageResizer } from "./image_resizer.js";
|
||
import { MurmurHash3_64 } from "../shared/murmurhash3.js";
|
||
import { OperatorList } from "./operator_list.js";
|
||
import { PDFImage } from "./image.js";
|
||
|
||
const DefaultPartialEvaluatorOptions = Object.freeze({
|
||
maxImageSize: -1,
|
||
disableFontFace: false,
|
||
ignoreErrors: false,
|
||
isEvalSupported: true,
|
||
isOffscreenCanvasSupported: false,
|
||
canvasMaxAreaInBytes: -1,
|
||
fontExtraProperties: false,
|
||
useSystemFonts: true,
|
||
cMapUrl: null,
|
||
standardFontDataUrl: null,
|
||
});
|
||
|
||
const PatternType = {
|
||
TILING: 1,
|
||
SHADING: 2,
|
||
};
|
||
|
||
// Optionally avoid sending individual, or very few, text chunks to reduce
|
||
// `postMessage` overhead with ReadableStream (see issue 13962).
|
||
//
|
||
// PLEASE NOTE: This value should *not* be too large (it's used as a lower limit
|
||
// in `enqueueChunk`), since that would cause streaming of textContent to become
|
||
// essentially useless in practice by sending all (or most) chunks at once.
|
||
// Also, a too large value would (indirectly) affect the main-thread `textLayer`
|
||
// building negatively by forcing all textContent to be handled at once, which
|
||
// could easily end up hurting *overall* performance (e.g. rendering as well).
|
||
const TEXT_CHUNK_BATCH_SIZE = 10;
|
||
|
||
const deferred = Promise.resolve();
|
||
|
||
// Convert PDF blend mode names to HTML5 blend mode names.
|
||
function normalizeBlendMode(value, parsingArray = false) {
|
||
if (Array.isArray(value)) {
|
||
// Use the first *supported* BM value in the Array (fixes issue11279.pdf).
|
||
for (const val of value) {
|
||
const maybeBM = normalizeBlendMode(val, /* parsingArray = */ true);
|
||
if (maybeBM) {
|
||
return maybeBM;
|
||
}
|
||
}
|
||
warn(`Unsupported blend mode Array: ${value}`);
|
||
return "source-over";
|
||
}
|
||
|
||
if (!(value instanceof Name)) {
|
||
if (parsingArray) {
|
||
return null;
|
||
}
|
||
return "source-over";
|
||
}
|
||
switch (value.name) {
|
||
case "Normal":
|
||
case "Compatible":
|
||
return "source-over";
|
||
case "Multiply":
|
||
return "multiply";
|
||
case "Screen":
|
||
return "screen";
|
||
case "Overlay":
|
||
return "overlay";
|
||
case "Darken":
|
||
return "darken";
|
||
case "Lighten":
|
||
return "lighten";
|
||
case "ColorDodge":
|
||
return "color-dodge";
|
||
case "ColorBurn":
|
||
return "color-burn";
|
||
case "HardLight":
|
||
return "hard-light";
|
||
case "SoftLight":
|
||
return "soft-light";
|
||
case "Difference":
|
||
return "difference";
|
||
case "Exclusion":
|
||
return "exclusion";
|
||
case "Hue":
|
||
return "hue";
|
||
case "Saturation":
|
||
return "saturation";
|
||
case "Color":
|
||
return "color";
|
||
case "Luminosity":
|
||
return "luminosity";
|
||
}
|
||
if (parsingArray) {
|
||
return null;
|
||
}
|
||
warn(`Unsupported blend mode: ${value.name}`);
|
||
return "source-over";
|
||
}
|
||
|
||
function incrementCachedImageMaskCount(data) {
|
||
if (data.fn === OPS.paintImageMaskXObject && data.args[0]?.count > 0) {
|
||
data.args[0].count++;
|
||
}
|
||
}
|
||
|
||
// Trying to minimize Date.now() usage and check every 100 time.
|
||
class TimeSlotManager {
|
||
static TIME_SLOT_DURATION_MS = 20;
|
||
|
||
static CHECK_TIME_EVERY = 100;
|
||
|
||
constructor() {
|
||
this.reset();
|
||
}
|
||
|
||
check() {
|
||
if (++this.checked < TimeSlotManager.CHECK_TIME_EVERY) {
|
||
return false;
|
||
}
|
||
this.checked = 0;
|
||
return this.endTime <= Date.now();
|
||
}
|
||
|
||
reset() {
|
||
this.endTime = Date.now() + TimeSlotManager.TIME_SLOT_DURATION_MS;
|
||
this.checked = 0;
|
||
}
|
||
}
|
||
|
||
class PartialEvaluator {
|
||
constructor({
|
||
xref,
|
||
handler,
|
||
pageIndex,
|
||
idFactory,
|
||
fontCache,
|
||
builtInCMapCache,
|
||
standardFontDataCache,
|
||
globalImageCache,
|
||
systemFontCache,
|
||
options = null,
|
||
}) {
|
||
this.xref = xref;
|
||
this.handler = handler;
|
||
this.pageIndex = pageIndex;
|
||
this.idFactory = idFactory;
|
||
this.fontCache = fontCache;
|
||
this.builtInCMapCache = builtInCMapCache;
|
||
this.standardFontDataCache = standardFontDataCache;
|
||
this.globalImageCache = globalImageCache;
|
||
this.systemFontCache = systemFontCache;
|
||
this.options = options || DefaultPartialEvaluatorOptions;
|
||
this.parsingType3Font = false;
|
||
|
||
this._regionalImageCache = new RegionalImageCache();
|
||
this._fetchBuiltInCMapBound = this.fetchBuiltInCMap.bind(this);
|
||
ImageResizer.setMaxArea(this.options.canvasMaxAreaInBytes);
|
||
}
|
||
|
||
/**
|
||
* Since Functions are only cached (locally) by reference, we can share one
|
||
* `PDFFunctionFactory` instance within this `PartialEvaluator` instance.
|
||
*/
|
||
get _pdfFunctionFactory() {
|
||
const pdfFunctionFactory = new PDFFunctionFactory({
|
||
xref: this.xref,
|
||
isEvalSupported: this.options.isEvalSupported,
|
||
});
|
||
return shadow(this, "_pdfFunctionFactory", pdfFunctionFactory);
|
||
}
|
||
|
||
clone(newOptions = null) {
|
||
const newEvaluator = Object.create(this);
|
||
newEvaluator.options = Object.assign(
|
||
Object.create(null),
|
||
this.options,
|
||
newOptions
|
||
);
|
||
return newEvaluator;
|
||
}
|
||
|
||
hasBlendModes(resources, nonBlendModesSet) {
|
||
if (!(resources instanceof Dict)) {
|
||
return false;
|
||
}
|
||
if (resources.objId && nonBlendModesSet.has(resources.objId)) {
|
||
return false;
|
||
}
|
||
|
||
const processed = new RefSet(nonBlendModesSet);
|
||
if (resources.objId) {
|
||
processed.put(resources.objId);
|
||
}
|
||
|
||
const nodes = [resources],
|
||
xref = this.xref;
|
||
while (nodes.length) {
|
||
const node = nodes.shift();
|
||
// First check the current resources for blend modes.
|
||
const graphicStates = node.get("ExtGState");
|
||
if (graphicStates instanceof Dict) {
|
||
for (let graphicState of graphicStates.getRawValues()) {
|
||
if (graphicState instanceof Ref) {
|
||
if (processed.has(graphicState)) {
|
||
continue; // The ExtGState has already been processed.
|
||
}
|
||
try {
|
||
graphicState = xref.fetch(graphicState);
|
||
} catch (ex) {
|
||
// Avoid parsing a corrupt ExtGState more than once.
|
||
processed.put(graphicState);
|
||
|
||
info(`hasBlendModes - ignoring ExtGState: "${ex}".`);
|
||
continue;
|
||
}
|
||
}
|
||
if (!(graphicState instanceof Dict)) {
|
||
continue;
|
||
}
|
||
if (graphicState.objId) {
|
||
processed.put(graphicState.objId);
|
||
}
|
||
|
||
const bm = graphicState.get("BM");
|
||
if (bm instanceof Name) {
|
||
if (bm.name !== "Normal") {
|
||
return true;
|
||
}
|
||
continue;
|
||
}
|
||
if (bm !== undefined && Array.isArray(bm)) {
|
||
for (const element of bm) {
|
||
if (element instanceof Name && element.name !== "Normal") {
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
// Descend into the XObjects to look for more resources and blend modes.
|
||
const xObjects = node.get("XObject");
|
||
if (!(xObjects instanceof Dict)) {
|
||
continue;
|
||
}
|
||
for (let xObject of xObjects.getRawValues()) {
|
||
if (xObject instanceof Ref) {
|
||
if (processed.has(xObject)) {
|
||
// The XObject has already been processed, and by avoiding a
|
||
// redundant `xref.fetch` we can *significantly* reduce the load
|
||
// time for badly generated PDF files (fixes issue6961.pdf).
|
||
continue;
|
||
}
|
||
try {
|
||
xObject = xref.fetch(xObject);
|
||
} catch (ex) {
|
||
// Avoid parsing a corrupt XObject more than once.
|
||
processed.put(xObject);
|
||
|
||
info(`hasBlendModes - ignoring XObject: "${ex}".`);
|
||
continue;
|
||
}
|
||
}
|
||
if (!(xObject instanceof BaseStream)) {
|
||
continue;
|
||
}
|
||
if (xObject.dict.objId) {
|
||
processed.put(xObject.dict.objId);
|
||
}
|
||
const xResources = xObject.dict.get("Resources");
|
||
if (!(xResources instanceof Dict)) {
|
||
continue;
|
||
}
|
||
// Checking objId to detect an infinite loop.
|
||
if (xResources.objId && processed.has(xResources.objId)) {
|
||
continue;
|
||
}
|
||
|
||
nodes.push(xResources);
|
||
if (xResources.objId) {
|
||
processed.put(xResources.objId);
|
||
}
|
||
}
|
||
}
|
||
|
||
// When no blend modes exist, there's no need re-fetch/re-parse any of the
|
||
// processed `Ref`s again for subsequent pages. This helps reduce redundant
|
||
// `XRef.fetch` calls for some documents (e.g. issue6961.pdf).
|
||
for (const ref of processed) {
|
||
nonBlendModesSet.put(ref);
|
||
}
|
||
return false;
|
||
}
|
||
|
||
async fetchBuiltInCMap(name) {
|
||
const cachedData = this.builtInCMapCache.get(name);
|
||
if (cachedData) {
|
||
return cachedData;
|
||
}
|
||
let data;
|
||
|
||
if (this.options.cMapUrl !== null) {
|
||
// Only compressed CMaps are (currently) supported here.
|
||
const url = `${this.options.cMapUrl}${name}.bcmap`;
|
||
const response = await fetch(url);
|
||
if (!response.ok) {
|
||
throw new Error(
|
||
`fetchBuiltInCMap: failed to fetch file "${url}" with "${response.statusText}".`
|
||
);
|
||
}
|
||
data = {
|
||
cMapData: new Uint8Array(await response.arrayBuffer()),
|
||
compressionType: CMapCompressionType.BINARY,
|
||
};
|
||
} else {
|
||
// Get the data on the main-thread instead.
|
||
data = await this.handler.sendWithPromise("FetchBuiltInCMap", { name });
|
||
}
|
||
|
||
if (data.compressionType !== CMapCompressionType.NONE) {
|
||
// Given the size of uncompressed CMaps, only cache compressed ones.
|
||
this.builtInCMapCache.set(name, data);
|
||
}
|
||
return data;
|
||
}
|
||
|
||
async fetchStandardFontData(name) {
|
||
const cachedData = this.standardFontDataCache.get(name);
|
||
if (cachedData) {
|
||
return new Stream(cachedData);
|
||
}
|
||
|
||
// The symbol fonts are not consistent across platforms, always load the
|
||
// standard font data for them.
|
||
if (
|
||
this.options.useSystemFonts &&
|
||
name !== "Symbol" &&
|
||
name !== "ZapfDingbats"
|
||
) {
|
||
return null;
|
||
}
|
||
|
||
const standardFontNameToFileName = getFontNameToFileMap(),
|
||
filename = standardFontNameToFileName[name];
|
||
let data;
|
||
|
||
if (this.options.standardFontDataUrl !== null) {
|
||
const url = `${this.options.standardFontDataUrl}${filename}`;
|
||
const response = await fetch(url);
|
||
if (!response.ok) {
|
||
warn(
|
||
`fetchStandardFontData: failed to fetch file "${url}" with "${response.statusText}".`
|
||
);
|
||
} else {
|
||
data = new Uint8Array(await response.arrayBuffer());
|
||
}
|
||
} else {
|
||
// Get the data on the main-thread instead.
|
||
try {
|
||
data = await this.handler.sendWithPromise("FetchStandardFontData", {
|
||
filename,
|
||
});
|
||
} catch (e) {
|
||
warn(
|
||
`fetchStandardFontData: failed to fetch file "${filename}" with "${e}".`
|
||
);
|
||
}
|
||
}
|
||
|
||
if (!data) {
|
||
return null;
|
||
}
|
||
// Cache the "raw" standard font data, to avoid fetching it repeatedly
|
||
// (see e.g. issue 11399).
|
||
this.standardFontDataCache.set(name, data);
|
||
|
||
return new Stream(data);
|
||
}
|
||
|
||
async buildFormXObject(
|
||
resources,
|
||
xobj,
|
||
smask,
|
||
operatorList,
|
||
task,
|
||
initialState,
|
||
localColorSpaceCache
|
||
) {
|
||
const dict = xobj.dict;
|
||
const matrix = dict.getArray("Matrix");
|
||
let bbox = dict.getArray("BBox");
|
||
bbox =
|
||
Array.isArray(bbox) && bbox.length === 4
|
||
? Util.normalizeRect(bbox)
|
||
: null;
|
||
|
||
let optionalContent, groupOptions;
|
||
if (dict.has("OC")) {
|
||
optionalContent = await this.parseMarkedContentProps(
|
||
dict.get("OC"),
|
||
resources
|
||
);
|
||
}
|
||
if (optionalContent !== undefined) {
|
||
operatorList.addOp(OPS.beginMarkedContentProps, ["OC", optionalContent]);
|
||
}
|
||
const group = dict.get("Group");
|
||
if (group) {
|
||
groupOptions = {
|
||
matrix,
|
||
bbox,
|
||
smask,
|
||
isolated: false,
|
||
knockout: false,
|
||
};
|
||
|
||
const groupSubtype = group.get("S");
|
||
let colorSpace = null;
|
||
if (isName(groupSubtype, "Transparency")) {
|
||
groupOptions.isolated = group.get("I") || false;
|
||
groupOptions.knockout = group.get("K") || false;
|
||
if (group.has("CS")) {
|
||
const cs = group.getRaw("CS");
|
||
|
||
const cachedColorSpace = ColorSpace.getCached(
|
||
cs,
|
||
this.xref,
|
||
localColorSpaceCache
|
||
);
|
||
if (cachedColorSpace) {
|
||
colorSpace = cachedColorSpace;
|
||
} else {
|
||
colorSpace = await this.parseColorSpace({
|
||
cs,
|
||
resources,
|
||
localColorSpaceCache,
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
if (smask?.backdrop) {
|
||
colorSpace ||= ColorSpace.singletons.rgb;
|
||
smask.backdrop = colorSpace.getRgb(smask.backdrop, 0);
|
||
}
|
||
|
||
operatorList.addOp(OPS.beginGroup, [groupOptions]);
|
||
}
|
||
|
||
// If it's a group, a new canvas will be created that is the size of the
|
||
// bounding box and translated to the correct position so we don't need to
|
||
// apply the bounding box to it.
|
||
const args = group ? [matrix, null] : [matrix, bbox];
|
||
operatorList.addOp(OPS.paintFormXObjectBegin, args);
|
||
|
||
return this.getOperatorList({
|
||
stream: xobj,
|
||
task,
|
||
resources: dict.get("Resources") || resources,
|
||
operatorList,
|
||
initialState,
|
||
}).then(function () {
|
||
operatorList.addOp(OPS.paintFormXObjectEnd, []);
|
||
|
||
if (group) {
|
||
operatorList.addOp(OPS.endGroup, [groupOptions]);
|
||
}
|
||
|
||
if (optionalContent !== undefined) {
|
||
operatorList.addOp(OPS.endMarkedContent, []);
|
||
}
|
||
});
|
||
}
|
||
|
||
_sendImgData(objId, imgData, cacheGlobally = false) {
|
||
const transfers = imgData ? [imgData.bitmap || imgData.data.buffer] : null;
|
||
|
||
if (this.parsingType3Font || cacheGlobally) {
|
||
return this.handler.send(
|
||
"commonobj",
|
||
[objId, "Image", imgData],
|
||
transfers
|
||
);
|
||
}
|
||
return this.handler.send(
|
||
"obj",
|
||
[objId, this.pageIndex, "Image", imgData],
|
||
transfers
|
||
);
|
||
}
|
||
|
||
async buildPaintImageXObject({
|
||
resources,
|
||
image,
|
||
isInline = false,
|
||
operatorList,
|
||
cacheKey,
|
||
localImageCache,
|
||
localColorSpaceCache,
|
||
}) {
|
||
const dict = image.dict;
|
||
const imageRef = dict.objId;
|
||
const w = dict.get("W", "Width");
|
||
const h = dict.get("H", "Height");
|
||
|
||
if (!(w && typeof w === "number") || !(h && typeof h === "number")) {
|
||
warn("Image dimensions are missing, or not numbers.");
|
||
return;
|
||
}
|
||
const maxImageSize = this.options.maxImageSize;
|
||
if (maxImageSize !== -1 && w * h > maxImageSize) {
|
||
const msg = "Image exceeded maximum allowed size and was removed.";
|
||
|
||
if (this.options.ignoreErrors) {
|
||
warn(msg);
|
||
return;
|
||
}
|
||
throw new Error(msg);
|
||
}
|
||
|
||
let optionalContent;
|
||
if (dict.has("OC")) {
|
||
optionalContent = await this.parseMarkedContentProps(
|
||
dict.get("OC"),
|
||
resources
|
||
);
|
||
}
|
||
|
||
const imageMask = dict.get("IM", "ImageMask") || false;
|
||
let imgData, args;
|
||
if (imageMask) {
|
||
// This depends on a tmpCanvas being filled with the
|
||
// current fillStyle, such that processing the pixel
|
||
// data can't be done here. Instead of creating a
|
||
// complete PDFImage, only read the information needed
|
||
// for later.
|
||
const interpolate = dict.get("I", "Interpolate");
|
||
const bitStrideLength = (w + 7) >> 3;
|
||
const imgArray = image.getBytes(bitStrideLength * h);
|
||
const decode = dict.getArray("D", "Decode");
|
||
|
||
if (this.parsingType3Font) {
|
||
imgData = PDFImage.createRawMask({
|
||
imgArray,
|
||
width: w,
|
||
height: h,
|
||
imageIsFromDecodeStream: image instanceof DecodeStream,
|
||
inverseDecode: decode?.[0] > 0,
|
||
interpolate,
|
||
});
|
||
|
||
imgData.cached = !!cacheKey;
|
||
args = [imgData];
|
||
|
||
operatorList.addImageOps(
|
||
OPS.paintImageMaskXObject,
|
||
args,
|
||
optionalContent
|
||
);
|
||
|
||
if (cacheKey) {
|
||
const cacheData = {
|
||
fn: OPS.paintImageMaskXObject,
|
||
args,
|
||
optionalContent,
|
||
};
|
||
localImageCache.set(cacheKey, imageRef, cacheData);
|
||
|
||
if (imageRef) {
|
||
this._regionalImageCache.set(
|
||
/* name = */ null,
|
||
imageRef,
|
||
cacheData
|
||
);
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
imgData = await PDFImage.createMask({
|
||
imgArray,
|
||
width: w,
|
||
height: h,
|
||
imageIsFromDecodeStream: image instanceof DecodeStream,
|
||
inverseDecode: decode?.[0] > 0,
|
||
interpolate,
|
||
isOffscreenCanvasSupported: this.options.isOffscreenCanvasSupported,
|
||
});
|
||
|
||
if (imgData.isSingleOpaquePixel) {
|
||
// Handles special case of mainly LaTeX documents which use image
|
||
// masks to draw lines with the current fill style.
|
||
operatorList.addImageOps(
|
||
OPS.paintSolidColorImageMask,
|
||
[],
|
||
optionalContent
|
||
);
|
||
|
||
if (cacheKey) {
|
||
const cacheData = {
|
||
fn: OPS.paintSolidColorImageMask,
|
||
args: [],
|
||
optionalContent,
|
||
};
|
||
localImageCache.set(cacheKey, imageRef, cacheData);
|
||
|
||
if (imageRef) {
|
||
this._regionalImageCache.set(
|
||
/* name = */ null,
|
||
imageRef,
|
||
cacheData
|
||
);
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
const objId = `mask_${this.idFactory.createObjId()}`;
|
||
operatorList.addDependency(objId);
|
||
this._sendImgData(objId, imgData);
|
||
|
||
args = [
|
||
{
|
||
data: objId,
|
||
width: imgData.width,
|
||
height: imgData.height,
|
||
interpolate: imgData.interpolate,
|
||
count: 1,
|
||
},
|
||
];
|
||
operatorList.addImageOps(
|
||
OPS.paintImageMaskXObject,
|
||
args,
|
||
optionalContent
|
||
);
|
||
|
||
if (cacheKey) {
|
||
const cacheData = {
|
||
fn: OPS.paintImageMaskXObject,
|
||
args,
|
||
optionalContent,
|
||
};
|
||
localImageCache.set(cacheKey, imageRef, cacheData);
|
||
|
||
if (imageRef) {
|
||
this._regionalImageCache.set(/* name = */ null, imageRef, cacheData);
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
const SMALL_IMAGE_DIMENSIONS = 200;
|
||
// Inlining small images into the queue as RGB data
|
||
if (
|
||
isInline &&
|
||
!dict.has("SMask") &&
|
||
!dict.has("Mask") &&
|
||
w + h < SMALL_IMAGE_DIMENSIONS
|
||
) {
|
||
const imageObj = new PDFImage({
|
||
xref: this.xref,
|
||
res: resources,
|
||
image,
|
||
isInline,
|
||
pdfFunctionFactory: this._pdfFunctionFactory,
|
||
localColorSpaceCache,
|
||
});
|
||
// We force the use of RGBA_32BPP images here, because we can't handle
|
||
// any other kind.
|
||
imgData = await imageObj.createImageData(
|
||
/* forceRGBA = */ true,
|
||
/* isOffscreenCanvasSupported = */ false
|
||
);
|
||
operatorList.isOffscreenCanvasSupported =
|
||
this.options.isOffscreenCanvasSupported;
|
||
operatorList.addImageOps(
|
||
OPS.paintInlineImageXObject,
|
||
[imgData],
|
||
optionalContent
|
||
);
|
||
return;
|
||
}
|
||
|
||
// If there is no imageMask, create the PDFImage and a lot
|
||
// of image processing can be done here.
|
||
let objId = `img_${this.idFactory.createObjId()}`,
|
||
cacheGlobally = false;
|
||
|
||
if (this.parsingType3Font) {
|
||
objId = `${this.idFactory.getDocId()}_type3_${objId}`;
|
||
} else if (imageRef) {
|
||
cacheGlobally = this.globalImageCache.shouldCache(
|
||
imageRef,
|
||
this.pageIndex
|
||
);
|
||
|
||
if (cacheGlobally) {
|
||
objId = `${this.idFactory.getDocId()}_${objId}`;
|
||
}
|
||
}
|
||
|
||
// Ensure that the dependency is added before the image is decoded.
|
||
operatorList.addDependency(objId);
|
||
args = [objId, w, h];
|
||
|
||
PDFImage.buildImage({
|
||
xref: this.xref,
|
||
res: resources,
|
||
image,
|
||
isInline,
|
||
pdfFunctionFactory: this._pdfFunctionFactory,
|
||
localColorSpaceCache,
|
||
})
|
||
.then(async imageObj => {
|
||
imgData = await imageObj.createImageData(
|
||
/* forceRGBA = */ false,
|
||
/* isOffscreenCanvasSupported = */ this.options
|
||
.isOffscreenCanvasSupported
|
||
);
|
||
|
||
if (cacheKey && imageRef && cacheGlobally) {
|
||
const length = imgData.bitmap
|
||
? imgData.width * imgData.height * 4
|
||
: imgData.data.length;
|
||
this.globalImageCache.addByteSize(imageRef, length);
|
||
}
|
||
|
||
return this._sendImgData(objId, imgData, cacheGlobally);
|
||
})
|
||
.catch(reason => {
|
||
warn(`Unable to decode image "${objId}": "${reason}".`);
|
||
|
||
return this._sendImgData(objId, /* imgData = */ null, cacheGlobally);
|
||
});
|
||
|
||
operatorList.addImageOps(OPS.paintImageXObject, args, optionalContent);
|
||
|
||
if (cacheKey) {
|
||
const cacheData = {
|
||
fn: OPS.paintImageXObject,
|
||
args,
|
||
optionalContent,
|
||
};
|
||
localImageCache.set(cacheKey, imageRef, cacheData);
|
||
|
||
if (imageRef) {
|
||
this._regionalImageCache.set(/* name = */ null, imageRef, cacheData);
|
||
|
||
if (cacheGlobally) {
|
||
assert(!isInline, "Cannot cache an inline image globally.");
|
||
|
||
this.globalImageCache.setData(imageRef, {
|
||
objId,
|
||
fn: OPS.paintImageXObject,
|
||
args,
|
||
optionalContent,
|
||
byteSize: 0, // Temporary entry, note `addByteSize` above.
|
||
});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
handleSMask(
|
||
smask,
|
||
resources,
|
||
operatorList,
|
||
task,
|
||
stateManager,
|
||
localColorSpaceCache
|
||
) {
|
||
const smaskContent = smask.get("G");
|
||
const smaskOptions = {
|
||
subtype: smask.get("S").name,
|
||
backdrop: smask.get("BC"),
|
||
};
|
||
|
||
// The SMask might have a alpha/luminosity value transfer function --
|
||
// we will build a map of integer values in range 0..255 to be fast.
|
||
const transferObj = smask.get("TR");
|
||
if (isPDFFunction(transferObj)) {
|
||
const transferFn = this._pdfFunctionFactory.create(transferObj);
|
||
const transferMap = new Uint8Array(256);
|
||
const tmp = new Float32Array(1);
|
||
for (let i = 0; i < 256; i++) {
|
||
tmp[0] = i / 255;
|
||
transferFn(tmp, 0, tmp, 0);
|
||
transferMap[i] = (tmp[0] * 255) | 0;
|
||
}
|
||
smaskOptions.transferMap = transferMap;
|
||
}
|
||
|
||
return this.buildFormXObject(
|
||
resources,
|
||
smaskContent,
|
||
smaskOptions,
|
||
operatorList,
|
||
task,
|
||
stateManager.state.clone(),
|
||
localColorSpaceCache
|
||
);
|
||
}
|
||
|
||
handleTransferFunction(tr) {
|
||
let transferArray;
|
||
if (Array.isArray(tr)) {
|
||
transferArray = tr;
|
||
} else if (isPDFFunction(tr)) {
|
||
transferArray = [tr];
|
||
} else {
|
||
return null; // Not a valid transfer function entry.
|
||
}
|
||
|
||
const transferMaps = [];
|
||
let numFns = 0,
|
||
numEffectfulFns = 0;
|
||
for (const entry of transferArray) {
|
||
const transferObj = this.xref.fetchIfRef(entry);
|
||
numFns++;
|
||
|
||
if (isName(transferObj, "Identity")) {
|
||
transferMaps.push(null);
|
||
continue;
|
||
} else if (!isPDFFunction(transferObj)) {
|
||
return null; // Not a valid transfer function object.
|
||
}
|
||
|
||
const transferFn = this._pdfFunctionFactory.create(transferObj);
|
||
const transferMap = new Uint8Array(256),
|
||
tmp = new Float32Array(1);
|
||
for (let j = 0; j < 256; j++) {
|
||
tmp[0] = j / 255;
|
||
transferFn(tmp, 0, tmp, 0);
|
||
transferMap[j] = (tmp[0] * 255) | 0;
|
||
}
|
||
transferMaps.push(transferMap);
|
||
numEffectfulFns++;
|
||
}
|
||
|
||
if (!(numFns === 1 || numFns === 4)) {
|
||
return null; // Only 1 or 4 functions are supported, by the specification.
|
||
}
|
||
if (numEffectfulFns === 0) {
|
||
return null; // Only /Identity transfer functions found, which are no-ops.
|
||
}
|
||
return transferMaps;
|
||
}
|
||
|
||
handleTilingType(
|
||
fn,
|
||
color,
|
||
resources,
|
||
pattern,
|
||
patternDict,
|
||
operatorList,
|
||
task,
|
||
localTilingPatternCache
|
||
) {
|
||
// Create an IR of the pattern code.
|
||
const tilingOpList = new OperatorList();
|
||
// Merge the available resources, to prevent issues when the patternDict
|
||
// is missing some /Resources entries (fixes issue6541.pdf).
|
||
const patternResources = Dict.merge({
|
||
xref: this.xref,
|
||
dictArray: [patternDict.get("Resources"), resources],
|
||
});
|
||
|
||
return this.getOperatorList({
|
||
stream: pattern,
|
||
task,
|
||
resources: patternResources,
|
||
operatorList: tilingOpList,
|
||
})
|
||
.then(function () {
|
||
const operatorListIR = tilingOpList.getIR();
|
||
const tilingPatternIR = getTilingPatternIR(
|
||
operatorListIR,
|
||
patternDict,
|
||
color
|
||
);
|
||
// Add the dependencies to the parent operator list so they are
|
||
// resolved before the sub operator list is executed synchronously.
|
||
operatorList.addDependencies(tilingOpList.dependencies);
|
||
operatorList.addOp(fn, tilingPatternIR);
|
||
|
||
if (patternDict.objId) {
|
||
localTilingPatternCache.set(/* name = */ null, patternDict.objId, {
|
||
operatorListIR,
|
||
dict: patternDict,
|
||
});
|
||
}
|
||
})
|
||
.catch(reason => {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (this.options.ignoreErrors) {
|
||
warn(`handleTilingType - ignoring pattern: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
});
|
||
}
|
||
|
||
handleSetFont(
|
||
resources,
|
||
fontArgs,
|
||
fontRef,
|
||
operatorList,
|
||
task,
|
||
state,
|
||
fallbackFontDict = null,
|
||
cssFontInfo = null
|
||
) {
|
||
const fontName = fontArgs?.[0] instanceof Name ? fontArgs[0].name : null;
|
||
|
||
return this.loadFont(
|
||
fontName,
|
||
fontRef,
|
||
resources,
|
||
fallbackFontDict,
|
||
cssFontInfo
|
||
)
|
||
.then(translated => {
|
||
if (!translated.font.isType3Font) {
|
||
return translated;
|
||
}
|
||
return translated
|
||
.loadType3Data(this, resources, task)
|
||
.then(function () {
|
||
// Add the dependencies to the parent operatorList so they are
|
||
// resolved before Type3 operatorLists are executed synchronously.
|
||
operatorList.addDependencies(translated.type3Dependencies);
|
||
|
||
return translated;
|
||
})
|
||
.catch(reason => {
|
||
return new TranslatedFont({
|
||
loadedName: "g_font_error",
|
||
font: new ErrorFont(`Type3 font load error: ${reason}`),
|
||
dict: translated.font,
|
||
evaluatorOptions: this.options,
|
||
});
|
||
});
|
||
})
|
||
.then(translated => {
|
||
state.font = translated.font;
|
||
translated.send(this.handler);
|
||
return translated.loadedName;
|
||
});
|
||
}
|
||
|
||
handleText(chars, state) {
|
||
const font = state.font;
|
||
const glyphs = font.charsToGlyphs(chars);
|
||
|
||
if (font.data) {
|
||
const isAddToPathSet = !!(
|
||
state.textRenderingMode & TextRenderingMode.ADD_TO_PATH_FLAG
|
||
);
|
||
if (
|
||
isAddToPathSet ||
|
||
state.fillColorSpace.name === "Pattern" ||
|
||
font.disableFontFace ||
|
||
this.options.disableFontFace
|
||
) {
|
||
PartialEvaluator.buildFontPaths(
|
||
font,
|
||
glyphs,
|
||
this.handler,
|
||
this.options
|
||
);
|
||
}
|
||
}
|
||
return glyphs;
|
||
}
|
||
|
||
ensureStateFont(state) {
|
||
if (state.font) {
|
||
return;
|
||
}
|
||
const reason = new FormatError(
|
||
"Missing setFont (Tf) operator before text rendering operator."
|
||
);
|
||
|
||
if (this.options.ignoreErrors) {
|
||
warn(`ensureStateFont: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
}
|
||
|
||
async setGState({
|
||
resources,
|
||
gState,
|
||
operatorList,
|
||
cacheKey,
|
||
task,
|
||
stateManager,
|
||
localGStateCache,
|
||
localColorSpaceCache,
|
||
}) {
|
||
const gStateRef = gState.objId;
|
||
let isSimpleGState = true;
|
||
// This array holds the converted/processed state data.
|
||
const gStateObj = [];
|
||
let promise = Promise.resolve();
|
||
for (const key of gState.getKeys()) {
|
||
const value = gState.get(key);
|
||
switch (key) {
|
||
case "Type":
|
||
break;
|
||
case "LW":
|
||
case "LC":
|
||
case "LJ":
|
||
case "ML":
|
||
case "D":
|
||
case "RI":
|
||
case "FL":
|
||
case "CA":
|
||
case "ca":
|
||
gStateObj.push([key, value]);
|
||
break;
|
||
case "Font":
|
||
isSimpleGState = false;
|
||
|
||
promise = promise.then(() => {
|
||
return this.handleSetFont(
|
||
resources,
|
||
null,
|
||
value[0],
|
||
operatorList,
|
||
task,
|
||
stateManager.state
|
||
).then(function (loadedName) {
|
||
operatorList.addDependency(loadedName);
|
||
gStateObj.push([key, [loadedName, value[1]]]);
|
||
});
|
||
});
|
||
break;
|
||
case "BM":
|
||
gStateObj.push([key, normalizeBlendMode(value)]);
|
||
break;
|
||
case "SMask":
|
||
if (isName(value, "None")) {
|
||
gStateObj.push([key, false]);
|
||
break;
|
||
}
|
||
if (value instanceof Dict) {
|
||
isSimpleGState = false;
|
||
|
||
promise = promise.then(() => {
|
||
return this.handleSMask(
|
||
value,
|
||
resources,
|
||
operatorList,
|
||
task,
|
||
stateManager,
|
||
localColorSpaceCache
|
||
);
|
||
});
|
||
gStateObj.push([key, true]);
|
||
} else {
|
||
warn("Unsupported SMask type");
|
||
}
|
||
break;
|
||
case "TR":
|
||
const transferMaps = this.handleTransferFunction(value);
|
||
gStateObj.push([key, transferMaps]);
|
||
break;
|
||
// Only generate info log messages for the following since
|
||
// they are unlikely to have a big impact on the rendering.
|
||
case "OP":
|
||
case "op":
|
||
case "OPM":
|
||
case "BG":
|
||
case "BG2":
|
||
case "UCR":
|
||
case "UCR2":
|
||
case "TR2":
|
||
case "HT":
|
||
case "SM":
|
||
case "SA":
|
||
case "AIS":
|
||
case "TK":
|
||
// TODO implement these operators.
|
||
info("graphic state operator " + key);
|
||
break;
|
||
default:
|
||
info("Unknown graphic state operator " + key);
|
||
break;
|
||
}
|
||
}
|
||
return promise.then(function () {
|
||
if (gStateObj.length > 0) {
|
||
operatorList.addOp(OPS.setGState, [gStateObj]);
|
||
}
|
||
|
||
if (isSimpleGState) {
|
||
localGStateCache.set(cacheKey, gStateRef, gStateObj);
|
||
}
|
||
});
|
||
}
|
||
|
||
loadFont(
|
||
fontName,
|
||
font,
|
||
resources,
|
||
fallbackFontDict = null,
|
||
cssFontInfo = null
|
||
) {
|
||
const errorFont = async () => {
|
||
return new TranslatedFont({
|
||
loadedName: "g_font_error",
|
||
font: new ErrorFont(`Font "${fontName}" is not available.`),
|
||
dict: font,
|
||
evaluatorOptions: this.options,
|
||
});
|
||
};
|
||
|
||
let fontRef;
|
||
if (font) {
|
||
// Loading by ref.
|
||
if (font instanceof Ref) {
|
||
fontRef = font;
|
||
}
|
||
} else {
|
||
// Loading by name.
|
||
const fontRes = resources.get("Font");
|
||
if (fontRes) {
|
||
fontRef = fontRes.getRaw(fontName);
|
||
}
|
||
}
|
||
if (fontRef) {
|
||
if (this.parsingType3Font && this.type3FontRefs.has(fontRef)) {
|
||
return errorFont();
|
||
}
|
||
|
||
if (this.fontCache.has(fontRef)) {
|
||
return this.fontCache.get(fontRef);
|
||
}
|
||
|
||
font = this.xref.fetchIfRef(fontRef);
|
||
}
|
||
|
||
if (!(font instanceof Dict)) {
|
||
if (!this.options.ignoreErrors && !this.parsingType3Font) {
|
||
warn(`Font "${fontName}" is not available.`);
|
||
return errorFont();
|
||
}
|
||
warn(
|
||
`Font "${fontName}" is not available -- attempting to fallback to a default font.`
|
||
);
|
||
|
||
// Falling back to a default font to avoid completely broken rendering,
|
||
// but note that there're no guarantees that things will look "correct".
|
||
font = fallbackFontDict || PartialEvaluator.fallbackFontDict;
|
||
}
|
||
|
||
// We are holding `font.cacheKey` references only for `fontRef`s that
|
||
// are not actually `Ref`s, but rather `Dict`s. See explanation below.
|
||
if (font.cacheKey && this.fontCache.has(font.cacheKey)) {
|
||
return this.fontCache.get(font.cacheKey);
|
||
}
|
||
|
||
const fontCapability = new PromiseCapability();
|
||
|
||
let preEvaluatedFont;
|
||
try {
|
||
preEvaluatedFont = this.preEvaluateFont(font);
|
||
preEvaluatedFont.cssFontInfo = cssFontInfo;
|
||
} catch (reason) {
|
||
warn(`loadFont - preEvaluateFont failed: "${reason}".`);
|
||
return errorFont();
|
||
}
|
||
const { descriptor, hash } = preEvaluatedFont;
|
||
|
||
const fontRefIsRef = fontRef instanceof Ref;
|
||
let fontID;
|
||
|
||
if (hash && descriptor instanceof Dict) {
|
||
const fontAliases = (descriptor.fontAliases ||= Object.create(null));
|
||
|
||
if (fontAliases[hash]) {
|
||
const aliasFontRef = fontAliases[hash].aliasRef;
|
||
if (fontRefIsRef && aliasFontRef && this.fontCache.has(aliasFontRef)) {
|
||
this.fontCache.putAlias(fontRef, aliasFontRef);
|
||
return this.fontCache.get(fontRef);
|
||
}
|
||
} else {
|
||
fontAliases[hash] = {
|
||
fontID: this.idFactory.createFontId(),
|
||
};
|
||
}
|
||
|
||
if (fontRefIsRef) {
|
||
fontAliases[hash].aliasRef = fontRef;
|
||
}
|
||
fontID = fontAliases[hash].fontID;
|
||
} else {
|
||
fontID = this.idFactory.createFontId();
|
||
}
|
||
assert(
|
||
fontID?.startsWith("f"),
|
||
'The "fontID" must be (correctly) defined.'
|
||
);
|
||
|
||
// Workaround for bad PDF generators that reference fonts incorrectly,
|
||
// where `fontRef` is a `Dict` rather than a `Ref` (fixes bug946506.pdf).
|
||
// In this case we cannot put the font into `this.fontCache` (which is
|
||
// a `RefSetCache`), since it's not possible to use a `Dict` as a key.
|
||
//
|
||
// However, if we don't cache the font it's not possible to remove it
|
||
// when `cleanup` is triggered from the API, which causes issues on
|
||
// subsequent rendering operations (see issue7403.pdf) and would force us
|
||
// to unnecessarily load the same fonts over and over.
|
||
//
|
||
// Instead, we cheat a bit by using a modified `fontID` as a key in
|
||
// `this.fontCache`, to allow the font to be cached.
|
||
// NOTE: This works because `RefSetCache` calls `toString()` on provided
|
||
// keys. Also, since `fontRef` is used when getting cached fonts,
|
||
// we'll not accidentally match fonts cached with the `fontID`.
|
||
if (fontRefIsRef) {
|
||
this.fontCache.put(fontRef, fontCapability.promise);
|
||
} else {
|
||
font.cacheKey = `cacheKey_${fontID}`;
|
||
this.fontCache.put(font.cacheKey, fontCapability.promise);
|
||
}
|
||
|
||
// Keep track of each font we translated so the caller can
|
||
// load them asynchronously before calling display on a page.
|
||
font.loadedName = `${this.idFactory.getDocId()}_${fontID}`;
|
||
|
||
this.translateFont(preEvaluatedFont)
|
||
.then(translatedFont => {
|
||
fontCapability.resolve(
|
||
new TranslatedFont({
|
||
loadedName: font.loadedName,
|
||
font: translatedFont,
|
||
dict: font,
|
||
evaluatorOptions: this.options,
|
||
})
|
||
);
|
||
})
|
||
.catch(reason => {
|
||
// TODO fontCapability.reject?
|
||
warn(`loadFont - translateFont failed: "${reason}".`);
|
||
|
||
fontCapability.resolve(
|
||
new TranslatedFont({
|
||
loadedName: font.loadedName,
|
||
font: new ErrorFont(
|
||
reason instanceof Error ? reason.message : reason
|
||
),
|
||
dict: font,
|
||
evaluatorOptions: this.options,
|
||
})
|
||
);
|
||
});
|
||
return fontCapability.promise;
|
||
}
|
||
|
||
buildPath(operatorList, fn, args, parsingText = false) {
|
||
const lastIndex = operatorList.length - 1;
|
||
if (!args) {
|
||
args = [];
|
||
}
|
||
if (
|
||
lastIndex < 0 ||
|
||
operatorList.fnArray[lastIndex] !== OPS.constructPath
|
||
) {
|
||
// Handle corrupt PDF documents that contains path operators inside of
|
||
// text objects, which may shift subsequent text, by enclosing the path
|
||
// operator in save/restore operators (fixes issue10542_reduced.pdf).
|
||
//
|
||
// Note that this will effectively disable the optimization in the
|
||
// `else` branch below, but given that this type of corruption is
|
||
// *extremely* rare that shouldn't really matter much in practice.
|
||
if (parsingText) {
|
||
warn(`Encountered path operator "${fn}" inside of a text object.`);
|
||
operatorList.addOp(OPS.save, null);
|
||
}
|
||
|
||
let minMax;
|
||
switch (fn) {
|
||
case OPS.rectangle:
|
||
const x = args[0] + args[2];
|
||
const y = args[1] + args[3];
|
||
minMax = [
|
||
Math.min(args[0], x),
|
||
Math.max(args[0], x),
|
||
Math.min(args[1], y),
|
||
Math.max(args[1], y),
|
||
];
|
||
break;
|
||
case OPS.moveTo:
|
||
case OPS.lineTo:
|
||
minMax = [args[0], args[0], args[1], args[1]];
|
||
break;
|
||
default:
|
||
minMax = [Infinity, -Infinity, Infinity, -Infinity];
|
||
break;
|
||
}
|
||
operatorList.addOp(OPS.constructPath, [[fn], args, minMax]);
|
||
|
||
if (parsingText) {
|
||
operatorList.addOp(OPS.restore, null);
|
||
}
|
||
} else {
|
||
const opArgs = operatorList.argsArray[lastIndex];
|
||
opArgs[0].push(fn);
|
||
opArgs[1].push(...args);
|
||
const minMax = opArgs[2];
|
||
|
||
// Compute min/max in the worker instead of the main thread.
|
||
// If the current matrix (when drawing) is a scaling one
|
||
// then min/max can be easily computed in using those values.
|
||
// Only rectangle, lineTo and moveTo are handled here since
|
||
// Bezier stuff requires to have the starting point.
|
||
switch (fn) {
|
||
case OPS.rectangle:
|
||
const x = args[0] + args[2];
|
||
const y = args[1] + args[3];
|
||
minMax[0] = Math.min(minMax[0], args[0], x);
|
||
minMax[1] = Math.max(minMax[1], args[0], x);
|
||
minMax[2] = Math.min(minMax[2], args[1], y);
|
||
minMax[3] = Math.max(minMax[3], args[1], y);
|
||
break;
|
||
case OPS.moveTo:
|
||
case OPS.lineTo:
|
||
minMax[0] = Math.min(minMax[0], args[0]);
|
||
minMax[1] = Math.max(minMax[1], args[0]);
|
||
minMax[2] = Math.min(minMax[2], args[1]);
|
||
minMax[3] = Math.max(minMax[3], args[1]);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
parseColorSpace({ cs, resources, localColorSpaceCache }) {
|
||
return ColorSpace.parseAsync({
|
||
cs,
|
||
xref: this.xref,
|
||
resources,
|
||
pdfFunctionFactory: this._pdfFunctionFactory,
|
||
localColorSpaceCache,
|
||
}).catch(reason => {
|
||
if (reason instanceof AbortException) {
|
||
return null;
|
||
}
|
||
if (this.options.ignoreErrors) {
|
||
warn(`parseColorSpace - ignoring ColorSpace: "${reason}".`);
|
||
return null;
|
||
}
|
||
throw reason;
|
||
});
|
||
}
|
||
|
||
parseShading({
|
||
shading,
|
||
resources,
|
||
localColorSpaceCache,
|
||
localShadingPatternCache,
|
||
}) {
|
||
// Shadings and patterns may be referenced by the same name but the resource
|
||
// dictionary could be different so we can't use the name for the cache key.
|
||
let id = localShadingPatternCache.get(shading);
|
||
if (!id) {
|
||
var shadingFill = Pattern.parseShading(
|
||
shading,
|
||
this.xref,
|
||
resources,
|
||
this._pdfFunctionFactory,
|
||
localColorSpaceCache
|
||
);
|
||
const patternIR = shadingFill.getIR();
|
||
id = `pattern_${this.idFactory.createObjId()}`;
|
||
if (this.parsingType3Font) {
|
||
id = `${this.idFactory.getDocId()}_type3_${id}`;
|
||
}
|
||
localShadingPatternCache.set(shading, id);
|
||
|
||
if (this.parsingType3Font) {
|
||
this.handler.send("commonobj", [id, "Pattern", patternIR]);
|
||
} else {
|
||
this.handler.send("obj", [id, this.pageIndex, "Pattern", patternIR]);
|
||
}
|
||
}
|
||
return id;
|
||
}
|
||
|
||
handleColorN(
|
||
operatorList,
|
||
fn,
|
||
args,
|
||
cs,
|
||
patterns,
|
||
resources,
|
||
task,
|
||
localColorSpaceCache,
|
||
localTilingPatternCache,
|
||
localShadingPatternCache
|
||
) {
|
||
// compile tiling patterns
|
||
const patternName = args.pop();
|
||
// SCN/scn applies patterns along with normal colors
|
||
if (patternName instanceof Name) {
|
||
const rawPattern = patterns.getRaw(patternName.name);
|
||
|
||
const localTilingPattern =
|
||
rawPattern instanceof Ref &&
|
||
localTilingPatternCache.getByRef(rawPattern);
|
||
if (localTilingPattern) {
|
||
try {
|
||
const color = cs.base ? cs.base.getRgb(args, 0) : null;
|
||
const tilingPatternIR = getTilingPatternIR(
|
||
localTilingPattern.operatorListIR,
|
||
localTilingPattern.dict,
|
||
color
|
||
);
|
||
operatorList.addOp(fn, tilingPatternIR);
|
||
return undefined;
|
||
} catch {
|
||
// Handle any errors during normal TilingPattern parsing.
|
||
}
|
||
}
|
||
|
||
const pattern = this.xref.fetchIfRef(rawPattern);
|
||
if (pattern) {
|
||
const dict = pattern instanceof BaseStream ? pattern.dict : pattern;
|
||
const typeNum = dict.get("PatternType");
|
||
|
||
if (typeNum === PatternType.TILING) {
|
||
const color = cs.base ? cs.base.getRgb(args, 0) : null;
|
||
return this.handleTilingType(
|
||
fn,
|
||
color,
|
||
resources,
|
||
pattern,
|
||
dict,
|
||
operatorList,
|
||
task,
|
||
localTilingPatternCache
|
||
);
|
||
} else if (typeNum === PatternType.SHADING) {
|
||
const shading = dict.get("Shading");
|
||
const matrix = dict.getArray("Matrix");
|
||
const objId = this.parseShading({
|
||
shading,
|
||
resources,
|
||
localColorSpaceCache,
|
||
localShadingPatternCache,
|
||
});
|
||
operatorList.addOp(fn, ["Shading", objId, matrix]);
|
||
return undefined;
|
||
}
|
||
throw new FormatError(`Unknown PatternType: ${typeNum}`);
|
||
}
|
||
}
|
||
throw new FormatError(`Unknown PatternName: ${patternName}`);
|
||
}
|
||
|
||
_parseVisibilityExpression(array, nestingCounter, currentResult) {
|
||
const MAX_NESTING = 10;
|
||
if (++nestingCounter > MAX_NESTING) {
|
||
warn("Visibility expression is too deeply nested");
|
||
return;
|
||
}
|
||
const length = array.length;
|
||
const operator = this.xref.fetchIfRef(array[0]);
|
||
if (length < 2 || !(operator instanceof Name)) {
|
||
warn("Invalid visibility expression");
|
||
return;
|
||
}
|
||
switch (operator.name) {
|
||
case "And":
|
||
case "Or":
|
||
case "Not":
|
||
currentResult.push(operator.name);
|
||
break;
|
||
default:
|
||
warn(`Invalid operator ${operator.name} in visibility expression`);
|
||
return;
|
||
}
|
||
for (let i = 1; i < length; i++) {
|
||
const raw = array[i];
|
||
const object = this.xref.fetchIfRef(raw);
|
||
if (Array.isArray(object)) {
|
||
const nestedResult = [];
|
||
currentResult.push(nestedResult);
|
||
// Recursively parse a subarray.
|
||
this._parseVisibilityExpression(object, nestingCounter, nestedResult);
|
||
} else if (raw instanceof Ref) {
|
||
// Reference to an OCG dictionary.
|
||
currentResult.push(raw.toString());
|
||
}
|
||
}
|
||
}
|
||
|
||
async parseMarkedContentProps(contentProperties, resources) {
|
||
let optionalContent;
|
||
if (contentProperties instanceof Name) {
|
||
const properties = resources.get("Properties");
|
||
optionalContent = properties.get(contentProperties.name);
|
||
} else if (contentProperties instanceof Dict) {
|
||
optionalContent = contentProperties;
|
||
} else {
|
||
throw new FormatError("Optional content properties malformed.");
|
||
}
|
||
|
||
const optionalContentType = optionalContent.get("Type")?.name;
|
||
if (optionalContentType === "OCG") {
|
||
return {
|
||
type: optionalContentType,
|
||
id: optionalContent.objId,
|
||
};
|
||
} else if (optionalContentType === "OCMD") {
|
||
const expression = optionalContent.get("VE");
|
||
if (Array.isArray(expression)) {
|
||
const result = [];
|
||
this._parseVisibilityExpression(expression, 0, result);
|
||
if (result.length > 0) {
|
||
return {
|
||
type: "OCMD",
|
||
expression: result,
|
||
};
|
||
}
|
||
}
|
||
|
||
const optionalContentGroups = optionalContent.get("OCGs");
|
||
if (
|
||
Array.isArray(optionalContentGroups) ||
|
||
optionalContentGroups instanceof Dict
|
||
) {
|
||
const groupIds = [];
|
||
if (Array.isArray(optionalContentGroups)) {
|
||
for (const ocg of optionalContentGroups) {
|
||
groupIds.push(ocg.toString());
|
||
}
|
||
} else {
|
||
// Dictionary, just use the obj id.
|
||
groupIds.push(optionalContentGroups.objId);
|
||
}
|
||
|
||
return {
|
||
type: optionalContentType,
|
||
ids: groupIds,
|
||
policy:
|
||
optionalContent.get("P") instanceof Name
|
||
? optionalContent.get("P").name
|
||
: null,
|
||
expression: null,
|
||
};
|
||
} else if (optionalContentGroups instanceof Ref) {
|
||
return {
|
||
type: optionalContentType,
|
||
id: optionalContentGroups.toString(),
|
||
};
|
||
}
|
||
}
|
||
return null;
|
||
}
|
||
|
||
getOperatorList({
|
||
stream,
|
||
task,
|
||
resources,
|
||
operatorList,
|
||
initialState = null,
|
||
fallbackFontDict = null,
|
||
}) {
|
||
// Ensure that `resources`/`initialState` is correctly initialized,
|
||
// even if the provided parameter is e.g. `null`.
|
||
resources ||= Dict.empty;
|
||
initialState ||= new EvalState();
|
||
|
||
if (!operatorList) {
|
||
throw new Error('getOperatorList: missing "operatorList" parameter');
|
||
}
|
||
|
||
const self = this;
|
||
const xref = this.xref;
|
||
let parsingText = false;
|
||
const localImageCache = new LocalImageCache();
|
||
const localColorSpaceCache = new LocalColorSpaceCache();
|
||
const localGStateCache = new LocalGStateCache();
|
||
const localTilingPatternCache = new LocalTilingPatternCache();
|
||
const localShadingPatternCache = new Map();
|
||
|
||
const xobjs = resources.get("XObject") || Dict.empty;
|
||
const patterns = resources.get("Pattern") || Dict.empty;
|
||
const stateManager = new StateManager(initialState);
|
||
const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
||
const timeSlotManager = new TimeSlotManager();
|
||
|
||
function closePendingRestoreOPS(argument) {
|
||
for (let i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
|
||
operatorList.addOp(OPS.restore, []);
|
||
}
|
||
}
|
||
|
||
return new Promise(function promiseBody(resolve, reject) {
|
||
const next = function (promise) {
|
||
Promise.all([promise, operatorList.ready]).then(function () {
|
||
try {
|
||
promiseBody(resolve, reject);
|
||
} catch (ex) {
|
||
reject(ex);
|
||
}
|
||
}, reject);
|
||
};
|
||
task.ensureNotTerminated();
|
||
timeSlotManager.reset();
|
||
|
||
const operation = {};
|
||
let stop, i, ii, cs, name, isValidName;
|
||
while (!(stop = timeSlotManager.check())) {
|
||
// The arguments parsed by read() are used beyond this loop, so we
|
||
// cannot reuse the same array on each iteration. Therefore we pass
|
||
// in |null| as the initial value (see the comment on
|
||
// EvaluatorPreprocessor_read() for why).
|
||
operation.args = null;
|
||
if (!preprocessor.read(operation)) {
|
||
break;
|
||
}
|
||
let args = operation.args;
|
||
let fn = operation.fn;
|
||
|
||
switch (fn | 0) {
|
||
case OPS.paintXObject:
|
||
// eagerly compile XForm objects
|
||
isValidName = args[0] instanceof Name;
|
||
name = args[0].name;
|
||
|
||
if (isValidName) {
|
||
const localImage = localImageCache.getByName(name);
|
||
if (localImage) {
|
||
operatorList.addImageOps(
|
||
localImage.fn,
|
||
localImage.args,
|
||
localImage.optionalContent
|
||
);
|
||
|
||
incrementCachedImageMaskCount(localImage);
|
||
args = null;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
next(
|
||
new Promise(function (resolveXObject, rejectXObject) {
|
||
if (!isValidName) {
|
||
throw new FormatError("XObject must be referred to by name.");
|
||
}
|
||
|
||
let xobj = xobjs.getRaw(name);
|
||
if (xobj instanceof Ref) {
|
||
const localImage =
|
||
localImageCache.getByRef(xobj) ||
|
||
self._regionalImageCache.getByRef(xobj);
|
||
if (localImage) {
|
||
operatorList.addImageOps(
|
||
localImage.fn,
|
||
localImage.args,
|
||
localImage.optionalContent
|
||
);
|
||
|
||
incrementCachedImageMaskCount(localImage);
|
||
resolveXObject();
|
||
return;
|
||
}
|
||
|
||
const globalImage = self.globalImageCache.getData(
|
||
xobj,
|
||
self.pageIndex
|
||
);
|
||
if (globalImage) {
|
||
operatorList.addDependency(globalImage.objId);
|
||
operatorList.addImageOps(
|
||
globalImage.fn,
|
||
globalImage.args,
|
||
globalImage.optionalContent
|
||
);
|
||
|
||
resolveXObject();
|
||
return;
|
||
}
|
||
|
||
xobj = xref.fetch(xobj);
|
||
}
|
||
|
||
if (!(xobj instanceof BaseStream)) {
|
||
throw new FormatError("XObject should be a stream");
|
||
}
|
||
|
||
const type = xobj.dict.get("Subtype");
|
||
if (!(type instanceof Name)) {
|
||
throw new FormatError("XObject should have a Name subtype");
|
||
}
|
||
|
||
if (type.name === "Form") {
|
||
stateManager.save();
|
||
self
|
||
.buildFormXObject(
|
||
resources,
|
||
xobj,
|
||
null,
|
||
operatorList,
|
||
task,
|
||
stateManager.state.clone(),
|
||
localColorSpaceCache
|
||
)
|
||
.then(function () {
|
||
stateManager.restore();
|
||
resolveXObject();
|
||
}, rejectXObject);
|
||
return;
|
||
} else if (type.name === "Image") {
|
||
self
|
||
.buildPaintImageXObject({
|
||
resources,
|
||
image: xobj,
|
||
operatorList,
|
||
cacheKey: name,
|
||
localImageCache,
|
||
localColorSpaceCache,
|
||
})
|
||
.then(resolveXObject, rejectXObject);
|
||
return;
|
||
} else if (type.name === "PS") {
|
||
// PostScript XObjects are unused when viewing documents.
|
||
// See section 4.7.1 of Adobe's PDF reference.
|
||
info("Ignored XObject subtype PS");
|
||
} else {
|
||
throw new FormatError(
|
||
`Unhandled XObject subtype ${type.name}`
|
||
);
|
||
}
|
||
resolveXObject();
|
||
}).catch(function (reason) {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (self.options.ignoreErrors) {
|
||
warn(`getOperatorList - ignoring XObject: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
})
|
||
);
|
||
return;
|
||
case OPS.setFont:
|
||
var fontSize = args[1];
|
||
// eagerly collect all fonts
|
||
next(
|
||
self
|
||
.handleSetFont(
|
||
resources,
|
||
args,
|
||
null,
|
||
operatorList,
|
||
task,
|
||
stateManager.state,
|
||
fallbackFontDict
|
||
)
|
||
.then(function (loadedName) {
|
||
operatorList.addDependency(loadedName);
|
||
operatorList.addOp(OPS.setFont, [loadedName, fontSize]);
|
||
})
|
||
);
|
||
return;
|
||
case OPS.beginText:
|
||
parsingText = true;
|
||
break;
|
||
case OPS.endText:
|
||
parsingText = false;
|
||
break;
|
||
case OPS.endInlineImage:
|
||
var cacheKey = args[0].cacheKey;
|
||
if (cacheKey) {
|
||
const localImage = localImageCache.getByName(cacheKey);
|
||
if (localImage) {
|
||
operatorList.addImageOps(
|
||
localImage.fn,
|
||
localImage.args,
|
||
localImage.optionalContent
|
||
);
|
||
|
||
incrementCachedImageMaskCount(localImage);
|
||
args = null;
|
||
continue;
|
||
}
|
||
}
|
||
next(
|
||
self.buildPaintImageXObject({
|
||
resources,
|
||
image: args[0],
|
||
isInline: true,
|
||
operatorList,
|
||
cacheKey,
|
||
localImageCache,
|
||
localColorSpaceCache,
|
||
})
|
||
);
|
||
return;
|
||
case OPS.showText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
args[0] = self.handleText(args[0], stateManager.state);
|
||
break;
|
||
case OPS.showSpacedText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
var combinedGlyphs = [];
|
||
var state = stateManager.state;
|
||
for (const arrItem of args[0]) {
|
||
if (typeof arrItem === "string") {
|
||
combinedGlyphs.push(...self.handleText(arrItem, state));
|
||
} else if (typeof arrItem === "number") {
|
||
combinedGlyphs.push(arrItem);
|
||
}
|
||
}
|
||
args[0] = combinedGlyphs;
|
||
fn = OPS.showText;
|
||
break;
|
||
case OPS.nextLineShowText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
operatorList.addOp(OPS.nextLine);
|
||
args[0] = self.handleText(args[0], stateManager.state);
|
||
fn = OPS.showText;
|
||
break;
|
||
case OPS.nextLineSetSpacingShowText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
operatorList.addOp(OPS.nextLine);
|
||
operatorList.addOp(OPS.setWordSpacing, [args.shift()]);
|
||
operatorList.addOp(OPS.setCharSpacing, [args.shift()]);
|
||
args[0] = self.handleText(args[0], stateManager.state);
|
||
fn = OPS.showText;
|
||
break;
|
||
case OPS.setTextRenderingMode:
|
||
stateManager.state.textRenderingMode = args[0];
|
||
break;
|
||
|
||
case OPS.setFillColorSpace: {
|
||
const cachedColorSpace = ColorSpace.getCached(
|
||
args[0],
|
||
xref,
|
||
localColorSpaceCache
|
||
);
|
||
if (cachedColorSpace) {
|
||
stateManager.state.fillColorSpace = cachedColorSpace;
|
||
continue;
|
||
}
|
||
|
||
next(
|
||
self
|
||
.parseColorSpace({
|
||
cs: args[0],
|
||
resources,
|
||
localColorSpaceCache,
|
||
})
|
||
.then(function (colorSpace) {
|
||
if (colorSpace) {
|
||
stateManager.state.fillColorSpace = colorSpace;
|
||
}
|
||
})
|
||
);
|
||
return;
|
||
}
|
||
case OPS.setStrokeColorSpace: {
|
||
const cachedColorSpace = ColorSpace.getCached(
|
||
args[0],
|
||
xref,
|
||
localColorSpaceCache
|
||
);
|
||
if (cachedColorSpace) {
|
||
stateManager.state.strokeColorSpace = cachedColorSpace;
|
||
continue;
|
||
}
|
||
|
||
next(
|
||
self
|
||
.parseColorSpace({
|
||
cs: args[0],
|
||
resources,
|
||
localColorSpaceCache,
|
||
})
|
||
.then(function (colorSpace) {
|
||
if (colorSpace) {
|
||
stateManager.state.strokeColorSpace = colorSpace;
|
||
}
|
||
})
|
||
);
|
||
return;
|
||
}
|
||
case OPS.setFillColor:
|
||
cs = stateManager.state.fillColorSpace;
|
||
args = cs.getRgb(args, 0);
|
||
fn = OPS.setFillRGBColor;
|
||
break;
|
||
case OPS.setStrokeColor:
|
||
cs = stateManager.state.strokeColorSpace;
|
||
args = cs.getRgb(args, 0);
|
||
fn = OPS.setStrokeRGBColor;
|
||
break;
|
||
case OPS.setFillGray:
|
||
stateManager.state.fillColorSpace = ColorSpace.singletons.gray;
|
||
args = ColorSpace.singletons.gray.getRgb(args, 0);
|
||
fn = OPS.setFillRGBColor;
|
||
break;
|
||
case OPS.setStrokeGray:
|
||
stateManager.state.strokeColorSpace = ColorSpace.singletons.gray;
|
||
args = ColorSpace.singletons.gray.getRgb(args, 0);
|
||
fn = OPS.setStrokeRGBColor;
|
||
break;
|
||
case OPS.setFillCMYKColor:
|
||
stateManager.state.fillColorSpace = ColorSpace.singletons.cmyk;
|
||
args = ColorSpace.singletons.cmyk.getRgb(args, 0);
|
||
fn = OPS.setFillRGBColor;
|
||
break;
|
||
case OPS.setStrokeCMYKColor:
|
||
stateManager.state.strokeColorSpace = ColorSpace.singletons.cmyk;
|
||
args = ColorSpace.singletons.cmyk.getRgb(args, 0);
|
||
fn = OPS.setStrokeRGBColor;
|
||
break;
|
||
case OPS.setFillRGBColor:
|
||
stateManager.state.fillColorSpace = ColorSpace.singletons.rgb;
|
||
args = ColorSpace.singletons.rgb.getRgb(args, 0);
|
||
break;
|
||
case OPS.setStrokeRGBColor:
|
||
stateManager.state.strokeColorSpace = ColorSpace.singletons.rgb;
|
||
args = ColorSpace.singletons.rgb.getRgb(args, 0);
|
||
break;
|
||
case OPS.setFillColorN:
|
||
cs = stateManager.state.fillColorSpace;
|
||
if (cs.name === "Pattern") {
|
||
next(
|
||
self.handleColorN(
|
||
operatorList,
|
||
OPS.setFillColorN,
|
||
args,
|
||
cs,
|
||
patterns,
|
||
resources,
|
||
task,
|
||
localColorSpaceCache,
|
||
localTilingPatternCache,
|
||
localShadingPatternCache
|
||
)
|
||
);
|
||
return;
|
||
}
|
||
args = cs.getRgb(args, 0);
|
||
fn = OPS.setFillRGBColor;
|
||
break;
|
||
case OPS.setStrokeColorN:
|
||
cs = stateManager.state.strokeColorSpace;
|
||
if (cs.name === "Pattern") {
|
||
next(
|
||
self.handleColorN(
|
||
operatorList,
|
||
OPS.setStrokeColorN,
|
||
args,
|
||
cs,
|
||
patterns,
|
||
resources,
|
||
task,
|
||
localColorSpaceCache,
|
||
localTilingPatternCache,
|
||
localShadingPatternCache
|
||
)
|
||
);
|
||
return;
|
||
}
|
||
args = cs.getRgb(args, 0);
|
||
fn = OPS.setStrokeRGBColor;
|
||
break;
|
||
|
||
case OPS.shadingFill:
|
||
var shadingRes = resources.get("Shading");
|
||
if (!shadingRes) {
|
||
throw new FormatError("No shading resource found");
|
||
}
|
||
|
||
var shading = shadingRes.get(args[0].name);
|
||
if (!shading) {
|
||
throw new FormatError("No shading object found");
|
||
}
|
||
const patternId = self.parseShading({
|
||
shading,
|
||
resources,
|
||
localColorSpaceCache,
|
||
localShadingPatternCache,
|
||
});
|
||
args = [patternId];
|
||
fn = OPS.shadingFill;
|
||
break;
|
||
case OPS.setGState:
|
||
isValidName = args[0] instanceof Name;
|
||
name = args[0].name;
|
||
|
||
if (isValidName) {
|
||
const localGStateObj = localGStateCache.getByName(name);
|
||
if (localGStateObj) {
|
||
if (localGStateObj.length > 0) {
|
||
operatorList.addOp(OPS.setGState, [localGStateObj]);
|
||
}
|
||
args = null;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
next(
|
||
new Promise(function (resolveGState, rejectGState) {
|
||
if (!isValidName) {
|
||
throw new FormatError("GState must be referred to by name.");
|
||
}
|
||
|
||
const extGState = resources.get("ExtGState");
|
||
if (!(extGState instanceof Dict)) {
|
||
throw new FormatError("ExtGState should be a dictionary.");
|
||
}
|
||
|
||
const gState = extGState.get(name);
|
||
// TODO: Attempt to lookup cached GStates by reference as well,
|
||
// if and only if there are PDF documents where doing so
|
||
// would significantly improve performance.
|
||
if (!(gState instanceof Dict)) {
|
||
throw new FormatError("GState should be a dictionary.");
|
||
}
|
||
|
||
self
|
||
.setGState({
|
||
resources,
|
||
gState,
|
||
operatorList,
|
||
cacheKey: name,
|
||
task,
|
||
stateManager,
|
||
localGStateCache,
|
||
localColorSpaceCache,
|
||
})
|
||
.then(resolveGState, rejectGState);
|
||
}).catch(function (reason) {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (self.options.ignoreErrors) {
|
||
warn(`getOperatorList - ignoring ExtGState: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
})
|
||
);
|
||
return;
|
||
case OPS.moveTo:
|
||
case OPS.lineTo:
|
||
case OPS.curveTo:
|
||
case OPS.curveTo2:
|
||
case OPS.curveTo3:
|
||
case OPS.closePath:
|
||
case OPS.rectangle:
|
||
self.buildPath(operatorList, fn, args, parsingText);
|
||
continue;
|
||
case OPS.markPoint:
|
||
case OPS.markPointProps:
|
||
case OPS.beginCompat:
|
||
case OPS.endCompat:
|
||
// Ignore operators where the corresponding handlers are known to
|
||
// be no-op in CanvasGraphics (display/canvas.js). This prevents
|
||
// serialization errors and is also a bit more efficient.
|
||
// We could also try to serialize all objects in a general way,
|
||
// e.g. as done in https://github.com/mozilla/pdf.js/pull/6266,
|
||
// but doing so is meaningless without knowing the semantics.
|
||
continue;
|
||
case OPS.beginMarkedContentProps:
|
||
if (!(args[0] instanceof Name)) {
|
||
warn(`Expected name for beginMarkedContentProps arg0=${args[0]}`);
|
||
continue;
|
||
}
|
||
if (args[0].name === "OC") {
|
||
next(
|
||
self
|
||
.parseMarkedContentProps(args[1], resources)
|
||
.then(data => {
|
||
operatorList.addOp(OPS.beginMarkedContentProps, [
|
||
"OC",
|
||
data,
|
||
]);
|
||
})
|
||
.catch(reason => {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (self.options.ignoreErrors) {
|
||
warn(
|
||
`getOperatorList - ignoring beginMarkedContentProps: "${reason}".`
|
||
);
|
||
return;
|
||
}
|
||
throw reason;
|
||
})
|
||
);
|
||
return;
|
||
}
|
||
// Other marked content types aren't supported yet.
|
||
args = [
|
||
args[0].name,
|
||
args[1] instanceof Dict ? args[1].get("MCID") : null,
|
||
];
|
||
|
||
break;
|
||
case OPS.beginMarkedContent:
|
||
case OPS.endMarkedContent:
|
||
default:
|
||
// Note: Ignore the operator if it has `Dict` arguments, since
|
||
// those are non-serializable, otherwise postMessage will throw
|
||
// "An object could not be cloned.".
|
||
if (args !== null) {
|
||
for (i = 0, ii = args.length; i < ii; i++) {
|
||
if (args[i] instanceof Dict) {
|
||
break;
|
||
}
|
||
}
|
||
if (i < ii) {
|
||
warn("getOperatorList - ignoring operator: " + fn);
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
operatorList.addOp(fn, args);
|
||
}
|
||
if (stop) {
|
||
next(deferred);
|
||
return;
|
||
}
|
||
// Some PDFs don't close all restores inside object/form.
|
||
// Closing those for them.
|
||
closePendingRestoreOPS();
|
||
resolve();
|
||
}).catch(reason => {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (this.options.ignoreErrors) {
|
||
warn(
|
||
`getOperatorList - ignoring errors during "${task.name}" ` +
|
||
`task: "${reason}".`
|
||
);
|
||
|
||
closePendingRestoreOPS();
|
||
return;
|
||
}
|
||
throw reason;
|
||
});
|
||
}
|
||
|
||
getTextContent({
|
||
stream,
|
||
task,
|
||
resources,
|
||
stateManager = null,
|
||
includeMarkedContent = false,
|
||
sink,
|
||
seenStyles = new Set(),
|
||
viewBox,
|
||
markedContentData = null,
|
||
disableNormalization = false,
|
||
}) {
|
||
// Ensure that `resources`/`stateManager` is correctly initialized,
|
||
// even if the provided parameter is e.g. `null`.
|
||
resources ||= Dict.empty;
|
||
stateManager ||= new StateManager(new TextState());
|
||
|
||
if (includeMarkedContent) {
|
||
markedContentData ||= { level: 0 };
|
||
}
|
||
|
||
const textContent = {
|
||
items: [],
|
||
styles: Object.create(null),
|
||
};
|
||
const textContentItem = {
|
||
initialized: false,
|
||
str: [],
|
||
totalWidth: 0,
|
||
totalHeight: 0,
|
||
width: 0,
|
||
height: 0,
|
||
vertical: false,
|
||
prevTransform: null,
|
||
textAdvanceScale: 0,
|
||
spaceInFlowMin: 0,
|
||
spaceInFlowMax: 0,
|
||
trackingSpaceMin: Infinity,
|
||
negativeSpaceMax: -Infinity,
|
||
notASpace: -Infinity,
|
||
transform: null,
|
||
fontName: null,
|
||
hasEOL: false,
|
||
};
|
||
|
||
// Use a circular buffer (length === 2) to save the last chars in the
|
||
// text stream.
|
||
// This implementation of the circular buffer is using a fixed array
|
||
// and the position of the next element:
|
||
// function addElement(x) {
|
||
// buffer[pos] = x;
|
||
// pos = (pos + 1) % buffer.length;
|
||
// }
|
||
// It's a way faster than:
|
||
// function addElement(x) {
|
||
// buffer.push(x);
|
||
// buffer.shift();
|
||
// }
|
||
//
|
||
// It's useful to know when we need to add a whitespace in the
|
||
// text chunk.
|
||
const twoLastChars = [" ", " "];
|
||
let twoLastCharsPos = 0;
|
||
|
||
/**
|
||
* Save the last char.
|
||
* @param {string} char
|
||
* @returns {boolean} true when the two last chars before adding the new one
|
||
* are a non-whitespace followed by a whitespace.
|
||
*/
|
||
function saveLastChar(char) {
|
||
const nextPos = (twoLastCharsPos + 1) % 2;
|
||
const ret =
|
||
twoLastChars[twoLastCharsPos] !== " " && twoLastChars[nextPos] === " ";
|
||
twoLastChars[twoLastCharsPos] = char;
|
||
twoLastCharsPos = nextPos;
|
||
|
||
return ret;
|
||
}
|
||
|
||
function shouldAddWhitepsace() {
|
||
return (
|
||
twoLastChars[twoLastCharsPos] !== " " &&
|
||
twoLastChars[(twoLastCharsPos + 1) % 2] === " "
|
||
);
|
||
}
|
||
|
||
function resetLastChars() {
|
||
twoLastChars[0] = twoLastChars[1] = " ";
|
||
twoLastCharsPos = 0;
|
||
}
|
||
|
||
// Used in addFakeSpaces.
|
||
|
||
// A white <= fontSize * TRACKING_SPACE_FACTOR is a tracking space
|
||
// so it doesn't count as a space.
|
||
const TRACKING_SPACE_FACTOR = 0.102;
|
||
|
||
// When a white <= fontSize * NOT_A_SPACE_FACTOR, there is no space
|
||
// even if one is present in the text stream.
|
||
const NOT_A_SPACE_FACTOR = 0.03;
|
||
|
||
// A negative white < fontSize * NEGATIVE_SPACE_FACTOR induces
|
||
// a break (a new chunk of text is created).
|
||
// It doesn't change anything when the text is copied but
|
||
// it improves potential mismatch between text layer and canvas.
|
||
const NEGATIVE_SPACE_FACTOR = -0.2;
|
||
|
||
// A white with a width in [fontSize * MIN_FACTOR; fontSize * MAX_FACTOR]
|
||
// is a space which will be inserted in the current flow of words.
|
||
// If the width is outside of this range then the flow is broken
|
||
// (which means a new span in the text layer).
|
||
// It's useful to adjust the best as possible the span in the layer
|
||
// to what is displayed in the canvas.
|
||
const SPACE_IN_FLOW_MIN_FACTOR = 0.102;
|
||
const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
|
||
|
||
// If a char is too high/too low compared to the previous we just create
|
||
// a new chunk.
|
||
// If the advance isn't in the +/-VERTICAL_SHIFT_RATIO * height range then
|
||
// a new chunk is created.
|
||
const VERTICAL_SHIFT_RATIO = 0.25;
|
||
|
||
const self = this;
|
||
const xref = this.xref;
|
||
const showSpacedTextBuffer = [];
|
||
|
||
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
||
let xobjs = null;
|
||
const emptyXObjectCache = new LocalImageCache();
|
||
const emptyGStateCache = new LocalGStateCache();
|
||
|
||
const preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
||
|
||
let textState;
|
||
|
||
function pushWhitespace({
|
||
width = 0,
|
||
height = 0,
|
||
transform = textContentItem.prevTransform,
|
||
fontName = textContentItem.fontName,
|
||
}) {
|
||
textContent.items.push({
|
||
str: " ",
|
||
dir: "ltr",
|
||
width,
|
||
height,
|
||
transform,
|
||
fontName,
|
||
hasEOL: false,
|
||
});
|
||
}
|
||
|
||
function getCurrentTextTransform() {
|
||
// 9.4.4 Text Space Details
|
||
const font = textState.font;
|
||
const tsm = [
|
||
textState.fontSize * textState.textHScale,
|
||
0,
|
||
0,
|
||
textState.fontSize,
|
||
0,
|
||
textState.textRise,
|
||
];
|
||
|
||
if (
|
||
font.isType3Font &&
|
||
(textState.fontSize <= 1 || font.isCharBBox) &&
|
||
!isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX)
|
||
) {
|
||
const glyphHeight = font.bbox[3] - font.bbox[1];
|
||
if (glyphHeight > 0) {
|
||
tsm[3] *= glyphHeight * textState.fontMatrix[3];
|
||
}
|
||
}
|
||
|
||
return Util.transform(
|
||
textState.ctm,
|
||
Util.transform(textState.textMatrix, tsm)
|
||
);
|
||
}
|
||
|
||
function ensureTextContentItem() {
|
||
if (textContentItem.initialized) {
|
||
return textContentItem;
|
||
}
|
||
const { font, loadedName } = textState;
|
||
if (!seenStyles.has(loadedName)) {
|
||
seenStyles.add(loadedName);
|
||
textContent.styles[loadedName] = {
|
||
fontFamily: font.fallbackName,
|
||
ascent: font.ascent,
|
||
descent: font.descent,
|
||
vertical: font.vertical,
|
||
};
|
||
if (self.options.fontExtraProperties && font.systemFontInfo) {
|
||
const style = textContent.styles[loadedName];
|
||
style.fontSubstitution = font.systemFontInfo.css;
|
||
style.fontSubstitutionLoadedName = font.systemFontInfo.loadedName;
|
||
}
|
||
}
|
||
textContentItem.fontName = loadedName;
|
||
|
||
const trm = (textContentItem.transform = getCurrentTextTransform());
|
||
if (!font.vertical) {
|
||
textContentItem.width = textContentItem.totalWidth = 0;
|
||
textContentItem.height = textContentItem.totalHeight = Math.hypot(
|
||
trm[2],
|
||
trm[3]
|
||
);
|
||
textContentItem.vertical = false;
|
||
} else {
|
||
textContentItem.width = textContentItem.totalWidth = Math.hypot(
|
||
trm[0],
|
||
trm[1]
|
||
);
|
||
textContentItem.height = textContentItem.totalHeight = 0;
|
||
textContentItem.vertical = true;
|
||
}
|
||
|
||
const scaleLineX = Math.hypot(
|
||
textState.textLineMatrix[0],
|
||
textState.textLineMatrix[1]
|
||
);
|
||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
||
|
||
const { fontSize } = textState;
|
||
textContentItem.trackingSpaceMin = fontSize * TRACKING_SPACE_FACTOR;
|
||
textContentItem.notASpace = fontSize * NOT_A_SPACE_FACTOR;
|
||
textContentItem.negativeSpaceMax = fontSize * NEGATIVE_SPACE_FACTOR;
|
||
textContentItem.spaceInFlowMin = fontSize * SPACE_IN_FLOW_MIN_FACTOR;
|
||
textContentItem.spaceInFlowMax = fontSize * SPACE_IN_FLOW_MAX_FACTOR;
|
||
textContentItem.hasEOL = false;
|
||
|
||
textContentItem.initialized = true;
|
||
return textContentItem;
|
||
}
|
||
|
||
function updateAdvanceScale() {
|
||
if (!textContentItem.initialized) {
|
||
return;
|
||
}
|
||
|
||
const scaleLineX = Math.hypot(
|
||
textState.textLineMatrix[0],
|
||
textState.textLineMatrix[1]
|
||
);
|
||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||
const scaleFactor = scaleCtmX * scaleLineX;
|
||
if (scaleFactor === textContentItem.textAdvanceScale) {
|
||
return;
|
||
}
|
||
|
||
if (!textContentItem.vertical) {
|
||
textContentItem.totalWidth +=
|
||
textContentItem.width * textContentItem.textAdvanceScale;
|
||
textContentItem.width = 0;
|
||
} else {
|
||
textContentItem.totalHeight +=
|
||
textContentItem.height * textContentItem.textAdvanceScale;
|
||
textContentItem.height = 0;
|
||
}
|
||
|
||
textContentItem.textAdvanceScale = scaleFactor;
|
||
}
|
||
|
||
function runBidiTransform(textChunk) {
|
||
let text = textChunk.str.join("");
|
||
if (!disableNormalization) {
|
||
text = normalizeUnicode(text);
|
||
}
|
||
const bidiResult = bidi(text, -1, textChunk.vertical);
|
||
return {
|
||
str: bidiResult.str,
|
||
dir: bidiResult.dir,
|
||
width: Math.abs(textChunk.totalWidth),
|
||
height: Math.abs(textChunk.totalHeight),
|
||
transform: textChunk.transform,
|
||
fontName: textChunk.fontName,
|
||
hasEOL: textChunk.hasEOL,
|
||
};
|
||
}
|
||
|
||
function handleSetFont(fontName, fontRef) {
|
||
return self
|
||
.loadFont(fontName, fontRef, resources)
|
||
.then(function (translated) {
|
||
if (!translated.font.isType3Font) {
|
||
return translated;
|
||
}
|
||
return translated
|
||
.loadType3Data(self, resources, task)
|
||
.catch(function () {
|
||
// Ignore Type3-parsing errors, since we only use `loadType3Data`
|
||
// here to ensure that we'll always obtain a useful /FontBBox.
|
||
})
|
||
.then(function () {
|
||
return translated;
|
||
});
|
||
})
|
||
.then(function (translated) {
|
||
textState.loadedName = translated.loadedName;
|
||
textState.font = translated.font;
|
||
textState.fontMatrix =
|
||
translated.font.fontMatrix || FONT_IDENTITY_MATRIX;
|
||
});
|
||
}
|
||
|
||
function applyInverseRotation(x, y, matrix) {
|
||
const scale = Math.hypot(matrix[0], matrix[1]);
|
||
return [
|
||
(matrix[0] * x + matrix[1] * y) / scale,
|
||
(matrix[2] * x + matrix[3] * y) / scale,
|
||
];
|
||
}
|
||
|
||
function compareWithLastPosition(glyphWidth) {
|
||
const currentTransform = getCurrentTextTransform();
|
||
let posX = currentTransform[4];
|
||
let posY = currentTransform[5];
|
||
|
||
// Check if the glyph is in the viewbox.
|
||
if (textState.font?.vertical) {
|
||
if (
|
||
posX < viewBox[0] ||
|
||
posX > viewBox[2] ||
|
||
posY + glyphWidth < viewBox[1] ||
|
||
posY > viewBox[3]
|
||
) {
|
||
return false;
|
||
}
|
||
} else if (
|
||
posX + glyphWidth < viewBox[0] ||
|
||
posX > viewBox[2] ||
|
||
posY < viewBox[1] ||
|
||
posY > viewBox[3]
|
||
) {
|
||
return false;
|
||
}
|
||
|
||
if (!textState.font || !textContentItem.prevTransform) {
|
||
return true;
|
||
}
|
||
|
||
let lastPosX = textContentItem.prevTransform[4];
|
||
let lastPosY = textContentItem.prevTransform[5];
|
||
|
||
if (lastPosX === posX && lastPosY === posY) {
|
||
return true;
|
||
}
|
||
|
||
let rotate = -1;
|
||
// Take into account the rotation is the current transform.
|
||
if (
|
||
currentTransform[0] &&
|
||
currentTransform[1] === 0 &&
|
||
currentTransform[2] === 0
|
||
) {
|
||
rotate = currentTransform[0] > 0 ? 0 : 180;
|
||
} else if (
|
||
currentTransform[1] &&
|
||
currentTransform[0] === 0 &&
|
||
currentTransform[3] === 0
|
||
) {
|
||
rotate = currentTransform[1] > 0 ? 90 : 270;
|
||
}
|
||
|
||
switch (rotate) {
|
||
case 0:
|
||
break;
|
||
case 90:
|
||
[posX, posY] = [posY, posX];
|
||
[lastPosX, lastPosY] = [lastPosY, lastPosX];
|
||
break;
|
||
case 180:
|
||
[posX, posY, lastPosX, lastPosY] = [
|
||
-posX,
|
||
-posY,
|
||
-lastPosX,
|
||
-lastPosY,
|
||
];
|
||
break;
|
||
case 270:
|
||
[posX, posY] = [-posY, -posX];
|
||
[lastPosX, lastPosY] = [-lastPosY, -lastPosX];
|
||
break;
|
||
default:
|
||
// This is not a 0, 90, 180, 270 rotation so:
|
||
// - remove the scale factor from the matrix to get a rotation matrix
|
||
// - apply the inverse (which is the transposed) to the positions
|
||
// and we can then compare positions of the glyphes to detect
|
||
// a whitespace.
|
||
[posX, posY] = applyInverseRotation(posX, posY, currentTransform);
|
||
[lastPosX, lastPosY] = applyInverseRotation(
|
||
lastPosX,
|
||
lastPosY,
|
||
textContentItem.prevTransform
|
||
);
|
||
}
|
||
|
||
if (textState.font.vertical) {
|
||
const advanceY = (lastPosY - posY) / textContentItem.textAdvanceScale;
|
||
const advanceX = posX - lastPosX;
|
||
|
||
// When the total height of the current chunk is negative
|
||
// then we're writing from bottom to top.
|
||
const textOrientation = Math.sign(textContentItem.height);
|
||
if (advanceY < textOrientation * textContentItem.negativeSpaceMax) {
|
||
if (
|
||
Math.abs(advanceX) >
|
||
0.5 * textContentItem.width /* not the same column */
|
||
) {
|
||
appendEOL();
|
||
return true;
|
||
}
|
||
|
||
resetLastChars();
|
||
flushTextContentItem();
|
||
return true;
|
||
}
|
||
|
||
if (Math.abs(advanceX) > textContentItem.width) {
|
||
appendEOL();
|
||
return true;
|
||
}
|
||
|
||
if (advanceY <= textOrientation * textContentItem.notASpace) {
|
||
// The real spacing between 2 consecutive chars is thin enough to be
|
||
// considered a non-space.
|
||
resetLastChars();
|
||
}
|
||
|
||
if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
|
||
if (shouldAddWhitepsace()) {
|
||
// The space is very thin, hence it deserves to have its own span in
|
||
// order to avoid too much shift between the canvas and the text
|
||
// layer.
|
||
resetLastChars();
|
||
flushTextContentItem();
|
||
pushWhitespace({ height: Math.abs(advanceY) });
|
||
} else {
|
||
textContentItem.height += advanceY;
|
||
}
|
||
} else if (
|
||
!addFakeSpaces(
|
||
advanceY,
|
||
textContentItem.prevTransform,
|
||
textOrientation
|
||
)
|
||
) {
|
||
if (textContentItem.str.length === 0) {
|
||
resetLastChars();
|
||
pushWhitespace({ height: Math.abs(advanceY) });
|
||
} else {
|
||
textContentItem.height += advanceY;
|
||
}
|
||
}
|
||
|
||
if (Math.abs(advanceX) > textContentItem.width * VERTICAL_SHIFT_RATIO) {
|
||
flushTextContentItem();
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||
const advanceY = posY - lastPosY;
|
||
|
||
// When the total width of the current chunk is negative
|
||
// then we're writing from right to left.
|
||
const textOrientation = Math.sign(textContentItem.width);
|
||
if (advanceX < textOrientation * textContentItem.negativeSpaceMax) {
|
||
if (
|
||
Math.abs(advanceY) >
|
||
0.5 * textContentItem.height /* not the same line */
|
||
) {
|
||
appendEOL();
|
||
return true;
|
||
}
|
||
|
||
// We're moving back so in case the last char was a whitespace
|
||
// we cancel it: it doesn't make sense to insert it.
|
||
resetLastChars();
|
||
flushTextContentItem();
|
||
return true;
|
||
}
|
||
|
||
if (Math.abs(advanceY) > textContentItem.height) {
|
||
appendEOL();
|
||
return true;
|
||
}
|
||
|
||
if (advanceX <= textOrientation * textContentItem.notASpace) {
|
||
// The real spacing between 2 consecutive chars is thin enough to be
|
||
// considered a non-space.
|
||
resetLastChars();
|
||
}
|
||
|
||
if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
|
||
if (shouldAddWhitepsace()) {
|
||
// The space is very thin, hence it deserves to have its own span in
|
||
// order to avoid too much shift between the canvas and the text
|
||
// layer.
|
||
resetLastChars();
|
||
flushTextContentItem();
|
||
pushWhitespace({ width: Math.abs(advanceX) });
|
||
} else {
|
||
textContentItem.width += advanceX;
|
||
}
|
||
} else if (
|
||
!addFakeSpaces(advanceX, textContentItem.prevTransform, textOrientation)
|
||
) {
|
||
if (textContentItem.str.length === 0) {
|
||
resetLastChars();
|
||
pushWhitespace({ width: Math.abs(advanceX) });
|
||
} else {
|
||
textContentItem.width += advanceX;
|
||
}
|
||
}
|
||
|
||
if (Math.abs(advanceY) > textContentItem.height * VERTICAL_SHIFT_RATIO) {
|
||
flushTextContentItem();
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
function buildTextContentItem({ chars, extraSpacing }) {
|
||
const font = textState.font;
|
||
if (!chars) {
|
||
// Just move according to the space we have.
|
||
const charSpacing = textState.charSpacing + extraSpacing;
|
||
if (charSpacing) {
|
||
if (!font.vertical) {
|
||
textState.translateTextMatrix(
|
||
charSpacing * textState.textHScale,
|
||
0
|
||
);
|
||
} else {
|
||
textState.translateTextMatrix(0, -charSpacing);
|
||
}
|
||
}
|
||
|
||
return;
|
||
}
|
||
|
||
const glyphs = font.charsToGlyphs(chars);
|
||
const scale = textState.fontMatrix[0] * textState.fontSize;
|
||
|
||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||
const glyph = glyphs[i];
|
||
const { category } = glyph;
|
||
|
||
if (category.isInvisibleFormatMark) {
|
||
continue;
|
||
}
|
||
let charSpacing =
|
||
textState.charSpacing + (i + 1 === ii ? extraSpacing : 0);
|
||
|
||
let glyphWidth = glyph.width;
|
||
if (font.vertical) {
|
||
glyphWidth = glyph.vmetric ? glyph.vmetric[0] : -glyphWidth;
|
||
}
|
||
let scaledDim = glyphWidth * scale;
|
||
|
||
if (category.isWhitespace) {
|
||
// Don't push a " " in the textContentItem
|
||
// (except when it's between two non-spaces chars),
|
||
// it will be done (if required) in next call to
|
||
// compareWithLastPosition.
|
||
// This way we can merge real spaces and spaces due to cursor moves.
|
||
if (!font.vertical) {
|
||
charSpacing += scaledDim + textState.wordSpacing;
|
||
textState.translateTextMatrix(
|
||
charSpacing * textState.textHScale,
|
||
0
|
||
);
|
||
} else {
|
||
charSpacing += -scaledDim + textState.wordSpacing;
|
||
textState.translateTextMatrix(0, -charSpacing);
|
||
}
|
||
saveLastChar(" ");
|
||
continue;
|
||
}
|
||
|
||
if (
|
||
!category.isZeroWidthDiacritic &&
|
||
!compareWithLastPosition(scaledDim)
|
||
) {
|
||
// The glyph is not in page so just skip it but move the cursor.
|
||
if (!font.vertical) {
|
||
textState.translateTextMatrix(scaledDim * textState.textHScale, 0);
|
||
} else {
|
||
textState.translateTextMatrix(0, scaledDim);
|
||
}
|
||
continue;
|
||
}
|
||
|
||
// Must be called after compareWithLastPosition because
|
||
// the textContentItem could have been flushed.
|
||
const textChunk = ensureTextContentItem();
|
||
if (category.isZeroWidthDiacritic) {
|
||
scaledDim = 0;
|
||
}
|
||
|
||
if (!font.vertical) {
|
||
scaledDim *= textState.textHScale;
|
||
textState.translateTextMatrix(scaledDim, 0);
|
||
textChunk.width += scaledDim;
|
||
} else {
|
||
textState.translateTextMatrix(0, scaledDim);
|
||
scaledDim = Math.abs(scaledDim);
|
||
textChunk.height += scaledDim;
|
||
}
|
||
|
||
if (scaledDim) {
|
||
// Save the position of the last visible character.
|
||
textChunk.prevTransform = getCurrentTextTransform();
|
||
}
|
||
|
||
const glyphUnicode = glyph.unicode;
|
||
if (saveLastChar(glyphUnicode)) {
|
||
// The two last chars are a non-whitespace followed by a whitespace
|
||
// and then this non-whitespace, so we insert a whitespace here.
|
||
// Replaces all whitespaces with standard spaces (0x20), to avoid
|
||
// alignment issues between the textLayer and the canvas if the text
|
||
// contains e.g. tabs (fixes issue6612.pdf).
|
||
textChunk.str.push(" ");
|
||
}
|
||
textChunk.str.push(glyphUnicode);
|
||
|
||
if (charSpacing) {
|
||
if (!font.vertical) {
|
||
textState.translateTextMatrix(
|
||
charSpacing * textState.textHScale,
|
||
0
|
||
);
|
||
} else {
|
||
textState.translateTextMatrix(0, -charSpacing);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
function appendEOL() {
|
||
resetLastChars();
|
||
if (textContentItem.initialized) {
|
||
textContentItem.hasEOL = true;
|
||
flushTextContentItem();
|
||
} else {
|
||
textContent.items.push({
|
||
str: "",
|
||
dir: "ltr",
|
||
width: 0,
|
||
height: 0,
|
||
transform: getCurrentTextTransform(),
|
||
fontName: textState.loadedName,
|
||
hasEOL: true,
|
||
});
|
||
}
|
||
}
|
||
|
||
function addFakeSpaces(width, transf, textOrientation) {
|
||
if (
|
||
textOrientation * textContentItem.spaceInFlowMin <= width &&
|
||
width <= textOrientation * textContentItem.spaceInFlowMax
|
||
) {
|
||
if (textContentItem.initialized) {
|
||
resetLastChars();
|
||
textContentItem.str.push(" ");
|
||
}
|
||
return false;
|
||
}
|
||
|
||
const fontName = textContentItem.fontName;
|
||
|
||
let height = 0;
|
||
if (textContentItem.vertical) {
|
||
height = width;
|
||
width = 0;
|
||
}
|
||
|
||
flushTextContentItem();
|
||
resetLastChars();
|
||
pushWhitespace({
|
||
width: Math.abs(width),
|
||
height: Math.abs(height),
|
||
transform: transf || getCurrentTextTransform(),
|
||
fontName,
|
||
});
|
||
|
||
return true;
|
||
}
|
||
|
||
function flushTextContentItem() {
|
||
if (!textContentItem.initialized || !textContentItem.str) {
|
||
return;
|
||
}
|
||
|
||
// Do final text scaling.
|
||
if (!textContentItem.vertical) {
|
||
textContentItem.totalWidth +=
|
||
textContentItem.width * textContentItem.textAdvanceScale;
|
||
} else {
|
||
textContentItem.totalHeight +=
|
||
textContentItem.height * textContentItem.textAdvanceScale;
|
||
}
|
||
|
||
textContent.items.push(runBidiTransform(textContentItem));
|
||
textContentItem.initialized = false;
|
||
textContentItem.str.length = 0;
|
||
}
|
||
|
||
function enqueueChunk(batch = false) {
|
||
const length = textContent.items.length;
|
||
if (length === 0) {
|
||
return;
|
||
}
|
||
if (batch && length < TEXT_CHUNK_BATCH_SIZE) {
|
||
return;
|
||
}
|
||
sink.enqueue(textContent, length);
|
||
textContent.items = [];
|
||
textContent.styles = Object.create(null);
|
||
}
|
||
|
||
const timeSlotManager = new TimeSlotManager();
|
||
|
||
return new Promise(function promiseBody(resolve, reject) {
|
||
const next = function (promise) {
|
||
enqueueChunk(/* batch = */ true);
|
||
Promise.all([promise, sink.ready]).then(function () {
|
||
try {
|
||
promiseBody(resolve, reject);
|
||
} catch (ex) {
|
||
reject(ex);
|
||
}
|
||
}, reject);
|
||
};
|
||
task.ensureNotTerminated();
|
||
timeSlotManager.reset();
|
||
|
||
const operation = {};
|
||
let stop,
|
||
args = [];
|
||
while (!(stop = timeSlotManager.check())) {
|
||
// The arguments parsed by read() are not used beyond this loop, so
|
||
// we can reuse the same array on every iteration, thus avoiding
|
||
// unnecessary allocations.
|
||
args.length = 0;
|
||
operation.args = args;
|
||
if (!preprocessor.read(operation)) {
|
||
break;
|
||
}
|
||
|
||
const previousState = textState;
|
||
textState = stateManager.state;
|
||
const fn = operation.fn;
|
||
args = operation.args;
|
||
|
||
switch (fn | 0) {
|
||
case OPS.setFont:
|
||
// Optimization to ignore multiple identical Tf commands.
|
||
var fontNameArg = args[0].name,
|
||
fontSizeArg = args[1];
|
||
if (
|
||
textState.font &&
|
||
fontNameArg === textState.fontName &&
|
||
fontSizeArg === textState.fontSize
|
||
) {
|
||
break;
|
||
}
|
||
|
||
flushTextContentItem();
|
||
textState.fontName = fontNameArg;
|
||
textState.fontSize = fontSizeArg;
|
||
next(handleSetFont(fontNameArg, null));
|
||
return;
|
||
case OPS.setTextRise:
|
||
textState.textRise = args[0];
|
||
break;
|
||
case OPS.setHScale:
|
||
textState.textHScale = args[0] / 100;
|
||
break;
|
||
case OPS.setLeading:
|
||
textState.leading = args[0];
|
||
break;
|
||
case OPS.moveText:
|
||
textState.translateTextLineMatrix(args[0], args[1]);
|
||
textState.textMatrix = textState.textLineMatrix.slice();
|
||
break;
|
||
case OPS.setLeadingMoveText:
|
||
textState.leading = -args[1];
|
||
textState.translateTextLineMatrix(args[0], args[1]);
|
||
textState.textMatrix = textState.textLineMatrix.slice();
|
||
break;
|
||
case OPS.nextLine:
|
||
textState.carriageReturn();
|
||
break;
|
||
case OPS.setTextMatrix:
|
||
textState.setTextMatrix(
|
||
args[0],
|
||
args[1],
|
||
args[2],
|
||
args[3],
|
||
args[4],
|
||
args[5]
|
||
);
|
||
textState.setTextLineMatrix(
|
||
args[0],
|
||
args[1],
|
||
args[2],
|
||
args[3],
|
||
args[4],
|
||
args[5]
|
||
);
|
||
updateAdvanceScale();
|
||
break;
|
||
case OPS.setCharSpacing:
|
||
textState.charSpacing = args[0];
|
||
break;
|
||
case OPS.setWordSpacing:
|
||
textState.wordSpacing = args[0];
|
||
break;
|
||
case OPS.beginText:
|
||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||
break;
|
||
case OPS.showSpacedText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
|
||
const spaceFactor =
|
||
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
|
||
const elements = args[0];
|
||
for (let i = 0, ii = elements.length; i < ii; i++) {
|
||
const item = elements[i];
|
||
if (typeof item === "string") {
|
||
showSpacedTextBuffer.push(item);
|
||
} else if (typeof item === "number" && item !== 0) {
|
||
// PDF Specification 5.3.2 states:
|
||
// The number is expressed in thousandths of a unit of text
|
||
// space.
|
||
// This amount is subtracted from the current horizontal or
|
||
// vertical coordinate, depending on the writing mode.
|
||
// In the default coordinate system, a positive adjustment
|
||
// has the effect of moving the next glyph painted either to
|
||
// the left or down by the given amount.
|
||
const str = showSpacedTextBuffer.join("");
|
||
showSpacedTextBuffer.length = 0;
|
||
buildTextContentItem({
|
||
chars: str,
|
||
extraSpacing: item * spaceFactor,
|
||
});
|
||
}
|
||
}
|
||
|
||
if (showSpacedTextBuffer.length > 0) {
|
||
const str = showSpacedTextBuffer.join("");
|
||
showSpacedTextBuffer.length = 0;
|
||
buildTextContentItem({
|
||
chars: str,
|
||
extraSpacing: 0,
|
||
});
|
||
}
|
||
break;
|
||
case OPS.showText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
buildTextContentItem({
|
||
chars: args[0],
|
||
extraSpacing: 0,
|
||
});
|
||
break;
|
||
case OPS.nextLineShowText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
textState.carriageReturn();
|
||
buildTextContentItem({
|
||
chars: args[0],
|
||
extraSpacing: 0,
|
||
});
|
||
break;
|
||
case OPS.nextLineSetSpacingShowText:
|
||
if (!stateManager.state.font) {
|
||
self.ensureStateFont(stateManager.state);
|
||
continue;
|
||
}
|
||
textState.wordSpacing = args[0];
|
||
textState.charSpacing = args[1];
|
||
textState.carriageReturn();
|
||
buildTextContentItem({
|
||
chars: args[2],
|
||
extraSpacing: 0,
|
||
});
|
||
break;
|
||
case OPS.paintXObject:
|
||
flushTextContentItem();
|
||
if (!xobjs) {
|
||
xobjs = resources.get("XObject") || Dict.empty;
|
||
}
|
||
|
||
var isValidName = args[0] instanceof Name;
|
||
var name = args[0].name;
|
||
|
||
if (isValidName && emptyXObjectCache.getByName(name)) {
|
||
break;
|
||
}
|
||
|
||
next(
|
||
new Promise(function (resolveXObject, rejectXObject) {
|
||
if (!isValidName) {
|
||
throw new FormatError("XObject must be referred to by name.");
|
||
}
|
||
|
||
let xobj = xobjs.getRaw(name);
|
||
if (xobj instanceof Ref) {
|
||
if (emptyXObjectCache.getByRef(xobj)) {
|
||
resolveXObject();
|
||
return;
|
||
}
|
||
|
||
const globalImage = self.globalImageCache.getData(
|
||
xobj,
|
||
self.pageIndex
|
||
);
|
||
if (globalImage) {
|
||
resolveXObject();
|
||
return;
|
||
}
|
||
|
||
xobj = xref.fetch(xobj);
|
||
}
|
||
|
||
if (!(xobj instanceof BaseStream)) {
|
||
throw new FormatError("XObject should be a stream");
|
||
}
|
||
|
||
const type = xobj.dict.get("Subtype");
|
||
if (!(type instanceof Name)) {
|
||
throw new FormatError("XObject should have a Name subtype");
|
||
}
|
||
|
||
if (type.name !== "Form") {
|
||
emptyXObjectCache.set(name, xobj.dict.objId, true);
|
||
|
||
resolveXObject();
|
||
return;
|
||
}
|
||
|
||
// Use a new `StateManager` to prevent incorrect positioning
|
||
// of textItems *after* the Form XObject, since errors in the
|
||
// data can otherwise prevent `restore` operators from
|
||
// executing.
|
||
// NOTE: Only an issue when `options.ignoreErrors === true`.
|
||
const currentState = stateManager.state.clone();
|
||
const xObjStateManager = new StateManager(currentState);
|
||
|
||
const matrix = xobj.dict.getArray("Matrix");
|
||
if (Array.isArray(matrix) && matrix.length === 6) {
|
||
xObjStateManager.transform(matrix);
|
||
}
|
||
|
||
// Enqueue the `textContent` chunk before parsing the /Form
|
||
// XObject.
|
||
enqueueChunk();
|
||
const sinkWrapper = {
|
||
enqueueInvoked: false,
|
||
|
||
enqueue(chunk, size) {
|
||
this.enqueueInvoked = true;
|
||
sink.enqueue(chunk, size);
|
||
},
|
||
|
||
get desiredSize() {
|
||
return sink.desiredSize;
|
||
},
|
||
|
||
get ready() {
|
||
return sink.ready;
|
||
},
|
||
};
|
||
|
||
self
|
||
.getTextContent({
|
||
stream: xobj,
|
||
task,
|
||
resources: xobj.dict.get("Resources") || resources,
|
||
stateManager: xObjStateManager,
|
||
includeMarkedContent,
|
||
sink: sinkWrapper,
|
||
seenStyles,
|
||
viewBox,
|
||
markedContentData,
|
||
disableNormalization,
|
||
})
|
||
.then(function () {
|
||
if (!sinkWrapper.enqueueInvoked) {
|
||
emptyXObjectCache.set(name, xobj.dict.objId, true);
|
||
}
|
||
resolveXObject();
|
||
}, rejectXObject);
|
||
}).catch(function (reason) {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (self.options.ignoreErrors) {
|
||
// Error(s) in the XObject -- allow text-extraction to
|
||
// continue.
|
||
warn(`getTextContent - ignoring XObject: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
})
|
||
);
|
||
return;
|
||
case OPS.setGState:
|
||
isValidName = args[0] instanceof Name;
|
||
name = args[0].name;
|
||
|
||
if (isValidName && emptyGStateCache.getByName(name)) {
|
||
break;
|
||
}
|
||
|
||
next(
|
||
new Promise(function (resolveGState, rejectGState) {
|
||
if (!isValidName) {
|
||
throw new FormatError("GState must be referred to by name.");
|
||
}
|
||
|
||
const extGState = resources.get("ExtGState");
|
||
if (!(extGState instanceof Dict)) {
|
||
throw new FormatError("ExtGState should be a dictionary.");
|
||
}
|
||
|
||
const gState = extGState.get(name);
|
||
// TODO: Attempt to lookup cached GStates by reference as well,
|
||
// if and only if there are PDF documents where doing so
|
||
// would significantly improve performance.
|
||
if (!(gState instanceof Dict)) {
|
||
throw new FormatError("GState should be a dictionary.");
|
||
}
|
||
|
||
const gStateFont = gState.get("Font");
|
||
if (!gStateFont) {
|
||
emptyGStateCache.set(name, gState.objId, true);
|
||
|
||
resolveGState();
|
||
return;
|
||
}
|
||
flushTextContentItem();
|
||
|
||
textState.fontName = null;
|
||
textState.fontSize = gStateFont[1];
|
||
handleSetFont(null, gStateFont[0]).then(
|
||
resolveGState,
|
||
rejectGState
|
||
);
|
||
}).catch(function (reason) {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (self.options.ignoreErrors) {
|
||
// Error(s) in the ExtGState -- allow text-extraction to
|
||
// continue.
|
||
warn(`getTextContent - ignoring ExtGState: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
})
|
||
);
|
||
return;
|
||
case OPS.beginMarkedContent:
|
||
flushTextContentItem();
|
||
if (includeMarkedContent) {
|
||
markedContentData.level++;
|
||
|
||
textContent.items.push({
|
||
type: "beginMarkedContent",
|
||
tag: args[0] instanceof Name ? args[0].name : null,
|
||
});
|
||
}
|
||
break;
|
||
case OPS.beginMarkedContentProps:
|
||
flushTextContentItem();
|
||
if (includeMarkedContent) {
|
||
markedContentData.level++;
|
||
|
||
let mcid = null;
|
||
if (args[1] instanceof Dict) {
|
||
mcid = args[1].get("MCID");
|
||
}
|
||
textContent.items.push({
|
||
type: "beginMarkedContentProps",
|
||
id: Number.isInteger(mcid)
|
||
? `${self.idFactory.getPageObjId()}_mc${mcid}`
|
||
: null,
|
||
tag: args[0] instanceof Name ? args[0].name : null,
|
||
});
|
||
}
|
||
break;
|
||
case OPS.endMarkedContent:
|
||
flushTextContentItem();
|
||
if (includeMarkedContent) {
|
||
if (markedContentData.level === 0) {
|
||
// Handle unbalanced beginMarkedContent/endMarkedContent
|
||
// operators (fixes issue15629.pdf).
|
||
break;
|
||
}
|
||
markedContentData.level--;
|
||
|
||
textContent.items.push({
|
||
type: "endMarkedContent",
|
||
});
|
||
}
|
||
break;
|
||
case OPS.restore:
|
||
if (
|
||
previousState &&
|
||
(previousState.font !== textState.font ||
|
||
previousState.fontSize !== textState.fontSize ||
|
||
previousState.fontName !== textState.fontName)
|
||
) {
|
||
flushTextContentItem();
|
||
}
|
||
break;
|
||
} // switch
|
||
if (textContent.items.length >= sink.desiredSize) {
|
||
// Wait for ready, if we reach highWaterMark.
|
||
stop = true;
|
||
break;
|
||
}
|
||
} // while
|
||
if (stop) {
|
||
next(deferred);
|
||
return;
|
||
}
|
||
flushTextContentItem();
|
||
enqueueChunk();
|
||
resolve();
|
||
}).catch(reason => {
|
||
if (reason instanceof AbortException) {
|
||
return;
|
||
}
|
||
if (this.options.ignoreErrors) {
|
||
// Error(s) in the TextContent -- allow text-extraction to continue.
|
||
warn(
|
||
`getTextContent - ignoring errors during "${task.name}" ` +
|
||
`task: "${reason}".`
|
||
);
|
||
|
||
flushTextContentItem();
|
||
enqueueChunk();
|
||
return;
|
||
}
|
||
throw reason;
|
||
});
|
||
}
|
||
|
||
extractDataStructures(dict, baseDict, properties) {
|
||
const xref = this.xref;
|
||
let cidToGidBytes;
|
||
// 9.10.2
|
||
const toUnicodePromise = this.readToUnicode(
|
||
properties.toUnicode || dict.get("ToUnicode") || baseDict.get("ToUnicode")
|
||
);
|
||
|
||
if (properties.composite) {
|
||
// CIDSystemInfo helps to match CID to glyphs
|
||
const cidSystemInfo = dict.get("CIDSystemInfo");
|
||
if (cidSystemInfo instanceof Dict) {
|
||
properties.cidSystemInfo = {
|
||
registry: stringToPDFString(cidSystemInfo.get("Registry")),
|
||
ordering: stringToPDFString(cidSystemInfo.get("Ordering")),
|
||
supplement: cidSystemInfo.get("Supplement"),
|
||
};
|
||
}
|
||
|
||
try {
|
||
const cidToGidMap = dict.get("CIDToGIDMap");
|
||
if (cidToGidMap instanceof BaseStream) {
|
||
cidToGidBytes = cidToGidMap.getBytes();
|
||
}
|
||
} catch (ex) {
|
||
if (!this.options.ignoreErrors) {
|
||
throw ex;
|
||
}
|
||
warn(`extractDataStructures - ignoring CIDToGIDMap data: "${ex}".`);
|
||
}
|
||
}
|
||
|
||
// Based on 9.6.6 of the spec the encoding can come from multiple places
|
||
// and depends on the font type. The base encoding and differences are
|
||
// read here, but the encoding that is actually used is chosen during
|
||
// glyph mapping in the font.
|
||
// TODO: Loading the built in encoding in the font would allow the
|
||
// differences to be merged in here not require us to hold on to it.
|
||
const differences = [];
|
||
let baseEncodingName = null;
|
||
let encoding;
|
||
if (dict.has("Encoding")) {
|
||
encoding = dict.get("Encoding");
|
||
if (encoding instanceof Dict) {
|
||
baseEncodingName = encoding.get("BaseEncoding");
|
||
baseEncodingName =
|
||
baseEncodingName instanceof Name ? baseEncodingName.name : null;
|
||
// Load the differences between the base and original
|
||
if (encoding.has("Differences")) {
|
||
const diffEncoding = encoding.get("Differences");
|
||
let index = 0;
|
||
for (const entry of diffEncoding) {
|
||
const data = xref.fetchIfRef(entry);
|
||
if (typeof data === "number") {
|
||
index = data;
|
||
} else if (data instanceof Name) {
|
||
differences[index++] = data.name;
|
||
} else {
|
||
throw new FormatError(
|
||
`Invalid entry in 'Differences' array: ${data}`
|
||
);
|
||
}
|
||
}
|
||
}
|
||
} else if (encoding instanceof Name) {
|
||
baseEncodingName = encoding.name;
|
||
} else {
|
||
const msg = "Encoding is not a Name nor a Dict";
|
||
|
||
if (!this.options.ignoreErrors) {
|
||
throw new FormatError(msg);
|
||
}
|
||
warn(msg);
|
||
}
|
||
// According to table 114 if the encoding is a named encoding it must be
|
||
// one of these predefined encodings.
|
||
if (
|
||
baseEncodingName !== "MacRomanEncoding" &&
|
||
baseEncodingName !== "MacExpertEncoding" &&
|
||
baseEncodingName !== "WinAnsiEncoding"
|
||
) {
|
||
baseEncodingName = null;
|
||
}
|
||
}
|
||
|
||
const nonEmbeddedFont = !properties.file || properties.isInternalFont,
|
||
isSymbolsFontName = getSymbolsFonts()[properties.name];
|
||
// Ignore an incorrectly specified named encoding for non-embedded
|
||
// symbol fonts (fixes issue16464.pdf).
|
||
if (baseEncodingName && nonEmbeddedFont && isSymbolsFontName) {
|
||
baseEncodingName = null;
|
||
}
|
||
|
||
if (baseEncodingName) {
|
||
properties.defaultEncoding = getEncoding(baseEncodingName);
|
||
} else {
|
||
const isSymbolicFont = !!(properties.flags & FontFlags.Symbolic);
|
||
const isNonsymbolicFont = !!(properties.flags & FontFlags.Nonsymbolic);
|
||
// According to "Table 114" in section "9.6.6.1 General" (under
|
||
// "9.6.6 Character Encoding") of the PDF specification, a Nonsymbolic
|
||
// font should use the `StandardEncoding` if no encoding is specified.
|
||
encoding = StandardEncoding;
|
||
if (properties.type === "TrueType" && !isNonsymbolicFont) {
|
||
encoding = WinAnsiEncoding;
|
||
}
|
||
// The Symbolic attribute can be misused for regular fonts
|
||
// Heuristic: we have to check if the font is a standard one also
|
||
if (isSymbolicFont || isSymbolsFontName) {
|
||
encoding = MacRomanEncoding;
|
||
if (nonEmbeddedFont) {
|
||
if (/Symbol/i.test(properties.name)) {
|
||
encoding = SymbolSetEncoding;
|
||
} else if (/Dingbats/i.test(properties.name)) {
|
||
encoding = ZapfDingbatsEncoding;
|
||
} else if (/Wingdings/i.test(properties.name)) {
|
||
encoding = WinAnsiEncoding;
|
||
}
|
||
}
|
||
}
|
||
properties.defaultEncoding = encoding;
|
||
}
|
||
|
||
properties.differences = differences;
|
||
properties.baseEncodingName = baseEncodingName;
|
||
properties.hasEncoding = !!baseEncodingName || differences.length > 0;
|
||
properties.dict = dict;
|
||
return toUnicodePromise
|
||
.then(readToUnicode => {
|
||
properties.toUnicode = readToUnicode;
|
||
return this.buildToUnicode(properties);
|
||
})
|
||
.then(builtToUnicode => {
|
||
properties.toUnicode = builtToUnicode;
|
||
if (cidToGidBytes) {
|
||
properties.cidToGidMap = this.readCidToGidMap(
|
||
cidToGidBytes,
|
||
builtToUnicode
|
||
);
|
||
}
|
||
return properties;
|
||
});
|
||
}
|
||
|
||
/**
|
||
* @returns {Array}
|
||
* @private
|
||
*/
|
||
_simpleFontToUnicode(properties, forceGlyphs = false) {
|
||
assert(!properties.composite, "Must be a simple font.");
|
||
|
||
const toUnicode = [];
|
||
const encoding = properties.defaultEncoding.slice();
|
||
const baseEncodingName = properties.baseEncodingName;
|
||
// Merge in the differences array.
|
||
const differences = properties.differences;
|
||
for (const charcode in differences) {
|
||
const glyphName = differences[charcode];
|
||
if (glyphName === ".notdef") {
|
||
// Skip .notdef to prevent rendering errors, e.g. boxes appearing
|
||
// where there should be spaces (fixes issue5256.pdf).
|
||
continue;
|
||
}
|
||
encoding[charcode] = glyphName;
|
||
}
|
||
const glyphsUnicodeMap = getGlyphsUnicode();
|
||
for (const charcode in encoding) {
|
||
// a) Map the character code to a character name.
|
||
let glyphName = encoding[charcode];
|
||
if (glyphName === "") {
|
||
continue;
|
||
}
|
||
// b) Look up the character name in the Adobe Glyph List (see the
|
||
// Bibliography) to obtain the corresponding Unicode value.
|
||
let unicode = glyphsUnicodeMap[glyphName];
|
||
if (unicode !== undefined) {
|
||
toUnicode[charcode] = String.fromCharCode(unicode);
|
||
continue;
|
||
}
|
||
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
||
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
||
let code = 0;
|
||
switch (glyphName[0]) {
|
||
case "G": // Gxx glyph
|
||
if (glyphName.length === 3) {
|
||
code = parseInt(glyphName.substring(1), 16);
|
||
}
|
||
break;
|
||
case "g": // g00xx glyph
|
||
if (glyphName.length === 5) {
|
||
code = parseInt(glyphName.substring(1), 16);
|
||
}
|
||
break;
|
||
case "C": // Cdd{d} glyph
|
||
case "c": // cdd{d} glyph
|
||
if (glyphName.length >= 3 && glyphName.length <= 4) {
|
||
const codeStr = glyphName.substring(1);
|
||
|
||
if (forceGlyphs) {
|
||
code = parseInt(codeStr, 16);
|
||
break;
|
||
}
|
||
// Normally the Cdd{d}/cdd{d} glyphName format will contain
|
||
// regular, i.e. base 10, charCodes (see issue4550.pdf)...
|
||
code = +codeStr;
|
||
|
||
// ... however some PDF generators violate that assumption by
|
||
// containing glyph, i.e. base 16, codes instead.
|
||
// In that case we need to re-parse the *entire* encoding to
|
||
// prevent broken text-selection (fixes issue9655_reduced.pdf).
|
||
if (Number.isNaN(code) && Number.isInteger(parseInt(codeStr, 16))) {
|
||
return this._simpleFontToUnicode(
|
||
properties,
|
||
/* forceGlyphs */ true
|
||
);
|
||
}
|
||
}
|
||
break;
|
||
case "u": // 'uniXXXX'/'uXXXX{XX}' glyphs
|
||
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
|
||
if (unicode !== -1) {
|
||
code = unicode;
|
||
}
|
||
break;
|
||
default:
|
||
// Support (some) non-standard ligatures.
|
||
switch (glyphName) {
|
||
case "f_h":
|
||
case "f_t":
|
||
case "T_h":
|
||
toUnicode[charcode] = glyphName.replaceAll("_", "");
|
||
continue;
|
||
}
|
||
break;
|
||
}
|
||
if (code > 0 && code <= 0x10ffff && Number.isInteger(code)) {
|
||
// If `baseEncodingName` is one the predefined encodings, and `code`
|
||
// equals `charcode`, using the glyph defined in the baseEncoding
|
||
// seems to yield a better `toUnicode` mapping (fixes issue 5070).
|
||
if (baseEncodingName && code === +charcode) {
|
||
const baseEncoding = getEncoding(baseEncodingName);
|
||
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
|
||
toUnicode[charcode] = String.fromCharCode(
|
||
glyphsUnicodeMap[glyphName]
|
||
);
|
||
continue;
|
||
}
|
||
}
|
||
toUnicode[charcode] = String.fromCodePoint(code);
|
||
}
|
||
}
|
||
return toUnicode;
|
||
}
|
||
|
||
/**
|
||
* Builds a char code to unicode map based on section 9.10 of the spec.
|
||
* @param {Object} properties Font properties object.
|
||
* @returns {Promise} A Promise that is resolved with a
|
||
* {ToUnicodeMap|IdentityToUnicodeMap} object.
|
||
*/
|
||
async buildToUnicode(properties) {
|
||
properties.hasIncludedToUnicodeMap = properties.toUnicode?.length > 0;
|
||
|
||
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
||
if (properties.hasIncludedToUnicodeMap) {
|
||
// Some fonts contain incomplete ToUnicode data, causing issues with
|
||
// text-extraction. For simple fonts, containing encoding information,
|
||
// use a fallback ToUnicode map to improve this (fixes issue8229.pdf).
|
||
if (!properties.composite && properties.hasEncoding) {
|
||
properties.fallbackToUnicode = this._simpleFontToUnicode(properties);
|
||
}
|
||
return properties.toUnicode;
|
||
}
|
||
|
||
// According to the spec if the font is a simple font we should only map
|
||
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
||
// the differences array only contains adobe standard or symbol set names,
|
||
// in pratice it seems better to always try to create a toUnicode map
|
||
// based of the default encoding.
|
||
if (!properties.composite /* is simple font */) {
|
||
return new ToUnicodeMap(this._simpleFontToUnicode(properties));
|
||
}
|
||
|
||
// If the font is a composite font that uses one of the predefined CMaps
|
||
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
||
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
||
// Adobe-Korea1 character collection:
|
||
if (
|
||
properties.composite &&
|
||
((properties.cMap.builtInCMap &&
|
||
!(properties.cMap instanceof IdentityCMap)) ||
|
||
(properties.cidSystemInfo.registry === "Adobe" &&
|
||
(properties.cidSystemInfo.ordering === "GB1" ||
|
||
properties.cidSystemInfo.ordering === "CNS1" ||
|
||
properties.cidSystemInfo.ordering === "Japan1" ||
|
||
properties.cidSystemInfo.ordering === "Korea1")))
|
||
) {
|
||
// Then:
|
||
// a) Map the character code to a character identifier (CID) according
|
||
// to the font’s CMap.
|
||
// b) Obtain the registry and ordering of the character collection used
|
||
// by the font’s CMap (for example, Adobe and Japan1) from its
|
||
// CIDSystemInfo dictionary.
|
||
const { registry, ordering } = properties.cidSystemInfo;
|
||
// c) Construct a second CMap name by concatenating the registry and
|
||
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
||
// (for example, Adobe–Japan1–UCS2).
|
||
const ucs2CMapName = Name.get(`${registry}-${ordering}-UCS2`);
|
||
// d) Obtain the CMap with the name constructed in step (c) (available
|
||
// from the ASN Web site; see the Bibliography).
|
||
const ucs2CMap = await CMapFactory.create({
|
||
encoding: ucs2CMapName,
|
||
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
||
useCMap: null,
|
||
});
|
||
const toUnicode = [],
|
||
buf = [];
|
||
properties.cMap.forEach(function (charcode, cid) {
|
||
if (cid > 0xffff) {
|
||
throw new FormatError("Max size of CID is 65,535");
|
||
}
|
||
// e) Map the CID obtained in step (a) according to the CMap
|
||
// obtained in step (d), producing a Unicode value.
|
||
const ucs2 = ucs2CMap.lookup(cid);
|
||
if (ucs2) {
|
||
buf.length = 0;
|
||
// Support multi-byte entries (fixes issue16176.pdf).
|
||
for (let i = 0, ii = ucs2.length; i < ii; i += 2) {
|
||
buf.push((ucs2.charCodeAt(i) << 8) + ucs2.charCodeAt(i + 1));
|
||
}
|
||
toUnicode[charcode] = String.fromCharCode(...buf);
|
||
}
|
||
});
|
||
return new ToUnicodeMap(toUnicode);
|
||
}
|
||
|
||
// The viewer's choice, just use an identity map.
|
||
return new IdentityToUnicodeMap(properties.firstChar, properties.lastChar);
|
||
}
|
||
|
||
readToUnicode(cmapObj) {
|
||
if (!cmapObj) {
|
||
return Promise.resolve(null);
|
||
}
|
||
if (cmapObj instanceof Name) {
|
||
return CMapFactory.create({
|
||
encoding: cmapObj,
|
||
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
||
useCMap: null,
|
||
}).then(function (cmap) {
|
||
if (cmap instanceof IdentityCMap) {
|
||
return new IdentityToUnicodeMap(0, 0xffff);
|
||
}
|
||
return new ToUnicodeMap(cmap.getMap());
|
||
});
|
||
} else if (cmapObj instanceof BaseStream) {
|
||
return CMapFactory.create({
|
||
encoding: cmapObj,
|
||
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
||
useCMap: null,
|
||
}).then(
|
||
function (cmap) {
|
||
if (cmap instanceof IdentityCMap) {
|
||
return new IdentityToUnicodeMap(0, 0xffff);
|
||
}
|
||
const map = new Array(cmap.length);
|
||
// Convert UTF-16BE
|
||
// NOTE: cmap can be a sparse array, so use forEach instead of
|
||
// `for(;;)` to iterate over all keys.
|
||
cmap.forEach(function (charCode, token) {
|
||
// Some cmaps contain *only* CID characters (fixes issue9367.pdf).
|
||
if (typeof token === "number") {
|
||
map[charCode] = String.fromCodePoint(token);
|
||
return;
|
||
}
|
||
const str = [];
|
||
for (let k = 0; k < token.length; k += 2) {
|
||
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||
if ((w1 & 0xf800) !== 0xd800) {
|
||
// w1 < 0xD800 || w1 > 0xDFFF
|
||
str.push(w1);
|
||
continue;
|
||
}
|
||
k += 2;
|
||
const w2 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||
str.push(((w1 & 0x3ff) << 10) + (w2 & 0x3ff) + 0x10000);
|
||
}
|
||
map[charCode] = String.fromCodePoint(...str);
|
||
});
|
||
return new ToUnicodeMap(map);
|
||
},
|
||
reason => {
|
||
if (reason instanceof AbortException) {
|
||
return null;
|
||
}
|
||
if (this.options.ignoreErrors) {
|
||
warn(`readToUnicode - ignoring ToUnicode data: "${reason}".`);
|
||
return null;
|
||
}
|
||
throw reason;
|
||
}
|
||
);
|
||
}
|
||
return Promise.resolve(null);
|
||
}
|
||
|
||
readCidToGidMap(glyphsData, toUnicode) {
|
||
// Extract the encoding from the CIDToGIDMap
|
||
|
||
// Set encoding 0 to later verify the font has an encoding
|
||
const result = [];
|
||
for (let j = 0, jj = glyphsData.length; j < jj; j++) {
|
||
const glyphID = (glyphsData[j++] << 8) | glyphsData[j];
|
||
const code = j >> 1;
|
||
if (glyphID === 0 && !toUnicode.has(code)) {
|
||
continue;
|
||
}
|
||
result[code] = glyphID;
|
||
}
|
||
return result;
|
||
}
|
||
|
||
extractWidths(dict, descriptor, properties) {
|
||
const xref = this.xref;
|
||
let glyphsWidths = [];
|
||
let defaultWidth = 0;
|
||
const glyphsVMetrics = [];
|
||
let defaultVMetrics;
|
||
let i, ii, j, jj, start, code, widths;
|
||
if (properties.composite) {
|
||
defaultWidth = dict.has("DW") ? dict.get("DW") : 1000;
|
||
|
||
widths = dict.get("W");
|
||
if (widths) {
|
||
for (i = 0, ii = widths.length; i < ii; i++) {
|
||
start = xref.fetchIfRef(widths[i++]);
|
||
code = xref.fetchIfRef(widths[i]);
|
||
if (Array.isArray(code)) {
|
||
for (j = 0, jj = code.length; j < jj; j++) {
|
||
glyphsWidths[start++] = xref.fetchIfRef(code[j]);
|
||
}
|
||
} else {
|
||
const width = xref.fetchIfRef(widths[++i]);
|
||
for (j = start; j <= code; j++) {
|
||
glyphsWidths[j] = width;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if (properties.vertical) {
|
||
let vmetrics = dict.getArray("DW2") || [880, -1000];
|
||
defaultVMetrics = [vmetrics[1], defaultWidth * 0.5, vmetrics[0]];
|
||
vmetrics = dict.get("W2");
|
||
if (vmetrics) {
|
||
for (i = 0, ii = vmetrics.length; i < ii; i++) {
|
||
start = xref.fetchIfRef(vmetrics[i++]);
|
||
code = xref.fetchIfRef(vmetrics[i]);
|
||
if (Array.isArray(code)) {
|
||
for (j = 0, jj = code.length; j < jj; j++) {
|
||
glyphsVMetrics[start++] = [
|
||
xref.fetchIfRef(code[j++]),
|
||
xref.fetchIfRef(code[j++]),
|
||
xref.fetchIfRef(code[j]),
|
||
];
|
||
}
|
||
} else {
|
||
const vmetric = [
|
||
xref.fetchIfRef(vmetrics[++i]),
|
||
xref.fetchIfRef(vmetrics[++i]),
|
||
xref.fetchIfRef(vmetrics[++i]),
|
||
];
|
||
for (j = start; j <= code; j++) {
|
||
glyphsVMetrics[j] = vmetric;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
const firstChar = properties.firstChar;
|
||
widths = dict.get("Widths");
|
||
if (widths) {
|
||
j = firstChar;
|
||
for (i = 0, ii = widths.length; i < ii; i++) {
|
||
glyphsWidths[j++] = xref.fetchIfRef(widths[i]);
|
||
}
|
||
defaultWidth = parseFloat(descriptor.get("MissingWidth")) || 0;
|
||
} else {
|
||
// Trying get the BaseFont metrics (see comment above).
|
||
const baseFontName = dict.get("BaseFont");
|
||
if (baseFontName instanceof Name) {
|
||
const metrics = this.getBaseFontMetrics(baseFontName.name);
|
||
|
||
glyphsWidths = this.buildCharCodeToWidth(metrics.widths, properties);
|
||
defaultWidth = metrics.defaultWidth;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Heuristic: detection of monospace font by checking all non-zero widths
|
||
let isMonospace = true;
|
||
let firstWidth = defaultWidth;
|
||
for (const glyph in glyphsWidths) {
|
||
const glyphWidth = glyphsWidths[glyph];
|
||
if (!glyphWidth) {
|
||
continue;
|
||
}
|
||
if (!firstWidth) {
|
||
firstWidth = glyphWidth;
|
||
continue;
|
||
}
|
||
if (firstWidth !== glyphWidth) {
|
||
isMonospace = false;
|
||
break;
|
||
}
|
||
}
|
||
if (isMonospace) {
|
||
properties.flags |= FontFlags.FixedPitch;
|
||
} else {
|
||
// Clear the flag.
|
||
properties.flags &= ~FontFlags.FixedPitch;
|
||
}
|
||
|
||
properties.defaultWidth = defaultWidth;
|
||
properties.widths = glyphsWidths;
|
||
properties.defaultVMetrics = defaultVMetrics;
|
||
properties.vmetrics = glyphsVMetrics;
|
||
}
|
||
|
||
isSerifFont(baseFontName) {
|
||
// Simulating descriptor flags attribute
|
||
const fontNameWoStyle = baseFontName.split("-")[0];
|
||
return (
|
||
fontNameWoStyle in getSerifFonts() || /serif/gi.test(fontNameWoStyle)
|
||
);
|
||
}
|
||
|
||
getBaseFontMetrics(name) {
|
||
let defaultWidth = 0;
|
||
let widths = Object.create(null);
|
||
let monospace = false;
|
||
const stdFontMap = getStdFontMap();
|
||
let lookupName = stdFontMap[name] || name;
|
||
const Metrics = getMetrics();
|
||
|
||
if (!(lookupName in Metrics)) {
|
||
// Use default fonts for looking up font metrics if the passed
|
||
// font is not a base font
|
||
lookupName = this.isSerifFont(name) ? "Times-Roman" : "Helvetica";
|
||
}
|
||
const glyphWidths = Metrics[lookupName];
|
||
|
||
if (typeof glyphWidths === "number") {
|
||
defaultWidth = glyphWidths;
|
||
monospace = true;
|
||
} else {
|
||
widths = glyphWidths(); // expand lazy widths array
|
||
}
|
||
|
||
return {
|
||
defaultWidth,
|
||
monospace,
|
||
widths,
|
||
};
|
||
}
|
||
|
||
buildCharCodeToWidth(widthsByGlyphName, properties) {
|
||
const widths = Object.create(null);
|
||
const differences = properties.differences;
|
||
const encoding = properties.defaultEncoding;
|
||
for (let charCode = 0; charCode < 256; charCode++) {
|
||
if (charCode in differences && widthsByGlyphName[differences[charCode]]) {
|
||
widths[charCode] = widthsByGlyphName[differences[charCode]];
|
||
continue;
|
||
}
|
||
if (charCode in encoding && widthsByGlyphName[encoding[charCode]]) {
|
||
widths[charCode] = widthsByGlyphName[encoding[charCode]];
|
||
continue;
|
||
}
|
||
}
|
||
return widths;
|
||
}
|
||
|
||
preEvaluateFont(dict) {
|
||
const baseDict = dict;
|
||
let type = dict.get("Subtype");
|
||
if (!(type instanceof Name)) {
|
||
throw new FormatError("invalid font Subtype");
|
||
}
|
||
|
||
let composite = false;
|
||
let hash, toUnicode;
|
||
if (type.name === "Type0") {
|
||
// If font is a composite
|
||
// - get the descendant font
|
||
// - set the type according to the descendant font
|
||
// - get the FontDescriptor from the descendant font
|
||
const df = dict.get("DescendantFonts");
|
||
if (!df) {
|
||
throw new FormatError("Descendant fonts are not specified");
|
||
}
|
||
dict = Array.isArray(df) ? this.xref.fetchIfRef(df[0]) : df;
|
||
|
||
if (!(dict instanceof Dict)) {
|
||
throw new FormatError("Descendant font is not a dictionary.");
|
||
}
|
||
type = dict.get("Subtype");
|
||
if (!(type instanceof Name)) {
|
||
throw new FormatError("invalid font Subtype");
|
||
}
|
||
composite = true;
|
||
}
|
||
|
||
const firstChar = dict.get("FirstChar") || 0,
|
||
lastChar = dict.get("LastChar") || (composite ? 0xffff : 0xff);
|
||
const descriptor = dict.get("FontDescriptor");
|
||
if (descriptor) {
|
||
hash = new MurmurHash3_64();
|
||
|
||
const encoding = baseDict.getRaw("Encoding");
|
||
if (encoding instanceof Name) {
|
||
hash.update(encoding.name);
|
||
} else if (encoding instanceof Ref) {
|
||
hash.update(encoding.toString());
|
||
} else if (encoding instanceof Dict) {
|
||
for (const entry of encoding.getRawValues()) {
|
||
if (entry instanceof Name) {
|
||
hash.update(entry.name);
|
||
} else if (entry instanceof Ref) {
|
||
hash.update(entry.toString());
|
||
} else if (Array.isArray(entry)) {
|
||
// 'Differences' array (fixes bug1157493.pdf).
|
||
const diffLength = entry.length,
|
||
diffBuf = new Array(diffLength);
|
||
|
||
for (let j = 0; j < diffLength; j++) {
|
||
const diffEntry = entry[j];
|
||
if (diffEntry instanceof Name) {
|
||
diffBuf[j] = diffEntry.name;
|
||
} else if (
|
||
typeof diffEntry === "number" ||
|
||
diffEntry instanceof Ref
|
||
) {
|
||
diffBuf[j] = diffEntry.toString();
|
||
}
|
||
}
|
||
hash.update(diffBuf.join());
|
||
}
|
||
}
|
||
}
|
||
|
||
hash.update(`${firstChar}-${lastChar}`); // Fixes issue10665_reduced.pdf
|
||
|
||
toUnicode = dict.get("ToUnicode") || baseDict.get("ToUnicode");
|
||
if (toUnicode instanceof BaseStream) {
|
||
const stream = toUnicode.str || toUnicode;
|
||
const uint8array = stream.buffer
|
||
? new Uint8Array(stream.buffer.buffer, 0, stream.bufferLength)
|
||
: new Uint8Array(
|
||
stream.bytes.buffer,
|
||
stream.start,
|
||
stream.end - stream.start
|
||
);
|
||
hash.update(uint8array);
|
||
} else if (toUnicode instanceof Name) {
|
||
hash.update(toUnicode.name);
|
||
}
|
||
|
||
const widths = dict.get("Widths") || baseDict.get("Widths");
|
||
if (Array.isArray(widths)) {
|
||
const widthsBuf = [];
|
||
for (const entry of widths) {
|
||
if (typeof entry === "number" || entry instanceof Ref) {
|
||
widthsBuf.push(entry.toString());
|
||
}
|
||
}
|
||
hash.update(widthsBuf.join());
|
||
}
|
||
|
||
if (composite) {
|
||
hash.update("compositeFont");
|
||
|
||
const compositeWidths = dict.get("W") || baseDict.get("W");
|
||
if (Array.isArray(compositeWidths)) {
|
||
const widthsBuf = [];
|
||
for (const entry of compositeWidths) {
|
||
if (typeof entry === "number" || entry instanceof Ref) {
|
||
widthsBuf.push(entry.toString());
|
||
} else if (Array.isArray(entry)) {
|
||
const subWidthsBuf = [];
|
||
for (const element of entry) {
|
||
if (typeof element === "number" || element instanceof Ref) {
|
||
subWidthsBuf.push(element.toString());
|
||
}
|
||
}
|
||
widthsBuf.push(`[${subWidthsBuf.join()}]`);
|
||
}
|
||
}
|
||
hash.update(widthsBuf.join());
|
||
}
|
||
|
||
const cidToGidMap =
|
||
dict.getRaw("CIDToGIDMap") || baseDict.getRaw("CIDToGIDMap");
|
||
if (cidToGidMap instanceof Name) {
|
||
hash.update(cidToGidMap.name);
|
||
} else if (cidToGidMap instanceof Ref) {
|
||
hash.update(cidToGidMap.toString());
|
||
} else if (cidToGidMap instanceof BaseStream) {
|
||
hash.update(cidToGidMap.peekBytes());
|
||
}
|
||
}
|
||
}
|
||
|
||
return {
|
||
descriptor,
|
||
dict,
|
||
baseDict,
|
||
composite,
|
||
type: type.name,
|
||
firstChar,
|
||
lastChar,
|
||
toUnicode,
|
||
hash: hash ? hash.hexdigest() : "",
|
||
};
|
||
}
|
||
|
||
async translateFont({
|
||
descriptor,
|
||
dict,
|
||
baseDict,
|
||
composite,
|
||
type,
|
||
firstChar,
|
||
lastChar,
|
||
toUnicode,
|
||
cssFontInfo,
|
||
}) {
|
||
const isType3Font = type === "Type3";
|
||
let properties;
|
||
|
||
if (!descriptor) {
|
||
if (isType3Font) {
|
||
// FontDescriptor is only required for Type3 fonts when the document
|
||
// is a tagged pdf. Create a barbebones one to get by.
|
||
descriptor = new Dict(null);
|
||
descriptor.set("FontName", Name.get(type));
|
||
descriptor.set("FontBBox", dict.getArray("FontBBox") || [0, 0, 0, 0]);
|
||
} else {
|
||
// Before PDF 1.5 if the font was one of the base 14 fonts, having a
|
||
// FontDescriptor was not required.
|
||
// This case is here for compatibility.
|
||
let baseFontName = dict.get("BaseFont");
|
||
if (!(baseFontName instanceof Name)) {
|
||
throw new FormatError("Base font is not specified");
|
||
}
|
||
|
||
// Using base font name as a font name.
|
||
baseFontName = baseFontName.name.replaceAll(/[,_]/g, "-");
|
||
const metrics = this.getBaseFontMetrics(baseFontName);
|
||
|
||
// Simulating descriptor flags attribute
|
||
const fontNameWoStyle = baseFontName.split("-")[0];
|
||
const flags =
|
||
(this.isSerifFont(fontNameWoStyle) ? FontFlags.Serif : 0) |
|
||
(metrics.monospace ? FontFlags.FixedPitch : 0) |
|
||
(getSymbolsFonts()[fontNameWoStyle]
|
||
? FontFlags.Symbolic
|
||
: FontFlags.Nonsymbolic);
|
||
|
||
properties = {
|
||
type,
|
||
name: baseFontName,
|
||
loadedName: baseDict.loadedName,
|
||
systemFontInfo: null,
|
||
widths: metrics.widths,
|
||
defaultWidth: metrics.defaultWidth,
|
||
isSimulatedFlags: true,
|
||
flags,
|
||
firstChar,
|
||
lastChar,
|
||
toUnicode,
|
||
xHeight: 0,
|
||
capHeight: 0,
|
||
italicAngle: 0,
|
||
isType3Font,
|
||
};
|
||
const widths = dict.get("Widths");
|
||
|
||
const standardFontName = getStandardFontName(baseFontName);
|
||
let file = null;
|
||
if (standardFontName) {
|
||
file = await this.fetchStandardFontData(standardFontName);
|
||
properties.isInternalFont = !!file;
|
||
}
|
||
if (!properties.isInternalFont && this.options.useSystemFonts) {
|
||
properties.systemFontInfo = getFontSubstitution(
|
||
this.systemFontCache,
|
||
this.idFactory,
|
||
this.options.standardFontDataUrl,
|
||
baseFontName,
|
||
standardFontName
|
||
);
|
||
}
|
||
return this.extractDataStructures(dict, dict, properties).then(
|
||
newProperties => {
|
||
if (widths) {
|
||
const glyphWidths = [];
|
||
let j = firstChar;
|
||
for (const width of widths) {
|
||
glyphWidths[j++] = this.xref.fetchIfRef(width);
|
||
}
|
||
newProperties.widths = glyphWidths;
|
||
} else {
|
||
newProperties.widths = this.buildCharCodeToWidth(
|
||
metrics.widths,
|
||
newProperties
|
||
);
|
||
}
|
||
return new Font(baseFontName, file, newProperties);
|
||
}
|
||
);
|
||
}
|
||
}
|
||
|
||
// According to the spec if 'FontDescriptor' is declared, 'FirstChar',
|
||
// 'LastChar' and 'Widths' should exist too, but some PDF encoders seem
|
||
// to ignore this rule when a variant of a standard font is used.
|
||
// TODO Fill the width array depending on which of the base font this is
|
||
// a variant.
|
||
|
||
let fontName = descriptor.get("FontName");
|
||
let baseFont = dict.get("BaseFont");
|
||
// Some bad PDFs have a string as the font name.
|
||
if (typeof fontName === "string") {
|
||
fontName = Name.get(fontName);
|
||
}
|
||
if (typeof baseFont === "string") {
|
||
baseFont = Name.get(baseFont);
|
||
}
|
||
|
||
const fontNameStr = fontName?.name;
|
||
const baseFontStr = baseFont?.name;
|
||
if (!isType3Font && fontNameStr !== baseFontStr) {
|
||
info(
|
||
`The FontDescriptor's FontName is "${fontNameStr}" but ` +
|
||
`should be the same as the Font's BaseFont "${baseFontStr}".`
|
||
);
|
||
// - Workaround for cases where e.g. fontNameStr = 'Arial' and
|
||
// baseFontStr = 'Arial,Bold' (needed when no font file is embedded).
|
||
//
|
||
// - Workaround for cases where e.g. fontNameStr = 'wg09np' and
|
||
// baseFontStr = 'Wingdings-Regular' (fixes issue7454.pdf).
|
||
if (
|
||
fontNameStr &&
|
||
baseFontStr &&
|
||
(baseFontStr.startsWith(fontNameStr) ||
|
||
(!isKnownFontName(fontNameStr) && isKnownFontName(baseFontStr)))
|
||
) {
|
||
fontName = null;
|
||
}
|
||
}
|
||
fontName ||= baseFont;
|
||
|
||
if (!(fontName instanceof Name)) {
|
||
throw new FormatError("invalid font name");
|
||
}
|
||
|
||
let fontFile, subtype, length1, length2, length3;
|
||
try {
|
||
fontFile = descriptor.get("FontFile", "FontFile2", "FontFile3");
|
||
} catch (ex) {
|
||
if (!this.options.ignoreErrors) {
|
||
throw ex;
|
||
}
|
||
warn(`translateFont - fetching "${fontName.name}" font file: "${ex}".`);
|
||
fontFile = new NullStream();
|
||
}
|
||
let isInternalFont = false;
|
||
let glyphScaleFactors = null;
|
||
let systemFontInfo = null;
|
||
if (fontFile) {
|
||
if (fontFile.dict) {
|
||
const subtypeEntry = fontFile.dict.get("Subtype");
|
||
if (subtypeEntry instanceof Name) {
|
||
subtype = subtypeEntry.name;
|
||
}
|
||
length1 = fontFile.dict.get("Length1");
|
||
length2 = fontFile.dict.get("Length2");
|
||
length3 = fontFile.dict.get("Length3");
|
||
}
|
||
} else if (cssFontInfo) {
|
||
// We've a missing XFA font.
|
||
const standardFontName = getXfaFontName(fontName.name);
|
||
if (standardFontName) {
|
||
cssFontInfo.fontFamily = `${cssFontInfo.fontFamily}-PdfJS-XFA`;
|
||
cssFontInfo.metrics = standardFontName.metrics || null;
|
||
glyphScaleFactors = standardFontName.factors || null;
|
||
fontFile = await this.fetchStandardFontData(standardFontName.name);
|
||
isInternalFont = !!fontFile;
|
||
|
||
// We're using a substitution font but for example widths (if any)
|
||
// are related to the glyph positions in the font.
|
||
// So we overwrite everything here to be sure that widths are
|
||
// correct.
|
||
baseDict = dict = getXfaFontDict(fontName.name);
|
||
composite = true;
|
||
}
|
||
} else if (!isType3Font) {
|
||
const standardFontName = getStandardFontName(fontName.name);
|
||
if (standardFontName) {
|
||
fontFile = await this.fetchStandardFontData(standardFontName);
|
||
isInternalFont = !!fontFile;
|
||
}
|
||
if (!isInternalFont && this.options.useSystemFonts) {
|
||
systemFontInfo = getFontSubstitution(
|
||
this.systemFontCache,
|
||
this.idFactory,
|
||
this.options.standardFontDataUrl,
|
||
fontName.name,
|
||
standardFontName
|
||
);
|
||
}
|
||
}
|
||
|
||
properties = {
|
||
type,
|
||
name: fontName.name,
|
||
subtype,
|
||
file: fontFile,
|
||
length1,
|
||
length2,
|
||
length3,
|
||
isInternalFont,
|
||
loadedName: baseDict.loadedName,
|
||
composite,
|
||
fixedPitch: false,
|
||
fontMatrix: dict.getArray("FontMatrix") || FONT_IDENTITY_MATRIX,
|
||
firstChar,
|
||
lastChar,
|
||
toUnicode,
|
||
bbox: descriptor.getArray("FontBBox") || dict.getArray("FontBBox"),
|
||
ascent: descriptor.get("Ascent"),
|
||
descent: descriptor.get("Descent"),
|
||
xHeight: descriptor.get("XHeight") || 0,
|
||
capHeight: descriptor.get("CapHeight") || 0,
|
||
flags: descriptor.get("Flags"),
|
||
italicAngle: descriptor.get("ItalicAngle") || 0,
|
||
isType3Font,
|
||
cssFontInfo,
|
||
scaleFactors: glyphScaleFactors,
|
||
systemFontInfo,
|
||
};
|
||
|
||
if (composite) {
|
||
const cidEncoding = baseDict.get("Encoding");
|
||
if (cidEncoding instanceof Name) {
|
||
properties.cidEncoding = cidEncoding.name;
|
||
}
|
||
const cMap = await CMapFactory.create({
|
||
encoding: cidEncoding,
|
||
fetchBuiltInCMap: this._fetchBuiltInCMapBound,
|
||
useCMap: null,
|
||
});
|
||
properties.cMap = cMap;
|
||
properties.vertical = properties.cMap.vertical;
|
||
}
|
||
|
||
return this.extractDataStructures(dict, baseDict, properties).then(
|
||
newProperties => {
|
||
this.extractWidths(dict, descriptor, newProperties);
|
||
|
||
return new Font(fontName.name, fontFile, newProperties);
|
||
}
|
||
);
|
||
}
|
||
|
||
static buildFontPaths(font, glyphs, handler, evaluatorOptions) {
|
||
function buildPath(fontChar) {
|
||
const glyphName = `${font.loadedName}_path_${fontChar}`;
|
||
try {
|
||
if (font.renderer.hasBuiltPath(fontChar)) {
|
||
return;
|
||
}
|
||
handler.send("commonobj", [
|
||
glyphName,
|
||
"FontPath",
|
||
font.renderer.getPathJs(fontChar),
|
||
]);
|
||
} catch (reason) {
|
||
if (evaluatorOptions.ignoreErrors) {
|
||
warn(`buildFontPaths - ignoring ${glyphName} glyph: "${reason}".`);
|
||
return;
|
||
}
|
||
throw reason;
|
||
}
|
||
}
|
||
|
||
for (const glyph of glyphs) {
|
||
buildPath(glyph.fontChar);
|
||
|
||
// If the glyph has an accent we need to build a path for its
|
||
// fontChar too, otherwise CanvasGraphics_paintChar will fail.
|
||
const accent = glyph.accent;
|
||
if (accent?.fontChar) {
|
||
buildPath(accent.fontChar);
|
||
}
|
||
}
|
||
}
|
||
|
||
static get fallbackFontDict() {
|
||
const dict = new Dict();
|
||
dict.set("BaseFont", Name.get("Helvetica"));
|
||
dict.set("Type", Name.get("FallbackType"));
|
||
dict.set("Subtype", Name.get("FallbackType"));
|
||
dict.set("Encoding", Name.get("WinAnsiEncoding"));
|
||
|
||
return shadow(this, "fallbackFontDict", dict);
|
||
}
|
||
}
|
||
|
||
class TranslatedFont {
|
||
constructor({ loadedName, font, dict, evaluatorOptions }) {
|
||
this.loadedName = loadedName;
|
||
this.font = font;
|
||
this.dict = dict;
|
||
this._evaluatorOptions = evaluatorOptions || DefaultPartialEvaluatorOptions;
|
||
this.type3Loaded = null;
|
||
this.type3Dependencies = font.isType3Font ? new Set() : null;
|
||
this.sent = false;
|
||
}
|
||
|
||
send(handler) {
|
||
if (this.sent) {
|
||
return;
|
||
}
|
||
this.sent = true;
|
||
|
||
handler.send("commonobj", [
|
||
this.loadedName,
|
||
"Font",
|
||
this.font.exportData(this._evaluatorOptions.fontExtraProperties),
|
||
]);
|
||
}
|
||
|
||
fallback(handler) {
|
||
if (!this.font.data) {
|
||
return;
|
||
}
|
||
// When font loading failed, fall back to the built-in font renderer.
|
||
this.font.disableFontFace = true;
|
||
// An arbitrary number of text rendering operators could have been
|
||
// encountered between the point in time when the 'Font' message was sent
|
||
// to the main-thread, and the point in time when the 'FontFallback'
|
||
// message was received on the worker-thread.
|
||
// To ensure that all 'FontPath's are available on the main-thread, when
|
||
// font loading failed, attempt to resend *all* previously parsed glyphs.
|
||
PartialEvaluator.buildFontPaths(
|
||
this.font,
|
||
/* glyphs = */ this.font.glyphCacheValues,
|
||
handler,
|
||
this._evaluatorOptions
|
||
);
|
||
}
|
||
|
||
loadType3Data(evaluator, resources, task) {
|
||
if (this.type3Loaded) {
|
||
return this.type3Loaded;
|
||
}
|
||
if (!this.font.isType3Font) {
|
||
throw new Error("Must be a Type3 font.");
|
||
}
|
||
// When parsing Type3 glyphs, always ignore them if there are errors.
|
||
// Compared to the parsing of e.g. an entire page, it doesn't really
|
||
// make sense to only be able to render a Type3 glyph partially.
|
||
const type3Evaluator = evaluator.clone({ ignoreErrors: false });
|
||
type3Evaluator.parsingType3Font = true;
|
||
// Prevent circular references in Type3 fonts.
|
||
const type3FontRefs = new RefSet(evaluator.type3FontRefs);
|
||
if (this.dict.objId && !type3FontRefs.has(this.dict.objId)) {
|
||
type3FontRefs.put(this.dict.objId);
|
||
}
|
||
type3Evaluator.type3FontRefs = type3FontRefs;
|
||
|
||
const translatedFont = this.font,
|
||
type3Dependencies = this.type3Dependencies;
|
||
let loadCharProcsPromise = Promise.resolve();
|
||
const charProcs = this.dict.get("CharProcs");
|
||
const fontResources = this.dict.get("Resources") || resources;
|
||
const charProcOperatorList = Object.create(null);
|
||
|
||
const fontBBox = Util.normalizeRect(translatedFont.bbox || [0, 0, 0, 0]),
|
||
width = fontBBox[2] - fontBBox[0],
|
||
height = fontBBox[3] - fontBBox[1];
|
||
const fontBBoxSize = Math.hypot(width, height);
|
||
|
||
for (const key of charProcs.getKeys()) {
|
||
loadCharProcsPromise = loadCharProcsPromise.then(() => {
|
||
const glyphStream = charProcs.get(key);
|
||
const operatorList = new OperatorList();
|
||
return type3Evaluator
|
||
.getOperatorList({
|
||
stream: glyphStream,
|
||
task,
|
||
resources: fontResources,
|
||
operatorList,
|
||
})
|
||
.then(() => {
|
||
// According to the PDF specification, section "9.6.5 Type 3 Fonts"
|
||
// and "Table 113":
|
||
// "A glyph description that begins with the d1 operator should
|
||
// not execute any operators that set the colour (or other
|
||
// colour-related parameters) in the graphics state;
|
||
// any use of such operators shall be ignored."
|
||
if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) {
|
||
this._removeType3ColorOperators(operatorList, fontBBoxSize);
|
||
}
|
||
charProcOperatorList[key] = operatorList.getIR();
|
||
|
||
for (const dependency of operatorList.dependencies) {
|
||
type3Dependencies.add(dependency);
|
||
}
|
||
})
|
||
.catch(function (reason) {
|
||
warn(`Type3 font resource "${key}" is not available.`);
|
||
const dummyOperatorList = new OperatorList();
|
||
charProcOperatorList[key] = dummyOperatorList.getIR();
|
||
});
|
||
});
|
||
}
|
||
this.type3Loaded = loadCharProcsPromise.then(() => {
|
||
translatedFont.charProcOperatorList = charProcOperatorList;
|
||
if (this._bbox) {
|
||
translatedFont.isCharBBox = true;
|
||
translatedFont.bbox = this._bbox;
|
||
}
|
||
});
|
||
return this.type3Loaded;
|
||
}
|
||
|
||
/**
|
||
* @private
|
||
*/
|
||
_removeType3ColorOperators(operatorList, fontBBoxSize = NaN) {
|
||
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
|
||
assert(
|
||
operatorList.fnArray[0] === OPS.setCharWidthAndBounds,
|
||
"Type3 glyph shall start with the d1 operator."
|
||
);
|
||
}
|
||
const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2)),
|
||
width = charBBox[2] - charBBox[0],
|
||
height = charBBox[3] - charBBox[1];
|
||
const charBBoxSize = Math.hypot(width, height);
|
||
|
||
if (width === 0 || height === 0) {
|
||
// Skip the d1 operator when its bounds are bogus (fixes issue14953.pdf).
|
||
operatorList.fnArray.splice(0, 1);
|
||
operatorList.argsArray.splice(0, 1);
|
||
} else if (
|
||
fontBBoxSize === 0 ||
|
||
Math.round(charBBoxSize / fontBBoxSize) >= 10
|
||
) {
|
||
// Override the fontBBox when it's undefined/empty, or when it's at least
|
||
// (approximately) one order of magnitude smaller than the charBBox
|
||
// (fixes issue14999_reduced.pdf).
|
||
if (!this._bbox) {
|
||
this._bbox = [Infinity, Infinity, -Infinity, -Infinity];
|
||
}
|
||
this._bbox[0] = Math.min(this._bbox[0], charBBox[0]);
|
||
this._bbox[1] = Math.min(this._bbox[1], charBBox[1]);
|
||
this._bbox[2] = Math.max(this._bbox[2], charBBox[2]);
|
||
this._bbox[3] = Math.max(this._bbox[3], charBBox[3]);
|
||
}
|
||
|
||
let i = 0,
|
||
ii = operatorList.length;
|
||
while (i < ii) {
|
||
switch (operatorList.fnArray[i]) {
|
||
case OPS.setCharWidthAndBounds:
|
||
break; // Handled above.
|
||
case OPS.setStrokeColorSpace:
|
||
case OPS.setFillColorSpace:
|
||
case OPS.setStrokeColor:
|
||
case OPS.setStrokeColorN:
|
||
case OPS.setFillColor:
|
||
case OPS.setFillColorN:
|
||
case OPS.setStrokeGray:
|
||
case OPS.setFillGray:
|
||
case OPS.setStrokeRGBColor:
|
||
case OPS.setFillRGBColor:
|
||
case OPS.setStrokeCMYKColor:
|
||
case OPS.setFillCMYKColor:
|
||
case OPS.shadingFill:
|
||
case OPS.setRenderingIntent:
|
||
operatorList.fnArray.splice(i, 1);
|
||
operatorList.argsArray.splice(i, 1);
|
||
ii--;
|
||
continue;
|
||
|
||
case OPS.setGState:
|
||
const [gStateObj] = operatorList.argsArray[i];
|
||
let j = 0,
|
||
jj = gStateObj.length;
|
||
while (j < jj) {
|
||
const [gStateKey] = gStateObj[j];
|
||
switch (gStateKey) {
|
||
case "TR":
|
||
case "TR2":
|
||
case "HT":
|
||
case "BG":
|
||
case "BG2":
|
||
case "UCR":
|
||
case "UCR2":
|
||
gStateObj.splice(j, 1);
|
||
jj--;
|
||
continue;
|
||
}
|
||
j++;
|
||
}
|
||
break;
|
||
}
|
||
i++;
|
||
}
|
||
}
|
||
}
|
||
|
||
class StateManager {
|
||
constructor(initialState = new EvalState()) {
|
||
this.state = initialState;
|
||
this.stateStack = [];
|
||
}
|
||
|
||
save() {
|
||
const old = this.state;
|
||
this.stateStack.push(this.state);
|
||
this.state = old.clone();
|
||
}
|
||
|
||
restore() {
|
||
const prev = this.stateStack.pop();
|
||
if (prev) {
|
||
this.state = prev;
|
||
}
|
||
}
|
||
|
||
transform(args) {
|
||
this.state.ctm = Util.transform(this.state.ctm, args);
|
||
}
|
||
}
|
||
|
||
class TextState {
|
||
constructor() {
|
||
this.ctm = new Float32Array(IDENTITY_MATRIX);
|
||
this.fontName = null;
|
||
this.fontSize = 0;
|
||
this.loadedName = null;
|
||
this.font = null;
|
||
this.fontMatrix = FONT_IDENTITY_MATRIX;
|
||
this.textMatrix = IDENTITY_MATRIX.slice();
|
||
this.textLineMatrix = IDENTITY_MATRIX.slice();
|
||
this.charSpacing = 0;
|
||
this.wordSpacing = 0;
|
||
this.leading = 0;
|
||
this.textHScale = 1;
|
||
this.textRise = 0;
|
||
}
|
||
|
||
setTextMatrix(a, b, c, d, e, f) {
|
||
const m = this.textMatrix;
|
||
m[0] = a;
|
||
m[1] = b;
|
||
m[2] = c;
|
||
m[3] = d;
|
||
m[4] = e;
|
||
m[5] = f;
|
||
}
|
||
|
||
setTextLineMatrix(a, b, c, d, e, f) {
|
||
const m = this.textLineMatrix;
|
||
m[0] = a;
|
||
m[1] = b;
|
||
m[2] = c;
|
||
m[3] = d;
|
||
m[4] = e;
|
||
m[5] = f;
|
||
}
|
||
|
||
translateTextMatrix(x, y) {
|
||
const m = this.textMatrix;
|
||
m[4] = m[0] * x + m[2] * y + m[4];
|
||
m[5] = m[1] * x + m[3] * y + m[5];
|
||
}
|
||
|
||
translateTextLineMatrix(x, y) {
|
||
const m = this.textLineMatrix;
|
||
m[4] = m[0] * x + m[2] * y + m[4];
|
||
m[5] = m[1] * x + m[3] * y + m[5];
|
||
}
|
||
|
||
carriageReturn() {
|
||
this.translateTextLineMatrix(0, -this.leading);
|
||
this.textMatrix = this.textLineMatrix.slice();
|
||
}
|
||
|
||
clone() {
|
||
const clone = Object.create(this);
|
||
clone.textMatrix = this.textMatrix.slice();
|
||
clone.textLineMatrix = this.textLineMatrix.slice();
|
||
clone.fontMatrix = this.fontMatrix.slice();
|
||
return clone;
|
||
}
|
||
}
|
||
|
||
class EvalState {
|
||
constructor() {
|
||
this.ctm = new Float32Array(IDENTITY_MATRIX);
|
||
this.font = null;
|
||
this.textRenderingMode = TextRenderingMode.FILL;
|
||
this.fillColorSpace = ColorSpace.singletons.gray;
|
||
this.strokeColorSpace = ColorSpace.singletons.gray;
|
||
}
|
||
|
||
clone() {
|
||
return Object.create(this);
|
||
}
|
||
}
|
||
|
||
class EvaluatorPreprocessor {
|
||
static get opMap() {
|
||
// Specifies properties for each command
|
||
//
|
||
// If variableArgs === true: [0, `numArgs`] expected
|
||
// If variableArgs === false: exactly `numArgs` expected
|
||
return shadow(this, "opMap", {
|
||
// Graphic state
|
||
w: { id: OPS.setLineWidth, numArgs: 1, variableArgs: false },
|
||
J: { id: OPS.setLineCap, numArgs: 1, variableArgs: false },
|
||
j: { id: OPS.setLineJoin, numArgs: 1, variableArgs: false },
|
||
M: { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false },
|
||
d: { id: OPS.setDash, numArgs: 2, variableArgs: false },
|
||
ri: { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false },
|
||
i: { id: OPS.setFlatness, numArgs: 1, variableArgs: false },
|
||
gs: { id: OPS.setGState, numArgs: 1, variableArgs: false },
|
||
q: { id: OPS.save, numArgs: 0, variableArgs: false },
|
||
Q: { id: OPS.restore, numArgs: 0, variableArgs: false },
|
||
cm: { id: OPS.transform, numArgs: 6, variableArgs: false },
|
||
|
||
// Path
|
||
m: { id: OPS.moveTo, numArgs: 2, variableArgs: false },
|
||
l: { id: OPS.lineTo, numArgs: 2, variableArgs: false },
|
||
c: { id: OPS.curveTo, numArgs: 6, variableArgs: false },
|
||
v: { id: OPS.curveTo2, numArgs: 4, variableArgs: false },
|
||
y: { id: OPS.curveTo3, numArgs: 4, variableArgs: false },
|
||
h: { id: OPS.closePath, numArgs: 0, variableArgs: false },
|
||
re: { id: OPS.rectangle, numArgs: 4, variableArgs: false },
|
||
S: { id: OPS.stroke, numArgs: 0, variableArgs: false },
|
||
s: { id: OPS.closeStroke, numArgs: 0, variableArgs: false },
|
||
f: { id: OPS.fill, numArgs: 0, variableArgs: false },
|
||
F: { id: OPS.fill, numArgs: 0, variableArgs: false },
|
||
"f*": { id: OPS.eoFill, numArgs: 0, variableArgs: false },
|
||
B: { id: OPS.fillStroke, numArgs: 0, variableArgs: false },
|
||
"B*": { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false },
|
||
b: { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false },
|
||
"b*": { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false },
|
||
n: { id: OPS.endPath, numArgs: 0, variableArgs: false },
|
||
|
||
// Clipping
|
||
W: { id: OPS.clip, numArgs: 0, variableArgs: false },
|
||
"W*": { id: OPS.eoClip, numArgs: 0, variableArgs: false },
|
||
|
||
// Text
|
||
BT: { id: OPS.beginText, numArgs: 0, variableArgs: false },
|
||
ET: { id: OPS.endText, numArgs: 0, variableArgs: false },
|
||
Tc: { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false },
|
||
Tw: { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false },
|
||
Tz: { id: OPS.setHScale, numArgs: 1, variableArgs: false },
|
||
TL: { id: OPS.setLeading, numArgs: 1, variableArgs: false },
|
||
Tf: { id: OPS.setFont, numArgs: 2, variableArgs: false },
|
||
Tr: { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false },
|
||
Ts: { id: OPS.setTextRise, numArgs: 1, variableArgs: false },
|
||
Td: { id: OPS.moveText, numArgs: 2, variableArgs: false },
|
||
TD: { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false },
|
||
Tm: { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false },
|
||
"T*": { id: OPS.nextLine, numArgs: 0, variableArgs: false },
|
||
Tj: { id: OPS.showText, numArgs: 1, variableArgs: false },
|
||
TJ: { id: OPS.showSpacedText, numArgs: 1, variableArgs: false },
|
||
"'": { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false },
|
||
'"': {
|
||
id: OPS.nextLineSetSpacingShowText,
|
||
numArgs: 3,
|
||
variableArgs: false,
|
||
},
|
||
|
||
// Type3 fonts
|
||
d0: { id: OPS.setCharWidth, numArgs: 2, variableArgs: false },
|
||
d1: {
|
||
id: OPS.setCharWidthAndBounds,
|
||
numArgs: 6,
|
||
variableArgs: false,
|
||
},
|
||
|
||
// Color
|
||
CS: { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false },
|
||
cs: { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false },
|
||
SC: { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true },
|
||
SCN: { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true },
|
||
sc: { id: OPS.setFillColor, numArgs: 4, variableArgs: true },
|
||
scn: { id: OPS.setFillColorN, numArgs: 33, variableArgs: true },
|
||
G: { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false },
|
||
g: { id: OPS.setFillGray, numArgs: 1, variableArgs: false },
|
||
RG: { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false },
|
||
rg: { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false },
|
||
K: { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false },
|
||
k: { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false },
|
||
|
||
// Shading
|
||
sh: { id: OPS.shadingFill, numArgs: 1, variableArgs: false },
|
||
|
||
// Images
|
||
BI: { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false },
|
||
ID: { id: OPS.beginImageData, numArgs: 0, variableArgs: false },
|
||
EI: { id: OPS.endInlineImage, numArgs: 1, variableArgs: false },
|
||
|
||
// XObjects
|
||
Do: { id: OPS.paintXObject, numArgs: 1, variableArgs: false },
|
||
MP: { id: OPS.markPoint, numArgs: 1, variableArgs: false },
|
||
DP: { id: OPS.markPointProps, numArgs: 2, variableArgs: false },
|
||
BMC: { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false },
|
||
BDC: {
|
||
id: OPS.beginMarkedContentProps,
|
||
numArgs: 2,
|
||
variableArgs: false,
|
||
},
|
||
EMC: { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false },
|
||
|
||
// Compatibility
|
||
BX: { id: OPS.beginCompat, numArgs: 0, variableArgs: false },
|
||
EX: { id: OPS.endCompat, numArgs: 0, variableArgs: false },
|
||
|
||
// (reserved partial commands for the lexer)
|
||
BM: null,
|
||
BD: null,
|
||
true: null,
|
||
fa: null,
|
||
fal: null,
|
||
fals: null,
|
||
false: null,
|
||
nu: null,
|
||
nul: null,
|
||
null: null,
|
||
});
|
||
}
|
||
|
||
static MAX_INVALID_PATH_OPS = 10;
|
||
|
||
constructor(stream, xref, stateManager = new StateManager()) {
|
||
// TODO(mduan): pass array of knownCommands rather than this.opMap
|
||
// dictionary
|
||
this.parser = new Parser({
|
||
lexer: new Lexer(stream, EvaluatorPreprocessor.opMap),
|
||
xref,
|
||
});
|
||
this.stateManager = stateManager;
|
||
this.nonProcessedArgs = [];
|
||
this._isPathOp = false;
|
||
this._numInvalidPathOPS = 0;
|
||
}
|
||
|
||
get savedStatesDepth() {
|
||
return this.stateManager.stateStack.length;
|
||
}
|
||
|
||
// |operation| is an object with two fields:
|
||
//
|
||
// - |fn| is an out param.
|
||
//
|
||
// - |args| is an inout param. On entry, it should have one of two values.
|
||
//
|
||
// - An empty array. This indicates that the caller is providing the
|
||
// array in which the args will be stored in. The caller should use
|
||
// this value if it can reuse a single array for each call to read().
|
||
//
|
||
// - |null|. This indicates that the caller needs this function to create
|
||
// the array in which any args are stored in. If there are zero args,
|
||
// this function will leave |operation.args| as |null| (thus avoiding
|
||
// allocations that would occur if we used an empty array to represent
|
||
// zero arguments). Otherwise, it will replace |null| with a new array
|
||
// containing the arguments. The caller should use this value if it
|
||
// cannot reuse an array for each call to read().
|
||
//
|
||
// These two modes are present because this function is very hot and so
|
||
// avoiding allocations where possible is worthwhile.
|
||
//
|
||
read(operation) {
|
||
let args = operation.args;
|
||
while (true) {
|
||
const obj = this.parser.getObj();
|
||
if (obj instanceof Cmd) {
|
||
const cmd = obj.cmd;
|
||
// Check that the command is valid
|
||
const opSpec = EvaluatorPreprocessor.opMap[cmd];
|
||
if (!opSpec) {
|
||
warn(`Unknown command "${cmd}".`);
|
||
continue;
|
||
}
|
||
|
||
const fn = opSpec.id;
|
||
const numArgs = opSpec.numArgs;
|
||
let argsLength = args !== null ? args.length : 0;
|
||
|
||
// If the *previous* command wasn't a path operator, reset the heuristic
|
||
// used with incomplete path operators below (fixes issue14917.pdf).
|
||
if (!this._isPathOp) {
|
||
this._numInvalidPathOPS = 0;
|
||
}
|
||
this._isPathOp = fn >= OPS.moveTo && fn <= OPS.endPath;
|
||
|
||
if (!opSpec.variableArgs) {
|
||
// Postscript commands can be nested, e.g. /F2 /GS2 gs 5.711 Tf
|
||
if (argsLength !== numArgs) {
|
||
const nonProcessedArgs = this.nonProcessedArgs;
|
||
while (argsLength > numArgs) {
|
||
nonProcessedArgs.push(args.shift());
|
||
argsLength--;
|
||
}
|
||
while (argsLength < numArgs && nonProcessedArgs.length !== 0) {
|
||
if (args === null) {
|
||
args = [];
|
||
}
|
||
args.unshift(nonProcessedArgs.pop());
|
||
argsLength++;
|
||
}
|
||
}
|
||
|
||
if (argsLength < numArgs) {
|
||
const partialMsg =
|
||
`command ${cmd}: expected ${numArgs} args, ` +
|
||
`but received ${argsLength} args.`;
|
||
|
||
// Incomplete path operators, in particular, can result in fairly
|
||
// chaotic rendering artifacts. Hence the following heuristics is
|
||
// used to error, rather than just warn, once a number of invalid
|
||
// path operators have been encountered (fixes bug1443140.pdf).
|
||
if (
|
||
this._isPathOp &&
|
||
++this._numInvalidPathOPS >
|
||
EvaluatorPreprocessor.MAX_INVALID_PATH_OPS
|
||
) {
|
||
throw new FormatError(`Invalid ${partialMsg}`);
|
||
}
|
||
// If we receive too few arguments, it's not possible to execute
|
||
// the command, hence we skip the command.
|
||
warn(`Skipping ${partialMsg}`);
|
||
if (args !== null) {
|
||
args.length = 0;
|
||
}
|
||
continue;
|
||
}
|
||
} else if (argsLength > numArgs) {
|
||
info(
|
||
`Command ${cmd}: expected [0, ${numArgs}] args, ` +
|
||
`but received ${argsLength} args.`
|
||
);
|
||
}
|
||
|
||
// TODO figure out how to type-check vararg functions
|
||
this.preprocessCommand(fn, args);
|
||
|
||
operation.fn = fn;
|
||
operation.args = args;
|
||
return true;
|
||
}
|
||
if (obj === EOF) {
|
||
return false; // no more commands
|
||
}
|
||
// argument
|
||
if (obj !== null) {
|
||
if (args === null) {
|
||
args = [];
|
||
}
|
||
args.push(obj);
|
||
if (args.length > 33) {
|
||
throw new FormatError("Too many arguments");
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
preprocessCommand(fn, args) {
|
||
switch (fn | 0) {
|
||
case OPS.save:
|
||
this.stateManager.save();
|
||
break;
|
||
case OPS.restore:
|
||
this.stateManager.restore();
|
||
break;
|
||
case OPS.transform:
|
||
this.stateManager.transform(args);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
export { EvaluatorPreprocessor, PartialEvaluator };
|