6d4d402a78
Given that this helper function is only used on the worker-thread, there's no reason to duplicate it in both of the *built* `pdf.js` and `pdf.worker.js` files.
649 lines
17 KiB
JavaScript
649 lines
17 KiB
JavaScript
/* Copyright 2019 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import {
|
|
AnnotationEditorPrefix,
|
|
assert,
|
|
BaseException,
|
|
objectSize,
|
|
stringToPDFString,
|
|
warn,
|
|
} from "../shared/util.js";
|
|
import { Dict, isName, Ref, RefSet } from "./primitives.js";
|
|
import { BaseStream } from "./base_stream.js";
|
|
|
|
// Matches a complete version string made of one digit in the range 1-9,
// a dot, and exactly one more digit (e.g. "1.7").
const PDF_VERSION_REGEXP = /^[1-9]\.\d$/;
|
|
|
|
/**
 * Wraps `initializer` in a getter that builds a lookup table lazily.
 *
 * On the first call of the returned function a prototype-less object is
 * created and handed to `initializer` to be filled in; once that succeeds the
 * initializer is dropped and every later call returns the same object.
 *
 * @param {function(Object): void} initializer - Populates the lookup table.
 * @returns {function(): Object} Getter for the (cached) lookup table.
 */
function getLookupTableFactory(initializer) {
  let table;
  return function () {
    if (!initializer) {
      // Already built (or nothing to build): hand back the cached value.
      return table;
    }
    table = Object.create(null);
    initializer(table);
    initializer = null;
    return table;
  };
}
|
|
|
|
/**
 * Wraps `initializer` in a getter that builds a lookup table lazily from a
 * flat array of alternating keys and values (`[key0, value0, key1, ...]`).
 *
 * The initializer is invoked on the first call of the returned function only;
 * later calls return the same prototype-less object.
 *
 * @param {function(): Array} initializer - Returns the flat key/value array.
 * @returns {function(): Object} Getter for the (cached) lookup table.
 */
function getArrayLookupTableFactory(initializer) {
  let table;
  return function () {
    if (!initializer) {
      return table;
    }
    const pairs = initializer();
    initializer = null;
    table = Object.create(null);
    const count = pairs.length;
    let index = 0;
    while (index < count) {
      table[pairs[index]] = pairs[index + 1];
      index += 2;
    }
    return table;
  };
}
|
|
|
|
/**
 * Exception that records a `begin`/`end` pair; its message prints the pair as
 * the half-open interval `[begin, end)`.
 */
class MissingDataException extends BaseException {
  /**
   * @param {number} begin - Start of the interval (stored as `this.begin`).
   * @param {number} end - End of the interval (stored as `this.end`).
   */
  constructor(begin, end) {
    const message = `Missing data [${begin}, ${end})`;
    super(message, "MissingDataException");
    this.begin = begin;
    this.end = end;
  }
}
|
|
|
|
/**
 * `BaseException` subclass registered under the name "ParserEOFException".
 */
class ParserEOFException extends BaseException {
  /**
   * @param {string} message - Text forwarded to `BaseException`.
   */
  constructor(message) {
    super(message, "ParserEOFException");
  }
}
|
|
|
|
/**
 * `BaseException` subclass registered under the name "XRefEntryException".
 */
class XRefEntryException extends BaseException {
  /**
   * @param {string} message - Text forwarded to `BaseException`.
   */
  constructor(message) {
    super(message, "XRefEntryException");
  }
}
|
|
|
|
/**
 * `BaseException` subclass registered under the name "XRefParseException".
 */
class XRefParseException extends BaseException {
  /**
   * @param {string} message - Text forwarded to `BaseException`.
   */
  constructor(message) {
    super(message, "XRefParseException");
  }
}
|
|
|
|
/**
 * Concatenates a list of ArrayBuffers into one Uint8Array.
 *
 * An empty list yields an empty Uint8Array. A list with a single buffer
 * yields a Uint8Array created directly over that buffer (no copy is made).
 * Otherwise the contents are copied, in order, into a newly allocated array.
 *
 * @param {Array<ArrayBuffer>} arr - The ArrayBuffers to combine.
 * @returns {Uint8Array}
 */
function arrayBuffersToBytes(arr) {
  if (
    typeof PDFJSDev === "undefined" ||
    PDFJSDev.test("!PRODUCTION || TESTING")
  ) {
    // Input validation, only performed when the condition above holds.
    for (const item of arr) {
      assert(
        item instanceof ArrayBuffer,
        "arrayBuffersToBytes - expected an ArrayBuffer."
      );
    }
  }

  switch (arr.length) {
    case 0:
      return new Uint8Array(0);
    case 1:
      return new Uint8Array(arr[0]);
  }

  let totalLength = 0;
  for (const buffer of arr) {
    totalLength += buffer.byteLength;
  }

  const bytes = new Uint8Array(totalLength);
  let offset = 0;
  for (const buffer of arr) {
    const chunk = new Uint8Array(buffer);
    bytes.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return bytes;
}
|
|
|
|
/**
 * Look up a property that may be inherited through the `Parent` chain.
 *
 * Starting at `dict`, the value of `key` is read from each dictionary and then
 * from its `Parent`, and so on until something that is not a `Dict` is
 * reached. Dictionaries carrying an `objId` are tracked, so that a `Parent`
 * chain which loops back onto itself terminates the walk.
 *
 * @param {Object} params
 * @param {Dict} params.dict - Dictionary from where to start the traversal.
 * @param {string} params.key - The key of the property to look for.
 * @param {boolean} [params.getArray] - Read the value with `Dict.getArray`
 *   rather than `Dict.get`. The default value is `false`.
 * @param {boolean} [params.stopWhenFound] - Return the first value found.
 *   When `false` the whole chain is walked and all values found are returned
 *   as an array, ordered from `dict` upwards. The default value is `true`.
 * @returns {*} The value (or the array of values), or `undefined` when the
 *   key was not found anywhere in the chain.
 */
function getInheritableProperty({
  dict,
  key,
  getArray = false,
  stopWhenFound = true,
}) {
  const visited = new RefSet();
  let values;

  for (let node = dict; node instanceof Dict; node = node.get("Parent")) {
    if (node.objId) {
      if (visited.has(node.objId)) {
        // We have been here before: the Parent chain is cyclic.
        break;
      }
      visited.put(node.objId);
    }

    const value = getArray ? node.getArray(key) : node.get(key);
    if (value === undefined) {
      continue;
    }
    if (stopWhenFound) {
      return value;
    }
    if (!values) {
      values = [];
    }
    values.push(value);
  }
  return values;
}
|
|
|
|
// Roman digits for the hundreds (index 0-9), tens (index 10-19) and
// ones (index 20-29); the first entry of each row stands for a zero digit.
// prettier-ignore
const ROMAN_NUMBER_MAP = [
  "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
  "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
  "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"
];

/**
 * Converts a positive integer to its Roman numeral representation.
 * @param {number} number - The number that should be converted.
 * @param {boolean} lowerCase - Whether the result should use lower case
 *   letters. The default value is `false`.
 * @returns {string} The resulting Roman number.
 */
function toRomanNumerals(number, lowerCase = false) {
  assert(
    Number.isInteger(number) && number > 0,
    "The number should be a positive integer."
  );
  // Every full thousand contributes one "M"; what is left is below 1000.
  const thousands = Math.floor(number / 1000);
  const belowThousand = number % 1000;

  const hundreds = Math.floor(belowThousand / 100);
  const tens = Math.floor((belowThousand % 100) / 10);
  const ones = belowThousand % 10;

  const romanStr = [
    "M".repeat(thousands),
    ROMAN_NUMBER_MAP[hundreds],
    ROMAN_NUMBER_MAP[10 + tens],
    ROMAN_NUMBER_MAP[20 + ones],
  ].join("");

  return lowerCase ? romanStr.toLowerCase() : romanStr;
}
|
|
|
|
// Returns the base 2 logarithm of `x`, rounded up to the next integer.
// Unlike the native `Math.log2`, values of `x` smaller than or equal to 0
// give 0 rather than `-Infinity`/`NaN`.
function log2(x) {
  return x <= 0 ? 0 : Math.ceil(Math.log2(x));
}
|
|
|
|
/**
 * Reads `data[offset]` and interprets its low 8 bits as a signed integer.
 * @param {Array<number>|Uint8Array} data
 * @param {number} offset
 * @returns {number} A value in the range -128..127.
 */
function readInt8(data, offset) {
  const byte = data[offset];
  // Move the byte to the top of the 32-bit word, then shift back with sign
  // propagation so that bit 7 becomes the sign.
  const shifted = byte << 24;
  return shifted >> 24;
}
|
|
|
|
/**
 * Combines `data[offset]` (high byte) and `data[offset + 1]` (low byte) into
 * one 16-bit value, i.e. a big-endian read.
 * @param {Array<number>|Uint8Array} data
 * @param {number} offset
 * @returns {number}
 */
function readUint16(data, offset) {
  const high = data[offset];
  const low = data[offset + 1];
  return (high << 8) | low;
}
|
|
|
|
/**
 * Combines the four values `data[offset]` .. `data[offset + 3]` into one
 * unsigned 32-bit value, most significant byte first.
 * @param {Array<number>|Uint8Array} data
 * @param {number} offset
 * @returns {number}
 */
function readUint32(data, offset) {
  const b0 = data[offset];
  const b1 = data[offset + 1];
  const b2 = data[offset + 2];
  const b3 = data[offset + 3];
  const signed = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
  // The bitwise operators give a signed result; `>>> 0` makes it unsigned.
  return signed >>> 0;
}
|
|
|
|
// Tells whether the character code `ch` is SPACE, TAB, CR or LF.
function isWhiteSpace(ch) {
  switch (ch) {
    case 0x20: // SPACE
    case 0x09: // TAB
    case 0x0d: // CR
    case 0x0a: // LF
      return true;
    default:
      return false;
  }
}
|
|
|
|
/**
 * Splits a dot-separated XFA path into its components.
 *
 * AcroForm field names use an array-like notation to refer to repeated XFA
 * elements, e.g. `foo.bar[nnn]` (see: XFA Spec Chapter 3 - Repeated Elements).
 * A component ending in `[nnn]` gives that number as its `pos`; every other
 * component gets `pos` 0.
 *
 * @param {string} path - XFA path name.
 * @returns {Array} - Array of Objects with the name and pos of
 * each part of the path.
 */
function parseXFAPath(path) {
  const positionPattern = /(.+)\[(\d+)\]$/;
  const parts = [];
  for (const component of path.split(".")) {
    const match = positionPattern.exec(component);
    if (match === null) {
      parts.push({ name: component, pos: 0 });
      continue;
    }
    const [, name, position] = match;
    parts.push({ name, pos: parseInt(position, 10) });
  }
  return parts;
}
|
|
|
|
/**
 * Escapes the characters of `str` that cannot appear literally in a PDF name.
 *
 * Every code unit outside the range 0x21-0x7e, as well as `#` and the
 * delimiter characters `( ) < > [ ] { } / %`, is replaced by `#` followed by
 * its hexadecimal code. The code is zero-padded to two digits, so that e.g.
 * a TAB becomes `#09` rather than `#9`; with a single digit, the character
 * following the escape would be read as part of it.
 *
 * NOTE(review): code units above 0xff still produce more than two hex digits;
 * this presumably relies on callers passing byte strings - confirm.
 *
 * @param {string} str - The unescaped name (without the leading `/`).
 * @returns {string} The escaped name; `str` itself when nothing was escaped.
 */
function escapePDFName(str) {
  const buffer = [];
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.charCodeAt(i);
    // Whitespace or delimiters aren't regular chars, so escape them.
    if (
      char < 0x21 ||
      char > 0x7e ||
      char === 0x23 /* # */ ||
      char === 0x28 /* ( */ ||
      char === 0x29 /* ) */ ||
      char === 0x3c /* < */ ||
      char === 0x3e /* > */ ||
      char === 0x5b /* [ */ ||
      char === 0x5d /* ] */ ||
      char === 0x7b /* { */ ||
      char === 0x7d /* } */ ||
      char === 0x2f /* / */ ||
      char === 0x25 /* % */
    ) {
      // Flush the run of regular characters that precedes this one.
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`#${char.toString(16).padStart(2, "0")}`);
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }

  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }

  return buffer.join("");
}
|
|
|
|
// Prepares a string for being written into a PDF file: "(", ")" and "\" get
// a "\" in front of them, while line feeds and carriage returns are turned
// into the two-character sequences "\n" and "\r".
function escapeString(str) {
  return str.replace(/([()\\\n\r])/g, match => {
    switch (match) {
      case "\n":
        return "\\n";
      case "\r":
        return "\\r";
      default:
        return `\\${match}`;
    }
  });
}
|
|
|
|
/**
 * Recursively gathers JavaScript source strings into `list`.
 *
 * - A `Ref` is fetched through `xref`; the refs currently being followed are
 *   tracked in `parents`, so that a reference cycle ends the recursion.
 * - An array has each of its elements visited.
 * - A `Dict` whose `S` entry is the name `JavaScript` contributes its `JS`
 *   entry (a stream or a string), and its `Next` entry is visited afterwards.
 *
 * @param {*} entry - The object to inspect; falsy values are ignored.
 * @param {Object} xref - Used to resolve `Ref` entries via `fetch`.
 * @param {Array<string>} list - Receives the collected code, in visit order.
 * @param {RefSet} parents - The refs on the current recursion path.
 */
function _collectJS(entry, xref, list, parents) {
  if (!entry) {
    return;
  }

  let trackedRef = null;
  if (entry instanceof Ref) {
    if (parents.has(entry)) {
      // This ref is already being followed further up: we have a cycle.
      return;
    }
    trackedRef = entry;
    parents.put(trackedRef);
    entry = xref.fetch(entry);
  }

  if (Array.isArray(entry)) {
    for (const element of entry) {
      _collectJS(element, xref, list, parents);
    }
  } else if (entry instanceof Dict) {
    if (isName(entry.get("S"), "JavaScript")) {
      const js = entry.get("JS");
      let rawCode;
      if (js instanceof BaseStream) {
        rawCode = js.getString();
      } else if (typeof js === "string") {
        rawCode = js;
      }
      if (rawCode) {
        // NUL characters are dropped from the decoded code.
        const code = stringToPDFString(rawCode).replace(/\u0000/g, "");
        if (code) {
          list.push(code);
        }
      }
    }
    _collectJS(entry.getRaw("Next"), xref, list, parents);
  }

  // The ref is only tracked while its own sub-tree is being walked.
  if (trackedRef) {
    parents.remove(trackedRef);
  }
}
|
|
|
|
/**
 * Collects the JavaScript attached to `dict`, grouped by action name.
 *
 * The `AA` (additional actions) dictionaries found on `dict` and along its
 * `Parent` chain are inspected first; only keys that `eventType` maps to an
 * action name are considered. The `A` entry of `dict` itself, if it has one,
 * is stored under the name `Action`.
 *
 * @param {Object} xref - Used to resolve references while collecting code.
 * @param {Dict} dict - The dictionary whose actions should be collected.
 * @param {Object} eventType - Maps `AA` keys to action names.
 * @returns {Object|null} A prototype-less object mapping action names to
 *   arrays of code strings, or `null` when no code was found.
 */
function collectActions(xref, dict, eventType) {
  const actions = Object.create(null);
  const inheritedDicts = getInheritableProperty({
    dict,
    key: "AA",
    stopWhenFound: false,
  });

  if (inheritedDicts) {
    // The dicts are listed from `dict` upwards. Walking them in reverse
    // lets entries found closer to `dict` overwrite those from elder
    // ancestors.
    for (const additionalActions of inheritedDicts.slice().reverse()) {
      if (!(additionalActions instanceof Dict)) {
        continue;
      }
      for (const key of additionalActions.getKeys()) {
        const actionName = eventType[key];
        if (!actionName) {
          continue;
        }
        const rawAction = additionalActions.getRaw(key);
        const scripts = [];
        _collectJS(rawAction, xref, scripts, new RefSet());
        if (scripts.length > 0) {
          actions[actionName] = scripts;
        }
      }
    }
  }

  // Collect the Action if any (we may have one on pushbutton).
  if (dict.has("A")) {
    const action = dict.get("A");
    const scripts = [];
    _collectJS(action, xref, scripts, new RefSet());
    if (scripts.length > 0) {
      actions.Action = scripts;
    }
  }

  return objectSize(actions) > 0 ? actions : null;
}
|
|
|
|
// The XML predefined entities, keyed by the code of the character that each
// of them stands for.
const XMLEntities = {
  /* < */ 0x3c: "&lt;",
  /* > */ 0x3e: "&gt;",
  /* & */ 0x26: "&amp;",
  /* " */ 0x22: "&quot;",
  /* ' */ 0x27: "&apos;",
};

/**
 * Encodes a string so that it can be embedded in XML.
 *
 * Printable ASCII characters (0x20-0x7e) are kept as they are, except for
 * `< > & " '` which are replaced by their predefined entities. Every other
 * code point is written as an upper-case hexadecimal character reference,
 * e.g. `&#xE9;`.
 *
 * @param {string} str - The text to encode.
 * @returns {string} The encoded text; `str` itself when nothing was replaced.
 */
function encodeToXmlString(str) {
  const buffer = [];
  let start = 0;
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (0x20 <= char && char <= 0x7e) {
      // ascii
      const entity = XMLEntities[char];
      if (entity) {
        if (start < i) {
          buffer.push(str.substring(start, i));
        }
        buffer.push(entity);
        start = i + 1;
      }
    } else {
      if (start < i) {
        buffer.push(str.substring(start, i));
      }
      buffer.push(`&#x${char.toString(16).toUpperCase()};`);
      // `codePointAt` only returns a value above 0xffff when it has consumed
      // a complete surrogate pair, i.e. two UTF-16 code units. Lone
      // surrogates and U+FFFE/U+FFFF occupy a single unit, so the character
      // that follows them must not be skipped.
      if (char > 0xffff) {
        i++;
      }
      start = i + 1;
    }
  }

  if (buffer.length === 0) {
    return str;
  }
  if (start < str.length) {
    buffer.push(str.substring(start, str.length));
  }
  return buffer.join("");
}
|
|
|
|
/**
 * Checks the font family of `cssFontInfo` and normalizes its weight and
 * italic angle in place.
 *
 * The family must be a double- or single-quoted string without unescaped
 * inner quotes, or a whitespace-separated list of identifiers. When it is
 * not, a warning is emitted, `false` is returned, and the object is left
 * untouched. Otherwise `fontWeight` and `italicAngle` are replaced by
 * strings, using the defaults "400" and "14" for unsupported values.
 *
 * @param {Object} cssFontInfo - Object with the `fontFamily`, `fontWeight`
 *   and `italicAngle` properties; modified in place.
 * @returns {boolean} Whether the font family is valid.
 */
function validateCSSFont(cssFontInfo) {
  // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-style.
  const DEFAULT_CSS_FONT_OBLIQUE = "14";
  // See https://developer.mozilla.org/en-US/docs/Web/CSS/font-weight.
  const DEFAULT_CSS_FONT_WEIGHT = "400";
  const CSS_FONT_WEIGHT_VALUES = new Set([
    "100",
    "200",
    "300",
    "400",
    "500",
    "600",
    "700",
    "800",
    "900",
    "1000",
    "normal",
    "bold",
    "bolder",
    "lighter",
  ]);

  const { fontFamily, fontWeight, italicAngle } = cssFontInfo;

  // See https://developer.mozilla.org/en-US/docs/Web/CSS/string.
  let quote = null;
  if (/^".*"$/.test(fontFamily)) {
    quote = '"';
  } else if (/^'.*'$/.test(fontFamily)) {
    quote = "'";
  }

  if (quote !== null) {
    // A quote inside the string must be preceded by a backslash.
    const unescapedQuote = quote === '"' ? /[^\\]"/ : /[^\\]'/;
    if (unescapedQuote.test(fontFamily.slice(1, -1))) {
      warn(`XFA - FontFamily contains some unescaped ${quote}: ${fontFamily}.`);
      return false;
    }
  } else {
    // See https://developer.mozilla.org/en-US/docs/Web/CSS/custom-ident.
    for (const ident of fontFamily.split(/[ \t]+/)) {
      const hasInvalidStart = /^(\d|(-(\d|-)))/.test(ident);
      if (hasInvalidStart || !/^[\w-\\]+$/.test(ident)) {
        warn(
          `XFA - FontFamily contains some invalid <custom-ident>: ${fontFamily}.`
        );
        return false;
      }
    }
  }

  const weight = fontWeight ? fontWeight.toString() : "";
  cssFontInfo.fontWeight = CSS_FONT_WEIGHT_VALUES.has(weight)
    ? weight
    : DEFAULT_CSS_FONT_WEIGHT;

  const angle = parseFloat(italicAngle);
  const isValidAngle = !isNaN(angle) && angle >= -90 && angle <= 90;
  cssFontInfo.italicAngle = isValidAngle
    ? italicAngle.toString()
    : DEFAULT_CSS_FONT_OBLIQUE;

  return true;
}
|
|
|
|
/**
 * Attempts to recover a URL from a `JS` entry that uses one of these
 * white-listed formats:
 *  - window.open('http://example.com')
 *  - app.launchURL('http://example.com', true)
 *  - xfa.host.gotoURL('http://example.com')
 *
 * @param {string} str - The JavaScript code to inspect.
 * @returns {Object|null} An object with the `url` and a `newWindow` flag
 *   (only `true` for `app.launchURL` with `true` as second argument), or
 *   `null` when the code is not in a recognized format or the URL is empty.
 */
function recoverJsURL(str) {
  const URL_OPEN_METHODS = ["app.launchURL", "window.open", "xfa.host.gotoURL"];
  // Alternation of the method names, with their dots escaped.
  const methodsPattern = URL_OPEN_METHODS.map(method =>
    method.split(".").join("\\.")
  ).join("|");
  const regex = new RegExp(
    `^\\s*(${methodsPattern})\\((?:'|")([^'"]*)(?:'|")(?:,\\s*(\\w+)\\)|\\))`,
    "i"
  );

  const match = regex.exec(str);
  if (!match || !match[2]) {
    return null;
  }
  const [, method, url, secondArg] = match;
  const newWindow = secondArg === "true" && method === "app.launchURL";
  return { url, newWindow };
}
|
|
|
|
/**
 * Formats a number with at most two decimals and no trailing zeros.
 * @param {number} value
 * @returns {string}
 */
function numberToString(value) {
  if (Number.isInteger(value)) {
    return value.toString();
  }

  // The value expressed in (rounded) hundredths.
  const hundredths = Math.round(value * 100);
  if (hundredths % 100 === 0) {
    // Rounds to a whole number.
    return (hundredths / 100).toString();
  }

  // One decimal is enough when the second one would be a zero.
  const decimals = hundredths % 10 === 0 ? 1 : 2;
  return value.toFixed(decimals);
}
|
|
|
|
/**
 * Groups the editor-created entries of `annotationStorage` by page.
 *
 * Only entries whose key starts with `AnnotationEditorPrefix` are kept; they
 * are grouped by the `pageIndex` of their value, in iteration order.
 *
 * @param {Iterable<[string, Object]>} [annotationStorage] - Key/value pairs.
 * @returns {Map<number, Array<Object>>|null} The values per page index, or
 *   `null` when there is no storage or no matching entry.
 */
function getNewAnnotationsMap(annotationStorage) {
  if (!annotationStorage) {
    return null;
  }
  const byPage = new Map();
  // The concept of page in a XFA is very different, so
  // editing is just not implemented.
  for (const [key, value] of annotationStorage) {
    if (!key.startsWith(AnnotationEditorPrefix)) {
      continue;
    }
    const pageAnnotations = byPage.get(value.pageIndex);
    if (pageAnnotations) {
      pageAnnotations.push(value);
    } else {
      byPage.set(value.pageIndex, [value]);
    }
  }
  return byPage.size > 0 ? byPage : null;
}
|
|
|
|
/**
 * Tells whether `str` consists solely of characters in the range
 * U+0000-U+007F (the empty string qualifies).
 * @param {string} str
 * @returns {boolean}
 */
function isAscii(str) {
  const asciiOnly = /^[\x00-\x7F]*$/;
  return asciiOnly.test(str);
}
|
|
|
|
/**
 * Writes every UTF-16 code unit of `str` as four lower-case hexadecimal
 * digits, high byte first.
 * @param {string} str
 * @returns {string}
 */
function stringToUTF16HexString(str) {
  const toHexByte = byte => byte.toString(16).padStart(2, "0");

  let hex = "";
  for (let i = 0, ii = str.length; i < ii; i++) {
    const codeUnit = str.charCodeAt(i);
    hex += toHexByte((codeUnit >> 8) & 0xff) + toHexByte(codeUnit & 0xff);
  }
  return hex;
}
|
|
|
|
/**
 * Converts `str` into a string holding two characters, each in the range
 * 0x00-0xff, per UTF-16 code unit: the high byte followed by the low byte.
 * @param {string} str
 * @param {boolean} bigEndian - Whether to start the result with the byte
 *   order mark "\xFE\xFF". The byte order of the code units themselves does
 *   not depend on this flag. The default value is `false`.
 * @returns {string}
 */
function stringToUTF16String(str, bigEndian = false) {
  let result = bigEndian ? "\xFE\xFF" : "";
  for (let i = 0, ii = str.length; i < ii; i++) {
    const codeUnit = str.charCodeAt(i);
    const high = (codeUnit >> 8) & 0xff;
    const low = codeUnit & 0xff;
    result += String.fromCharCode(high, low);
  }
  return result;
}
|
|
|
|
/**
 * Returns the six-number array associated with a rotation of 90, 180 or
 * 270, for the given `width` and `height`.
 *
 * NOTE(review): presumably an affine transform `[a, b, c, d, e, f]` with
 * the rotation expressed in degrees - confirm against the callers.
 *
 * @param {number} rotation - One of 90, 180 or 270.
 * @param {number} width
 * @param {number} height
 * @returns {Array<number>}
 * @throws {Error} For every other `rotation` value.
 */
function getRotationMatrix(rotation, width, height) {
  if (rotation === 90) {
    return [0, 1, -1, 0, width, 0];
  }
  if (rotation === 180) {
    return [-1, 0, 0, -1, width, height];
  }
  if (rotation === 270) {
    return [0, -1, 1, 0, 0, height];
  }
  throw new Error("Invalid rotation");
}
|
|
|
|
export {
|
|
arrayBuffersToBytes,
|
|
collectActions,
|
|
encodeToXmlString,
|
|
escapePDFName,
|
|
escapeString,
|
|
getArrayLookupTableFactory,
|
|
getInheritableProperty,
|
|
getLookupTableFactory,
|
|
getNewAnnotationsMap,
|
|
getRotationMatrix,
|
|
isAscii,
|
|
isWhiteSpace,
|
|
log2,
|
|
MissingDataException,
|
|
numberToString,
|
|
ParserEOFException,
|
|
parseXFAPath,
|
|
PDF_VERSION_REGEXP,
|
|
readInt8,
|
|
readUint16,
|
|
readUint32,
|
|
recoverJsURL,
|
|
stringToUTF16HexString,
|
|
stringToUTF16String,
|
|
toRomanNumerals,
|
|
validateCSSFont,
|
|
XRefEntryException,
|
|
XRefParseException,
|
|
};
|