pdf.js/src/core/obj.js

2171 lines
67 KiB
JavaScript
Raw Normal View History

2012-09-01 07:48:21 +09:00
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2011-10-26 10:18:22 +09:00
import {
assert, bytesToString, createPromiseCapability, createValidAbsoluteUrl,
FormatError, info, InvalidPDFException, isBool, isNum, isString,
PermissionFlag, shadow, stringToPDFString, stringToUTF8String, unreachable,
warn
} from '../shared/util';
import {
clearPrimitiveCaches, Cmd, Dict, isCmd, isDict, isName, isRef, isRefsEqual,
isStream, Ref, RefSet, RefSetCache
} from './primitives';
import { Lexer, Parser } from './parser';
import {
MissingDataException, toRomanNumerals, XRefEntryException, XRefParseException
} from './core_utils';
import { ChunkedStream } from './chunked_stream';
import { CipherTransformFactory } from './crypto';
import { ColorSpace } from './colorspace';
2013-06-26 02:33:53 +09:00
function fetchDestination(dest) {
return isDict(dest) ? dest.get('D') : dest;
}
class Catalog {
constructor(pdfManager, xref) {
this.pdfManager = pdfManager;
2011-10-25 08:55:23 +09:00
this.xref = xref;
2013-02-07 08:19:29 +09:00
this.catDict = xref.getCatalogObj();
if (!isDict(this.catDict)) {
throw new FormatError('Catalog object is not a dictionary.');
}
2013-02-07 08:19:29 +09:00
this.fontCache = new RefSetCache();
this.builtInCMapCache = new Map();
this.pageKidsCountCache = new RefSetCache();
2011-10-25 08:55:23 +09:00
}
get metadata() {
const streamRef = this.catDict.getRaw('Metadata');
if (!isRef(streamRef)) {
return shadow(this, 'metadata', null);
}
2012-05-28 08:03:04 +09:00
2018-08-25 23:35:23 +09:00
const suppressEncryption = !(this.xref.encrypt &&
this.xref.encrypt.encryptMetadata);
const stream = this.xref.fetch(streamRef, suppressEncryption);
let metadata;
if (stream && isDict(stream.dict)) {
const type = stream.dict.get('Type');
const subtype = stream.dict.get('Subtype');
if (isName(type, 'Metadata') && isName(subtype, 'XML')) {
// XXX: This should examine the charset the XML document defines,
// however since there are currently no real means to decode
// arbitrary charsets, let's just hope that the author of the PDF
// was reasonable enough to stick with the XML default charset,
// which is UTF-8.
try {
metadata = stringToUTF8String(bytesToString(stream.getBytes()));
} catch (e) {
if (e instanceof MissingDataException) {
throw e;
2012-05-28 09:00:13 +09:00
}
info('Skipping invalid metadata.');
2012-03-25 03:59:51 +09:00
}
}
}
return shadow(this, 'metadata', metadata);
}
2012-03-25 03:59:51 +09:00
get toplevelPagesDict() {
const pagesObj = this.catDict.get('Pages');
if (!isDict(pagesObj)) {
throw new FormatError('Invalid top-level pages dictionary.');
}
return shadow(this, 'toplevelPagesDict', pagesObj);
}
get documentOutline() {
let obj = null;
try {
obj = this._readDocumentOutline();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read document outline.');
}
return shadow(this, 'documentOutline', obj);
}
/**
* @private
*/
_readDocumentOutline() {
let obj = this.catDict.get('Outlines');
if (!isDict(obj)) {
return null;
}
obj = obj.getRaw('First');
if (!isRef(obj)) {
return null;
}
const root = { items: [], };
const queue = [{ obj, parent: root, }];
// To avoid recursion, keep track of the already processed items.
const processed = new RefSet();
processed.put(obj);
const xref = this.xref, blackColor = new Uint8ClampedArray(3);
while (queue.length > 0) {
const i = queue.shift();
const outlineDict = xref.fetchIfRef(i.obj);
if (outlineDict === null) {
continue;
}
if (!outlineDict.has('Title')) {
throw new FormatError('Invalid outline item encountered.');
}
const data = { url: null, dest: null, };
Catalog.parseDestDictionary({
destDict: outlineDict,
resultObj: data,
docBaseUrl: this.pdfManager.docBaseUrl,
});
const title = outlineDict.get('Title');
const flags = outlineDict.get('F') || 0;
const color = outlineDict.getArray('C');
const count = outlineDict.get('Count');
let rgbColor = blackColor;
// We only need to parse the color when it's valid, and non-default.
if (Array.isArray(color) && color.length === 3 &&
(color[0] !== 0 || color[1] !== 0 || color[2] !== 0)) {
rgbColor = ColorSpace.singletons.rgb.getRgb(color, 0);
}
const outlineItem = {
dest: data.dest,
url: data.url,
unsafeUrl: data.unsafeUrl,
newWindow: data.newWindow,
title: stringToPDFString(title),
color: rgbColor,
count: Number.isInteger(count) ? count : undefined,
bold: !!(flags & 2),
italic: !!(flags & 1),
items: [],
};
i.parent.items.push(outlineItem);
obj = outlineDict.getRaw('First');
if (isRef(obj) && !processed.has(obj)) {
queue.push({ obj, parent: outlineItem, });
processed.put(obj);
2011-10-25 08:55:23 +09:00
}
obj = outlineDict.getRaw('Next');
if (isRef(obj) && !processed.has(obj)) {
queue.push({ obj, parent: i.parent, });
processed.put(obj);
}
}
return (root.items.length > 0 ? root.items : null);
}
2011-10-25 08:55:23 +09:00
2018-08-27 04:37:05 +09:00
get permissions() {
let permissions = null;
try {
permissions = this._readPermissions();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read permissions.');
}
return shadow(this, 'permissions', permissions);
}
/**
* @private
*/
_readPermissions() {
const encrypt = this.xref.trailer.get('Encrypt');
if (!isDict(encrypt)) {
return null;
}
let flags = encrypt.get('P');
if (!isNum(flags)) {
return null;
}
// PDF integer objects are represented internally in signed 2's complement
// form. Therefore, convert the signed decimal integer to a signed 2's
// complement binary integer so we can use regular bitwise operations on it.
flags += 2 ** 32;
const permissions = [];
for (const key in PermissionFlag) {
const value = PermissionFlag[key];
if (flags & value) {
permissions.push(value);
}
}
return permissions;
}
get numPages() {
const obj = this.toplevelPagesDict.get('Count');
if (!Number.isInteger(obj)) {
throw new FormatError(
'Page count in top-level pages dictionary is not an integer.');
}
return shadow(this, 'numPages', obj);
}
get destinations() {
const obj = this._readDests(), dests = Object.create(null);
if (obj instanceof NameTree) {
const names = obj.getAll();
for (let name in names) {
dests[name] = fetchDestination(names[name]);
2011-10-25 08:55:23 +09:00
}
} else if (obj instanceof Dict) {
obj.forEach(function(key, value) {
if (value) {
dests[key] = fetchDestination(value);
}
});
}
return shadow(this, 'destinations', dests);
}
getDestination(destinationId) {
const obj = this._readDests();
if (obj instanceof NameTree || obj instanceof Dict) {
return fetchDestination(obj.get(destinationId) || null);
}
return null;
}
/**
* @private
*/
_readDests() {
const obj = this.catDict.get('Names');
if (obj && obj.has('Dests')) {
return new NameTree(obj.getRaw('Dests'), this.xref);
} else if (this.catDict.has('Dests')) { // Simple destination dictionary.
return this.catDict.get('Dests');
}
return undefined;
}
get pageLabels() {
let obj = null;
try {
obj = this._readPageLabels();
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Unable to read page labels.');
}
return shadow(this, 'pageLabels', obj);
}
/**
* @private
*/
_readPageLabels() {
const obj = this.catDict.getRaw('PageLabels');
if (!obj) {
return null;
}
const pageLabels = new Array(this.numPages);
let style = null, prefix = '';
const numberTree = new NumberTree(obj, this.xref);
const nums = numberTree.getAll();
let currentLabel = '', currentIndex = 1;
for (let i = 0, ii = this.numPages; i < ii; i++) {
if (i in nums) {
const labelDict = nums[i];
if (!isDict(labelDict)) {
throw new FormatError('PageLabel is not a dictionary.');
}
if (labelDict.has('Type') &&
!isName(labelDict.get('Type'), 'PageLabel')) {
throw new FormatError('Invalid type in PageLabel dictionary.');
}
if (labelDict.has('S')) {
const s = labelDict.get('S');
if (!isName(s)) {
throw new FormatError('Invalid style in PageLabel dictionary.');
}
style = s.name;
} else {
style = null;
}
if (labelDict.has('P')) {
const p = labelDict.get('P');
if (!isString(p)) {
throw new FormatError('Invalid prefix in PageLabel dictionary.');
}
prefix = stringToPDFString(p);
} else {
prefix = '';
}
if (labelDict.has('St')) {
const st = labelDict.get('St');
if (!(Number.isInteger(st) && st >= 1)) {
throw new FormatError('Invalid start in PageLabel dictionary.');
}
currentIndex = st;
} else {
currentIndex = 1;
}
}
switch (style) {
case 'D':
currentLabel = currentIndex;
break;
case 'R':
case 'r':
currentLabel = toRomanNumerals(currentIndex, style === 'r');
break;
case 'A':
case 'a':
const LIMIT = 26; // Use only the characters A-Z, or a-z.
const A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61;
const baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE);
const letterIndex = currentIndex - 1;
const character = String.fromCharCode(baseCharCode +
(letterIndex % LIMIT));
const charBuf = [];
for (let j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) {
charBuf.push(character);
}
currentLabel = charBuf.join('');
break;
default:
if (style) {
throw new FormatError(
`Invalid style "${style}" in PageLabel dictionary.`);
}
currentLabel = '';
}
pageLabels[i] = prefix + currentLabel;
currentIndex++;
}
return pageLabels;
}
get pageLayout() {
const obj = this.catDict.get('PageLayout');
// Purposely use a non-standard default value, rather than 'SinglePage', to
// allow differentiating between `undefined` and /SinglePage since that does
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
let pageLayout = '';
if (isName(obj)) {
switch (obj.name) {
case 'SinglePage':
case 'OneColumn':
case 'TwoColumnLeft':
case 'TwoColumnRight':
case 'TwoPageLeft':
case 'TwoPageRight':
pageLayout = obj.name;
}
}
return shadow(this, 'pageLayout', pageLayout);
}
get pageMode() {
const obj = this.catDict.get('PageMode');
let pageMode = 'UseNone'; // Default value.
if (isName(obj)) {
switch (obj.name) {
case 'UseNone':
case 'UseOutlines':
case 'UseThumbs':
case 'FullScreen':
case 'UseOC':
case 'UseAttachments':
pageMode = obj.name;
}
}
return shadow(this, 'pageMode', pageMode);
}
get viewerPreferences() {
const ViewerPreferencesValidators = {
HideToolbar: isBool,
HideMenubar: isBool,
HideWindowUI: isBool,
FitWindow: isBool,
CenterWindow: isBool,
DisplayDocTitle: isBool,
NonFullScreenPageMode: isName,
Direction: isName,
ViewArea: isName,
ViewClip: isName,
PrintArea: isName,
PrintClip: isName,
PrintScaling: isName,
Duplex: isName,
PickTrayByPDFSize: isBool,
PrintPageRange: Array.isArray,
NumCopies: Number.isInteger,
};
const obj = this.catDict.get('ViewerPreferences');
const prefs = Object.create(null);
if (isDict(obj)) {
for (const key in ViewerPreferencesValidators) {
if (!obj.has(key)) {
continue;
}
const value = obj.get(key);
// Make sure the (standard) value conforms to the specification.
if (!ViewerPreferencesValidators[key](value)) {
info(`Bad value in ViewerPreferences for "${key}".`);
continue;
}
let prefValue;
switch (key) {
case 'NonFullScreenPageMode':
switch (value.name) {
case 'UseNone':
case 'UseOutlines':
case 'UseThumbs':
case 'UseOC':
prefValue = value.name;
break;
default:
prefValue = 'UseNone';
}
break;
case 'Direction':
switch (value.name) {
case 'L2R':
case 'R2L':
prefValue = value.name;
break;
default:
prefValue = 'L2R';
}
break;
case 'ViewArea':
case 'ViewClip':
case 'PrintArea':
case 'PrintClip':
switch (value.name) {
case 'MediaBox':
case 'CropBox':
case 'BleedBox':
case 'TrimBox':
case 'ArtBox':
prefValue = value.name;
break;
default:
prefValue = 'CropBox';
}
break;
case 'PrintScaling':
switch (value.name) {
case 'None':
case 'AppDefault':
prefValue = value.name;
break;
default:
prefValue = 'AppDefault';
}
break;
case 'Duplex':
switch (value.name) {
case 'Simplex':
case 'DuplexFlipShortEdge':
case 'DuplexFlipLongEdge':
prefValue = value.name;
break;
default:
prefValue = 'None';
}
break;
case 'PrintPageRange':
const length = value.length;
if (length % 2 !== 0) { // The number of elements must be even.
break;
}
const isValid = value.every((page, i, arr) => {
return (Number.isInteger(page) && page > 0) &&
(i === 0 || page >= arr[i - 1]) && page <= this.numPages;
});
if (isValid) {
prefValue = value;
}
break;
case 'NumCopies':
if (value > 0) {
prefValue = value;
}
break;
default:
assert(typeof value === 'boolean');
prefValue = value;
}
if (prefValue !== undefined) {
prefs[key] = prefValue;
} else {
info(`Bad value in ViewerPreferences for "${key}".`);
}
}
}
return shadow(this, 'viewerPreferences', prefs);
}
get openActionDestination() {
const obj = this.catDict.get('OpenAction');
let openActionDest = null;
if (isDict(obj)) {
// Convert the OpenAction dictionary into a format that works with
// `parseDestDictionary`, to avoid having to re-implement those checks.
const destDict = new Dict(this.xref);
destDict.set('A', obj);
const resultObj = { url: null, dest: null, };
Catalog.parseDestDictionary({ destDict, resultObj, });
if (Array.isArray(resultObj.dest)) {
openActionDest = resultObj.dest;
}
} else if (Array.isArray(obj)) {
openActionDest = obj;
}
return shadow(this, 'openActionDestination', openActionDest);
}
get attachments() {
const obj = this.catDict.get('Names');
2018-08-25 23:35:23 +09:00
let attachments = null;
2018-08-25 23:35:23 +09:00
if (obj && obj.has('EmbeddedFiles')) {
const nameTree = new NameTree(obj.getRaw('EmbeddedFiles'), this.xref);
const names = nameTree.getAll();
for (const name in names) {
const fs = new FileSpec(names[name], this.xref);
if (!attachments) {
attachments = Object.create(null);
}
attachments[stringToPDFString(name)] = fs.serializable;
}
}
return shadow(this, 'attachments', attachments);
}
get javaScript() {
const obj = this.catDict.get('Names');
let javaScript = null;
function appendIfJavaScriptDict(jsDict) {
const type = jsDict.get('S');
if (!isName(type, 'JavaScript')) {
return;
}
let js = jsDict.get('JS');
if (isStream(js)) {
js = bytesToString(js.getBytes());
} else if (!isString(js)) {
return;
}
if (!javaScript) {
javaScript = [];
}
javaScript.push(stringToPDFString(js));
}
if (obj && obj.has('JavaScript')) {
const nameTree = new NameTree(obj.getRaw('JavaScript'), this.xref);
const names = nameTree.getAll();
for (const name in names) {
// We don't use most JavaScript in PDF documents. This code is
// defensive so we don't cause errors on document load.
const jsDict = names[name];
if (isDict(jsDict)) {
appendIfJavaScriptDict(jsDict);
}
}
}
2014-05-24 09:07:25 +09:00
// Append OpenAction actions to the JavaScript array.
const openActionDict = this.catDict.get('OpenAction');
if (isDict(openActionDict, 'Action')) {
const actionType = openActionDict.get('S');
if (isName(actionType, 'Named')) {
// The named Print action is not a part of the PDF 1.7 specification,
// but is supported by many PDF readers/writers (including Adobe's).
const action = openActionDict.get('N');
if (isName(action, 'Print')) {
if (!javaScript) {
javaScript = [];
}
javaScript.push('print({});');
2014-05-24 09:07:25 +09:00
}
} else {
appendIfJavaScriptDict(openActionDict);
2014-05-24 09:07:25 +09:00
}
}
2014-05-24 09:07:25 +09:00
return shadow(this, 'javaScript', javaScript);
}
2013-02-07 08:19:29 +09:00
Fallback to the built-in font renderer when font loading fails After PR 9340 all glyphs are now re-mapped to a Private Use Area (PUA) which means that if a font fails to load, for whatever reason[1], all glyphs in the font will now render as Unicode glyph outlines. This obviously doesn't look good, to say the least, and might be seen as a "regression" since previously many glyphs were left in their original positions which provided a slightly better fallback[2]. Hence this patch, which implements a *general* fallback to the PDF.js built-in font renderer for fonts that fail to load (i.e. are rejected by the sanitizer). One caveat here is that this only works for the Font Loading API, since it's easy to handle errors in that case[3]. The solution implemented in this patch does *not* in any way delay the loading of valid fonts, which was the problem with my previous attempt at a solution, and will only require a bit of extra work/waiting for those fonts that actually fail to load. *Please note:* This patch doesn't fix any of the underlying PDF.js font conversion bugs that's responsible for creating corrupt font files, however it does *improve* rendering in a number of cases; refer to this possibly incomplete list: [Bug 1524888](https://bugzilla.mozilla.org/show_bug.cgi?id=1524888) Issue 10175 Issue 10232 --- [1] Usually because the PDF.js font conversion code wasn't able to parse the font file correctly. [2] Glyphs fell back to some default font, which while not accurate was more useful than the current state. [3] Furthermore I'm not sure how to implement this generally, assuming that's even possible, and don't really have time/interest to look into it either.
2019-02-11 08:47:56 +09:00
fontFallback(id, handler) {
const promises = [];
this.fontCache.forEach(function(promise) {
promises.push(promise);
});
return Promise.all(promises).then((translatedFonts) => {
for (const translatedFont of translatedFonts) {
if (translatedFont.loadedName === id) {
translatedFont.fallback(handler);
return;
}
}
});
}
cleanup() {
clearPrimitiveCaches();
this.pageKidsCountCache.clear();
const promises = [];
this.fontCache.forEach(function(promise) {
promises.push(promise);
});
return Promise.all(promises).then((translatedFonts) => {
for (let i = 0, ii = translatedFonts.length; i < ii; i++) {
const font = translatedFonts[i].dict;
delete font.translated;
}
this.fontCache.clear();
this.builtInCMapCache.clear();
});
}
getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.catDict.getRaw('Pages')];
const xref = this.xref, pageKidsCountCache = this.pageKidsCountCache;
let count, currentPageIndex = 0;
function next() {
while (nodesToVisit.length) {
const currentNode = nodesToVisit.pop();
if (isRef(currentNode)) {
count = pageKidsCountCache.get(currentNode);
// Skip nodes where the page can't be.
if (count > 0 && currentPageIndex + count < pageIndex) {
currentPageIndex += count;
continue;
}
xref.fetchAsync(currentNode).then(function(obj) {
if (isDict(obj, 'Page') || (isDict(obj) && !obj.has('Kids'))) {
if (pageIndex === currentPageIndex) {
// Cache the Page reference, since it can *greatly* improve
// performance by reducing redundant lookups in long documents
// where all nodes are found at *one* level of the tree.
if (currentNode && !pageKidsCountCache.has(currentNode)) {
pageKidsCountCache.put(currentNode, 1);
}
capability.resolve([obj, currentNode]);
} else {
currentPageIndex++;
next();
}
return;
}
nodesToVisit.push(obj);
next();
}, capability.reject);
return;
}
2013-02-07 08:19:29 +09:00
// Must be a child page dictionary.
if (!isDict(currentNode)) {
capability.reject(new FormatError(
'Page dictionary kid reference points to wrong type of object.'));
return;
}
count = currentNode.get('Count');
if (Number.isInteger(count) && count >= 0) {
// Cache the Kids count, since it can reduce redundant lookups in
// documents where all nodes are found at *one* level of the tree.
const objId = currentNode.objId;
if (objId && !pageKidsCountCache.has(objId)) {
pageKidsCountCache.put(objId, count);
}
// Skip nodes where the page can't be.
if (currentPageIndex + count <= pageIndex) {
currentPageIndex += count;
continue;
}
}
2013-02-07 08:19:29 +09:00
const kids = currentNode.get('Kids');
if (!Array.isArray(kids)) {
// Prevent errors in corrupt PDF documents that violate the
// specification by *inlining* Page dicts directly in the Kids
// array, rather than using indirect objects (fixes issue9540.pdf).
if (isName(currentNode.get('Type'), 'Page') ||
(!currentNode.has('Type') && currentNode.has('Contents'))) {
if (currentPageIndex === pageIndex) {
capability.resolve([currentNode, null]);
return;
}
currentPageIndex++;
continue;
}
capability.reject(new FormatError(
'Page dictionary kids object is not an array.'));
return;
}
// Always check all `Kids` nodes, to avoid getting stuck in an empty
// node further down in the tree (see issue5644.pdf, issue8088.pdf),
// and to ensure that we actually find the correct `Page` dict.
for (let last = kids.length - 1; last >= 0; last--) {
nodesToVisit.push(kids[last]);
2013-02-07 08:19:29 +09:00
}
}
capability.reject(new Error(`Page index ${pageIndex} not found.`));
}
next();
return capability.promise;
}
getPageIndex(pageRef) {
// The page tree nodes have the count of all the leaves below them. To get
// how many pages are before we just have to walk up the tree and keep
// adding the count of siblings to the left of the node.
const xref = this.xref;
function pagesBeforeRef(kidRef) {
let total = 0, parentRef;
return xref.fetchAsync(kidRef).then(function(node) {
if (isRefsEqual(kidRef, pageRef) && !isDict(node, 'Page') &&
!(isDict(node) && !node.has('Type') && node.has('Contents'))) {
throw new FormatError(
'The reference does not point to a /Page dictionary.');
}
if (!node) {
return null;
}
if (!isDict(node)) {
throw new FormatError('Node must be a dictionary.');
}
parentRef = node.getRaw('Parent');
return node.getAsync('Parent');
}).then(function(parent) {
if (!parent) {
return null;
}
if (!isDict(parent)) {
throw new FormatError('Parent must be a dictionary.');
}
return parent.getAsync('Kids');
}).then(function(kids) {
if (!kids) {
return null;
}
const kidPromises = [];
let found = false;
for (let i = 0, ii = kids.length; i < ii; i++) {
const kid = kids[i];
if (!isRef(kid)) {
throw new FormatError('Kid must be a reference.');
}
if (isRefsEqual(kid, kidRef)) {
found = true;
break;
}
kidPromises.push(xref.fetchAsync(kid).then(function(kid) {
if (!isDict(kid)) {
throw new FormatError('Kid node must be a dictionary.');
}
if (kid.has('Count')) {
total += kid.get('Count');
} else { // Page leaf node.
total++;
}
}));
}
if (!found) {
throw new FormatError('Kid reference not found in parent\'s kids.');
}
return Promise.all(kidPromises).then(function() {
return [total, parentRef];
});
});
}
let total = 0;
function next(ref) {
return pagesBeforeRef(ref).then(function(args) {
if (!args) {
return total;
}
const [count, parentRef] = args;
total += count;
return next(parentRef);
});
}
return next(pageRef);
}
2011-10-25 08:55:23 +09:00
/**
* @typedef ParseDestDictionaryParameters
* @property {Dict} destDict - The dictionary containing the destination.
* @property {Object} resultObj - The object where the parsed destination
* properties will be placed.
* @property {string} [docBaseUrl] - The document base URL that is used when
* attempting to recover valid absolute URLs from relative ones.
*/
/**
* Helper function used to parse the contents of destination dictionaries.
* @param {ParseDestDictionaryParameters} params
*/
static parseDestDictionary(params) {
// Lets URLs beginning with 'www.' default to using the 'http://' protocol.
function addDefaultProtocolToUrl(url) {
return (url.startsWith('www.') ? `http://${url}` : url);
}
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding; see Bugzilla 1122280.
function tryConvertUrlEncoding(url) {
try {
return stringToUTF8String(url);
} catch (e) {
return url;
}
}
const destDict = params.destDict;
if (!isDict(destDict)) {
warn('parseDestDictionary: `destDict` must be a dictionary.');
return;
}
const resultObj = params.resultObj;
if (typeof resultObj !== 'object') {
warn('parseDestDictionary: `resultObj` must be an object.');
return;
}
const docBaseUrl = params.docBaseUrl || null;
let action = destDict.get('A'), url, dest;
if (!isDict(action) && destDict.has('Dest')) {
// A /Dest entry should *only* contain a Name or an Array, but some bad
// PDF generators ignore that and treat it as an /A entry.
action = destDict.get('Dest');
}
if (isDict(action)) {
const actionType = action.get('S');
if (!isName(actionType)) {
warn('parseDestDictionary: Invalid type in Action dictionary.');
return;
}
const actionName = actionType.name;
switch (actionName) {
case 'URI':
url = action.get('URI');
if (isName(url)) {
// Some bad PDFs do not put parentheses around relative URLs.
url = '/' + url.name;
} else if (isString(url)) {
url = addDefaultProtocolToUrl(url);
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
break;
case 'GoTo':
dest = action.get('D');
break;
case 'Launch':
// We neither want, nor can, support arbitrary 'Launch' actions.
// However, in practice they are mostly used for linking to other PDF
// files, which we thus attempt to support (utilizing `docBaseUrl`).
/* falls through */
case 'GoToR':
const urlDict = action.get('F');
if (isDict(urlDict)) {
// We assume that we found a FileSpec dictionary
// and fetch the URL without checking any further.
url = urlDict.get('F') || null;
} else if (isString(urlDict)) {
url = urlDict;
}
// NOTE: the destination is relative to the *remote* document.
let remoteDest = action.get('D');
if (remoteDest) {
if (isName(remoteDest)) {
remoteDest = remoteDest.name;
}
if (isString(url)) {
const baseUrl = url.split('#')[0];
if (isString(remoteDest)) {
url = baseUrl + '#' + remoteDest;
} else if (Array.isArray(remoteDest)) {
url = baseUrl + '#' + JSON.stringify(remoteDest);
}
}
}
// The 'NewWindow' property, equal to `LinkTarget.BLANK`.
const newWindow = action.get('NewWindow');
if (isBool(newWindow)) {
resultObj.newWindow = newWindow;
}
break;
case 'Named':
const namedAction = action.get('N');
if (isName(namedAction)) {
resultObj.action = namedAction.name;
}
break;
case 'JavaScript':
const jsAction = action.get('JS');
let js;
if (isStream(jsAction)) {
js = bytesToString(jsAction.getBytes());
} else if (isString(jsAction)) {
js = jsAction;
}
if (js) {
// Attempt to recover valid URLs from `JS` entries with certain
// white-listed formats:
// - window.open('http://example.com')
// - app.launchURL('http://example.com', true)
const URL_OPEN_METHODS = [
'app.launchURL',
'window.open'
];
const regex = new RegExp(
'^\\s*(' + URL_OPEN_METHODS.join('|').split('.').join('\\.') +
')\\((?:\'|\")([^\'\"]*)(?:\'|\")(?:,\\s*(\\w+)\\)|\\))', 'i');
const jsUrl = regex.exec(stringToPDFString(js));
if (jsUrl && jsUrl[2]) {
url = jsUrl[2];
if (jsUrl[3] === 'true' && jsUrl[1] === 'app.launchURL') {
resultObj.newWindow = true;
}
break;
}
}
/* falls through */
default:
warn(`parseDestDictionary: unsupported action type "${actionName}".`);
break;
}
} else if (destDict.has('Dest')) { // Simple destination.
dest = destDict.get('Dest');
}
if (isString(url)) {
url = tryConvertUrlEncoding(url);
const absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl);
if (absoluteUrl) {
resultObj.url = absoluteUrl.href;
}
resultObj.unsafeUrl = url;
}
if (dest) {
if (isName(dest)) {
dest = dest.name;
}
if (isString(dest) || Array.isArray(dest)) {
resultObj.dest = dest;
}
}
}
}
2011-10-25 08:55:23 +09:00
2011-12-07 07:18:40 +09:00
var XRef = (function XRefClosure() {
function XRef(stream, pdfManager) {
2011-10-25 08:55:23 +09:00
this.stream = stream;
this.pdfManager = pdfManager;
2011-10-25 08:55:23 +09:00
this.entries = [];
this.xrefstms = Object.create(null);
this._cacheMap = new Map(); // Prepare the XRef cache.
this.stats = {
streamTypes: Object.create(null),
fontTypes: Object.create(null),
};
2011-10-25 08:55:23 +09:00
}
2011-12-07 07:18:40 +09:00
XRef.prototype = {
2013-02-07 08:19:29 +09:00
setStartXRef: function XRef_setStartXRef(startXRef) {
// Store the starting positions of xref tables as we process them
// so we can recover from missing data errors
this.startXRefQueue = [startXRef];
},
parse: function XRef_parse(recoveryMode) {
var trailerDict;
if (!recoveryMode) {
trailerDict = this.readXRef();
} else {
warn('Indexing all PDF objects');
trailerDict = this.indexObjects();
}
trailerDict.assignXref(this);
this.trailer = trailerDict;
let encrypt;
try {
encrypt = trailerDict.get('Encrypt');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
}
if (isDict(encrypt)) {
2013-02-07 08:19:29 +09:00
var ids = trailerDict.get('ID');
var fileId = (ids && ids.length) ? ids[0] : '';
// The 'Encrypt' dictionary itself should not be encrypted, and by
// setting `suppressEncryption` we can prevent an infinite loop inside
// of `XRef_fetchUncompressed` if the dictionary contains indirect
// objects (fixes issue7665.pdf).
encrypt.suppressEncryption = true;
this.encrypt = new CipherTransformFactory(encrypt, fileId,
this.pdfManager.password);
2013-02-07 08:19:29 +09:00
}
// Get the root dictionary (catalog) object, and do some basic validation.
let root;
try {
root = trailerDict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
}
if (isDict(root) && root.has('Pages')) {
this.root = root;
} else {
if (!recoveryMode) {
throw new XRefParseException();
}
2017-06-29 05:51:31 +09:00
throw new FormatError('Invalid root reference');
2013-02-07 08:19:29 +09:00
}
},
processXRefTable: function XRef_processXRefTable(parser) {
if (!('tableState' in this)) {
// Stores state of the table as we process it so we can resume
// from middle of table in case of missing data error
this.tableState = {
entryNum: 0,
streamPos: parser.lexer.stream.pos,
parserBuf1: parser.buf1,
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
parserBuf2: parser.buf2,
2013-02-07 08:19:29 +09:00
};
}
var obj = this.readXRefTable(parser);
// Sanity check
if (!isCmd(obj, 'trailer')) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
'Invalid XRef table: could not find trailer dictionary');
}
2013-02-07 08:19:29 +09:00
// Read trailer dictionary, e.g.
// trailer
// << /Size 22
// /Root 20R
// /Info 10R
// /ID [ <81b14aafa313db63dbd6f981e49f94f4> ]
// >>
// The parser goes through the entire stream << ... >> and provides
// a getter interface for the key-value table
var dict = parser.getObj();
// The pdflib PDF generator can generate a nested trailer dictionary
if (!isDict(dict) && dict.dict) {
dict = dict.dict;
}
if (!isDict(dict)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
'Invalid XRef table: could not parse trailer dictionary');
}
2013-02-07 08:19:29 +09:00
delete this.tableState;
return dict;
},
readXRefTable: function XRef_readXRefTable(parser) {
2012-01-31 23:01:04 +09:00
// Example of cross-reference table:
// xref
// 0 1 <-- subsection header (first obj #, obj count)
// 0000000000 65535 f <-- actual object (offset, generation #, f/n)
// 23 2 <-- subsection header ... and so on ...
2012-02-01 00:57:32 +09:00
// 0000025518 00002 n
2012-01-31 23:01:04 +09:00
// 0000025635 00000 n
// trailer
// ...
2012-02-01 00:57:32 +09:00
2013-02-07 08:19:29 +09:00
var stream = parser.lexer.stream;
var tableState = this.tableState;
stream.pos = tableState.streamPos;
parser.buf1 = tableState.parserBuf1;
parser.buf2 = tableState.parserBuf2;
2012-01-31 23:01:04 +09:00
// Outer loop is over subsection headers
2011-10-25 08:55:23 +09:00
var obj;
2012-01-31 23:01:04 +09:00
2013-02-07 08:19:29 +09:00
while (true) {
if (!('firstEntryNum' in tableState) || !('entryCount' in tableState)) {
if (isCmd(obj = parser.getObj(), 'trailer')) {
break;
}
tableState.firstEntryNum = obj;
tableState.entryCount = parser.getObj();
}
var first = tableState.firstEntryNum;
var count = tableState.entryCount;
if (!Number.isInteger(first) || !Number.isInteger(count)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
'Invalid XRef table: wrong types in subsection header');
}
2012-01-31 23:01:04 +09:00
// Inner loop is over objects themselves
2013-02-07 08:19:29 +09:00
for (var i = tableState.entryNum; i < count; i++) {
tableState.streamPos = stream.pos;
tableState.entryNum = i;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
2011-10-25 08:55:23 +09:00
var entry = {};
2012-01-31 23:01:04 +09:00
entry.offset = parser.getObj();
entry.gen = parser.getObj();
var type = parser.getObj();
if (type instanceof Cmd) {
switch (type.cmd) {
case 'f':
entry.free = true;
break;
case 'n':
entry.uncompressed = true;
break;
}
}
2011-10-25 08:55:23 +09:00
2012-01-31 23:01:04 +09:00
// Validate entry obj
if (!Number.isInteger(entry.offset) || !Number.isInteger(entry.gen) ||
2012-02-01 00:57:32 +09:00
!(entry.free || entry.uncompressed)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
`Invalid entry in XRef subsection: ${first}, ${count}`);
2011-10-25 08:55:23 +09:00
}
// The first xref table entry, i.e. obj 0, should be free. Attempting
// to adjust an incorrect first obj # (fixes issue 3248 and 7229).
if (i === 0 && entry.free && first === 1) {
first = 0;
}
if (!this.entries[i + first]) {
2012-01-31 23:57:12 +09:00
this.entries[i + first] = entry;
}
2011-10-25 08:55:23 +09:00
}
2013-02-07 08:19:29 +09:00
tableState.entryNum = 0;
tableState.streamPos = stream.pos;
tableState.parserBuf1 = parser.buf1;
tableState.parserBuf2 = parser.buf2;
delete tableState.firstEntryNum;
delete tableState.entryCount;
2011-10-25 08:55:23 +09:00
}
// Sanity check: as per spec, first object must be free
if (this.entries[0] && !this.entries[0].free) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
'Invalid XRef table: unexpected first object');
}
2013-02-07 08:19:29 +09:00
return obj;
},
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
processXRefStream: function XRef_processXRefStream(stream) {
if (!('streamState' in this)) {
// Stores state of the stream as we process it so we can resume
// from middle of stream in case of missing data error
var streamParameters = stream.dict;
2013-02-07 08:19:29 +09:00
var byteWidths = streamParameters.get('W');
var range = streamParameters.get('Index');
if (!range) {
range = [0, streamParameters.get('Size')];
}
2011-10-25 08:55:23 +09:00
2013-02-07 08:19:29 +09:00
this.streamState = {
entryRanges: range,
byteWidths,
2013-02-07 08:19:29 +09:00
entryNum: 0,
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
streamPos: stream.pos,
2013-02-07 08:19:29 +09:00
};
}
this.readXRefStream(stream);
delete this.streamState;
return stream.dict;
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
readXRefStream: function XRef_readXRefStream(stream) {
2011-10-25 08:55:23 +09:00
var i, j;
2013-02-07 08:19:29 +09:00
var streamState = this.streamState;
stream.pos = streamState.streamPos;
var byteWidths = streamState.byteWidths;
var typeFieldWidth = byteWidths[0];
var offsetFieldWidth = byteWidths[1];
var generationFieldWidth = byteWidths[2];
var entryRanges = streamState.entryRanges;
while (entryRanges.length > 0) {
var first = entryRanges[0];
var n = entryRanges[1];
if (!Number.isInteger(first) || !Number.isInteger(n)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
`Invalid XRef range fields: ${first}, ${n}`);
}
if (!Number.isInteger(typeFieldWidth) ||
!Number.isInteger(offsetFieldWidth) ||
!Number.isInteger(generationFieldWidth)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
`Invalid XRef entry fields length: ${first}, ${n}`);
2011-10-25 08:55:23 +09:00
}
2013-02-07 08:19:29 +09:00
for (i = streamState.entryNum; i < n; ++i) {
streamState.entryNum = i;
streamState.streamPos = stream.pos;
2011-10-25 08:55:23 +09:00
var type = 0, offset = 0, generation = 0;
for (j = 0; j < typeFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
type = (type << 8) | stream.getByte();
}
// if type field is absent, its default value is 1
if (typeFieldWidth === 0) {
2011-10-25 08:55:23 +09:00
type = 1;
}
for (j = 0; j < offsetFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
offset = (offset << 8) | stream.getByte();
}
for (j = 0; j < generationFieldWidth; ++j) {
2011-10-25 08:55:23 +09:00
generation = (generation << 8) | stream.getByte();
}
2011-10-25 08:55:23 +09:00
var entry = {};
entry.offset = offset;
entry.gen = generation;
switch (type) {
case 0:
entry.free = true;
break;
case 1:
entry.uncompressed = true;
break;
case 2:
break;
default:
2017-06-29 05:51:31 +09:00
throw new FormatError(`Invalid XRef entry type: ${type}`);
2011-10-25 08:55:23 +09:00
}
if (!this.entries[first + i]) {
2011-10-25 08:55:23 +09:00
this.entries[first + i] = entry;
}
2011-10-25 08:55:23 +09:00
}
2013-02-07 08:19:29 +09:00
streamState.entryNum = 0;
streamState.streamPos = stream.pos;
entryRanges.splice(0, 2);
2011-10-25 08:55:23 +09:00
}
},
indexObjects: function XRef_indexObjects() {
2011-10-25 08:55:23 +09:00
// Simple scan through the PDF content to find objects,
// trailers and XRef streams.
var TAB = 0x9, LF = 0xA, CR = 0xD, SPACE = 0x20;
var PERCENT = 0x25, LT = 0x3C;
2011-10-25 08:55:23 +09:00
function readToken(data, offset) {
var token = '', ch = data[offset];
while (ch !== LF && ch !== CR && ch !== LT) {
if (++offset >= data.length) {
2011-10-25 08:55:23 +09:00
break;
}
2011-10-25 08:55:23 +09:00
token += String.fromCharCode(ch);
ch = data[offset];
}
return token;
}
function skipUntil(data, offset, what) {
var length = what.length, dataLength = data.length;
var skipped = 0;
// finding byte sequence
while (offset < dataLength) {
var i = 0;
while (i < length && data[offset + i] === what[i]) {
2011-10-25 08:55:23 +09:00
++i;
}
if (i >= length) {
2011-10-25 08:55:23 +09:00
break; // sequence found
}
2011-10-25 08:55:23 +09:00
offset++;
skipped++;
}
return skipped;
}
var objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
const endobjRegExp = /\bendobj[\b\s]$/;
const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/;
const CHECK_CONTENT_LENGTH = 25;
2011-10-25 08:55:23 +09:00
var trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
var startxrefBytes = new Uint8Array([115, 116, 97, 114, 116, 120, 114,
101, 102]);
const objBytes = new Uint8Array([111, 98, 106]);
2011-10-25 08:55:23 +09:00
var xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
// Clear out any existing entries, since they may be bogus.
this.entries.length = 0;
2011-10-25 08:55:23 +09:00
var stream = this.stream;
stream.pos = 0;
var buffer = stream.getBytes();
var position = stream.start, length = buffer.length;
var trailers = [], xrefStms = [];
while (position < length) {
var ch = buffer[position];
if (ch === TAB || ch === LF || ch === CR || ch === SPACE) {
2011-10-25 08:55:23 +09:00
++position;
continue;
}
if (ch === PERCENT) { // %-comment
2011-10-25 08:55:23 +09:00
do {
++position;
2013-08-24 02:57:11 +09:00
if (position >= length) {
break;
}
2011-10-25 08:55:23 +09:00
ch = buffer[position];
} while (ch !== LF && ch !== CR);
2011-10-25 08:55:23 +09:00
continue;
}
var token = readToken(buffer, position);
var m;
if (token.startsWith('xref') &&
(token.length === 4 || /\s/.test(token[4]))) {
2011-10-25 08:55:23 +09:00
position += skipUntil(buffer, position, trailerBytes);
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
} else if ((m = objRegExp.exec(token))) {
const num = m[1] | 0, gen = m[2] | 0;
if (typeof this.entries[num] === 'undefined') {
this.entries[num] = {
offset: position - stream.start,
gen,
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
uncompressed: true,
2015-04-04 15:15:31 +09:00
};
}
let contentLength, startPos = position + token.length;
// Find the next "obj" string, rather than "endobj", to ensure that
// we won't skip over a new 'obj' operator in corrupt files where
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
while (startPos < buffer.length) {
let endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
contentLength = endPos - position;
let checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos);
let tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
// Check if the current object ends with an 'endobj' operator.
if (endobjRegExp.test(tokenStr)) {
break;
} else {
2018-04-02 06:20:41 +09:00
// Check if an "obj" occurrence is actually a new object,
// i.e. the current object is missing the 'endobj' operator.
let objToken = nestedObjRegExp.exec(tokenStr);
if (objToken && objToken[1]) {
warn('indexObjects: Found new "obj" inside of another "obj", ' +
'caused by missing "endobj" -- trying to recover.');
contentLength -= objToken[1].length;
break;
}
}
startPos = endPos;
}
let content = buffer.subarray(position, position + contentLength);
2011-10-25 08:55:23 +09:00
// checking XRef stream suspect
// (it shall have '/XRef' and next char is not a letter)
var xrefTagOffset = skipUntil(content, 0, xrefBytes);
if (xrefTagOffset < contentLength &&
content[xrefTagOffset + 5] < 64) {
xrefStms.push(position - stream.start);
this.xrefstms[position - stream.start] = 1; // Avoid recursion
2011-10-25 08:55:23 +09:00
}
position += contentLength;
} else if (token.startsWith('trailer') &&
(token.length === 7 || /\s/.test(token[7]))) {
trailers.push(position);
position += skipUntil(buffer, position, startxrefBytes);
} else {
2011-10-25 08:55:23 +09:00
position += token.length + 1;
}
2011-10-25 08:55:23 +09:00
}
// reading XRef streams
2014-04-08 06:42:54 +09:00
var i, ii;
for (i = 0, ii = xrefStms.length; i < ii; ++i) {
2013-02-07 08:19:29 +09:00
this.startXRefQueue.push(xrefStms[i]);
this.readXRef(/* recoveryMode */ true);
2011-10-25 08:55:23 +09:00
}
// finding main trailer
let trailerDict;
2014-04-08 06:42:54 +09:00
for (i = 0, ii = trailers.length; i < ii; ++i) {
2011-10-25 08:55:23 +09:00
stream.pos = trailers[i];
const parser = new Parser({
lexer: new Lexer(stream),
xref: this,
allowStreams: true,
recoveryMode: true,
});
2011-10-25 08:55:23 +09:00
var obj = parser.getObj();
if (!isCmd(obj, 'trailer')) {
2011-10-25 08:55:23 +09:00
continue;
}
2011-10-25 08:55:23 +09:00
// read the trailer dictionary
let dict = parser.getObj();
if (!isDict(dict)) {
2011-10-25 08:55:23 +09:00
continue;
}
// Do some basic validation of the trailer/root dictionary candidate.
let rootDict;
try {
rootDict = dict.get('Root');
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
continue;
}
if (!isDict(rootDict) || !rootDict.has('Pages')) {
continue;
}
2011-10-25 08:55:23 +09:00
// taking the first one with 'ID'
if (dict.has('ID')) {
2011-10-25 08:55:23 +09:00
return dict;
}
// The current dictionary is a candidate, but continue searching.
trailerDict = dict;
2011-10-25 08:55:23 +09:00
}
// No trailer with 'ID', taking last one (if exists).
if (trailerDict) {
return trailerDict;
}
2011-10-25 08:55:23 +09:00
// nothing helps
throw new InvalidPDFException('Invalid PDF structure');
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
readXRef: function XRef_readXRef(recoveryMode) {
2011-10-25 08:55:23 +09:00
var stream = this.stream;
// Keep track of already parsed XRef tables, to prevent an infinite loop
// when parsing corrupt PDF files where e.g. the /Prev entries create a
// circular dependency between tables (fixes bug1393476.pdf).
let startXRefParsedCache = Object.create(null);
2011-12-03 06:35:18 +09:00
try {
2013-02-07 08:19:29 +09:00
while (this.startXRefQueue.length) {
var startXRef = this.startXRefQueue[0];
2012-02-01 00:49:06 +09:00
if (startXRefParsedCache[startXRef]) {
warn('readXRef - skipping XRef table since it was already parsed.');
this.startXRefQueue.shift();
continue;
}
startXRefParsedCache[startXRef] = true;
stream.pos = startXRef + stream.start;
2012-02-01 00:49:06 +09:00
const parser = new Parser({
lexer: new Lexer(stream),
xref: this,
allowStreams: true,
});
2013-02-07 08:19:29 +09:00
var obj = parser.getObj();
var dict;
// Get dictionary
if (isCmd(obj, 'xref')) {
// Parse end-of-file XRef
dict = this.processXRefTable(parser);
if (!this.topDict) {
this.topDict = dict;
2012-02-01 00:49:06 +09:00
}
2013-02-07 08:19:29 +09:00
// Recursively get other XRefs 'XRefStm', if any
obj = dict.get('XRefStm');
if (Number.isInteger(obj)) {
2013-02-07 08:19:29 +09:00
var pos = obj;
// ignore previously loaded xref streams
// (possible infinite recursion)
if (!(pos in this.xrefstms)) {
this.xrefstms[pos] = 1;
this.startXRefQueue.push(pos);
}
}
} else if (Number.isInteger(obj)) {
2013-02-07 08:19:29 +09:00
// Parse in-stream XRef
if (!Number.isInteger(parser.getObj()) ||
2013-02-07 08:19:29 +09:00
!isCmd(parser.getObj(), 'obj') ||
!isStream(obj = parser.getObj())) {
2017-06-29 05:51:31 +09:00
throw new FormatError('Invalid XRef stream');
2013-02-07 08:19:29 +09:00
}
dict = this.processXRefStream(obj);
if (!this.topDict) {
this.topDict = dict;
}
if (!dict) {
2017-06-29 05:51:31 +09:00
throw new FormatError('Failed to read XRef stream');
}
} else {
2017-06-29 05:51:31 +09:00
throw new FormatError('Invalid XRef stream header');
2012-02-01 00:49:06 +09:00
}
2013-02-07 08:19:29 +09:00
// Recursively get previous dictionary, if any
obj = dict.get('Prev');
if (Number.isInteger(obj)) {
2013-02-07 08:19:29 +09:00
this.startXRefQueue.push(obj);
} else if (isRef(obj)) {
// The spec says Prev must not be a reference, i.e. "/Prev NNN"
// This is a fallback for non-compliant PDFs, i.e. "/Prev NNN 0 R"
this.startXRefQueue.push(obj.num);
}
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
this.startXRefQueue.shift();
2011-10-25 08:55:23 +09:00
}
2012-02-01 00:49:06 +09:00
2013-02-07 08:19:29 +09:00
return this.topDict;
2011-12-03 06:35:18 +09:00
} catch (e) {
2013-02-07 08:19:29 +09:00
if (e instanceof MissingDataException) {
throw e;
}
2014-01-16 06:28:31 +09:00
info('(while reading XRef): ' + e);
2011-10-25 08:55:23 +09:00
}
2011-12-03 06:35:18 +09:00
if (recoveryMode) {
return undefined;
}
2013-02-07 08:19:29 +09:00
throw new XRefParseException();
2011-10-25 08:55:23 +09:00
},
2013-02-07 08:19:29 +09:00
getEntry: function XRef_getEntry(i) {
var xrefEntry = this.entries[i];
if (xrefEntry && !xrefEntry.free && xrefEntry.offset) {
return xrefEntry;
}
return null;
2011-10-25 08:55:23 +09:00
},
fetchIfRef: function XRef_fetchIfRef(obj, suppressEncryption) {
if (obj instanceof Ref) {
return this.fetch(obj, suppressEncryption);
}
return obj;
2011-10-25 08:55:23 +09:00
},
fetch: function XRef_fetch(ref, suppressEncryption) {
if (!(ref instanceof Ref)) {
throw new Error('ref object is not a reference');
}
const num = ref.num;
if (this._cacheMap.has(num)) {
const cacheEntry = this._cacheMap.get(num);
// In documents with Object Streams, it's possible that cached `Dict`s
// have not been assigned an `objId` yet (see e.g. issue3115r.pdf).
if (cacheEntry instanceof Dict && !cacheEntry.objId) {
cacheEntry.objId = ref.toString();
}
return cacheEntry;
}
let xrefEntry = this.getEntry(num);
if (xrefEntry === null) { // The referenced entry can be free.
this._cacheMap.set(num, xrefEntry);
return xrefEntry;
}
if (xrefEntry.uncompressed) {
xrefEntry = this.fetchUncompressed(ref, xrefEntry, suppressEncryption);
} else {
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
xrefEntry = this.fetchCompressed(ref, xrefEntry, suppressEncryption);
}
if (isDict(xrefEntry)) {
xrefEntry.objId = ref.toString();
} else if (isStream(xrefEntry)) {
xrefEntry.dict.objId = ref.toString();
}
return xrefEntry;
},
2011-10-25 08:55:23 +09:00
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
fetchUncompressed(ref, xrefEntry, suppressEncryption = false) {
2011-10-25 08:55:23 +09:00
var gen = ref.gen;
var num = ref.num;
if (xrefEntry.gen !== gen) {
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
throw new XRefEntryException(`Inconsistent generation in XRef: ${ref}`);
}
var stream = this.stream.makeSubStream(xrefEntry.offset +
this.stream.start);
const parser = new Parser({
lexer: new Lexer(stream),
xref: this,
allowStreams: true,
});
var obj1 = parser.getObj();
var obj2 = parser.getObj();
var obj3 = parser.getObj();
if (!Number.isInteger(obj1)) {
obj1 = parseInt(obj1, 10);
}
if (!Number.isInteger(obj2)) {
obj2 = parseInt(obj2, 10);
}
if (obj1 !== num || obj2 !== gen || !(obj3 instanceof Cmd)) {
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (obj3.cmd !== 'obj') {
// some bad PDFs use "obj1234" and really mean 1234
if (obj3.cmd.startsWith('obj')) {
num = parseInt(obj3.cmd.substring(3), 10);
if (!Number.isNaN(num)) {
return num;
2011-10-25 08:55:23 +09:00
}
}
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
throw new XRefEntryException(`Bad (uncompressed) XRef entry: ${ref}`);
}
if (this.encrypt && !suppressEncryption) {
xrefEntry = parser.getObj(this.encrypt.createCipherTransform(num, gen));
} else {
xrefEntry = parser.getObj();
2011-10-25 08:55:23 +09:00
}
if (!isStream(xrefEntry)) {
this._cacheMap.set(num, xrefEntry);
}
return xrefEntry;
},
2011-10-25 08:55:23 +09:00
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
fetchCompressed(ref, xrefEntry, suppressEncryption = false) {
var tableOffset = xrefEntry.offset;
var stream = this.fetch(Ref.get(tableOffset, 0));
if (!isStream(stream)) {
2017-06-29 05:51:31 +09:00
throw new FormatError('bad ObjStm stream');
}
var first = stream.dict.get('First');
var n = stream.dict.get('N');
if (!Number.isInteger(first) || !Number.isInteger(n)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
'invalid first and n parameters for ObjStm stream');
2011-10-25 08:55:23 +09:00
}
const parser = new Parser({
lexer: new Lexer(stream),
xref: this,
allowStreams: true,
});
var i, entries = [], num, nums = [];
2011-10-25 08:55:23 +09:00
// read the object numbers to populate cache
for (i = 0; i < n; ++i) {
num = parser.getObj();
if (!Number.isInteger(num)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
`invalid object number in the ObjStm stream: ${num}`);
2011-10-25 08:55:23 +09:00
}
nums.push(num);
var offset = parser.getObj();
if (!Number.isInteger(offset)) {
2017-06-29 05:51:31 +09:00
throw new FormatError(
`invalid object offset in the ObjStm stream: ${offset}`);
2011-10-25 08:55:23 +09:00
}
}
// read stream objects for cache
for (i = 0; i < n; ++i) {
entries.push(parser.getObj());
// The ObjStm should not contain 'endobj'. If it's present, skip over it
// to support corrupt PDFs (fixes issue 5241, bug 898610, bug 1037816).
if (isCmd(parser.buf1, 'endobj')) {
parser.shift();
}
num = nums[i];
var entry = this.entries[num];
if (entry && entry.offset === tableOffset && entry.gen === i) {
this._cacheMap.set(num, entries[i]);
}
2011-10-25 08:55:23 +09:00
}
xrefEntry = entries[xrefEntry.gen];
if (xrefEntry === undefined) {
Check that the first page can be successfully loaded, to try and ascertain the validity of the XRef table (issue 7496, issue 10326) For PDF documents with sufficiently broken XRef tables, it's usually quite obvious when you need to fallback to indexing the entire file. However, for certain kinds of corrupted PDF documents the XRef table will, for all intents and purposes, appear to be valid. It's not until you actually try to fetch various objects that things will start to break, which is the case in the referenced issues[1]. Since there's generally a real effort being in made PDF.js to load even corrupt PDF documents, this patch contains a suggested approach to attempt to do a bit more validation of the XRef table during the initial document loading phase. Here the choice is made to attempt to load the *first* page, as a basic sanity check of the validity of the XRef table. Please note that attempting to load a more-or-less arbitrarily chosen object without any context of what it's supposed to be isn't a very useful, which is why this particular choice was made. Obviously, just because the first page can be loaded successfully that doesn't guarantee that the *entire* XRef table is valid, however if even the first page fails to load you can be reasonably sure that the document is *not* valid[2]. Even though this patch won't cause any significant increase in the amount of parsing required during initial loading of the document[3], it will require loading of more data upfront which thus delays the initial `getDocument` call. Whether or not this is a problem depends very much on what you actually measure, please consider the following examples: ```javascript console.time('first'); getDocument(...).promise.then((pdfDocument) => { console.timeEnd('first'); }); console.time('second'); getDocument(...).promise.then((pdfDocument) => { pdfDocument.getPage(1).then((pdfPage) => { // Note: the API uses `pageNumber >= 1`, the Worker uses `pageIndex >= 0`. console.timeEnd('second'); }); }); ``` The first case is pretty much guaranteed to show a small regression, however the second case won't be affected at all since the Worker caches the result of `getPage` calls. Again, please remember that the second case is what matters for the standard PDF.js use-case which is why I'm hoping that this patch is deemed acceptable. --- [1] In issue 7496, the problem is that the document is edited without the XRef table being correctly updated. In issue 10326, the generator was sorting the XRef table according to the offsets rather than the objects. [2] The idea of checking the first page in particular came from the "standard" use-case for the PDF.js library, i.e. the default viewer, where a failure to load the first page basically means that nothing will work; note how `{BaseViewer, PDFThumbnailViewer}.setDocument` depends completely on being able to fetch the *first* page. [3] The only extra parsing is caused by, potentially, having to traverse *part* of the `Pages` tree to find the first page.
2018-12-05 05:51:27 +09:00
throw new XRefEntryException(`Bad (compressed) XRef entry: ${ref}`);
2011-10-25 08:55:23 +09:00
}
return xrefEntry;
2011-10-25 08:55:23 +09:00
},
async fetchIfRefAsync(obj, suppressEncryption) {
if (obj instanceof Ref) {
return this.fetchAsync(obj, suppressEncryption);
}
return obj;
},
async fetchAsync(ref, suppressEncryption) {
try {
return this.fetch(ref, suppressEncryption);
} catch (ex) {
if (!(ex instanceof MissingDataException)) {
throw ex;
}
await this.pdfManager.requestRange(ex.begin, ex.end);
return this.fetchAsync(ref, suppressEncryption);
}
},
getCatalogObj: function XRef_getCatalogObj() {
2012-04-06 00:12:48 +09:00
return this.root;
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
},
2011-10-25 08:55:23 +09:00
};
2011-12-07 07:18:40 +09:00
return XRef;
2011-10-25 08:55:23 +09:00
})();
/**
2015-12-26 01:35:21 +09:00
* A NameTree/NumberTree is like a Dict but has some advantageous properties,
* see the specification (7.9.6 and 7.9.7) for additional details.
* TODO: implement all the Dict functions and make this more efficient.
*/
class NameOrNumberTree {
constructor(root, xref, type) {
if (this.constructor === NameOrNumberTree) {
unreachable('Cannot initialize NameOrNumberTree.');
}
this.root = root;
this.xref = xref;
this._type = type;
}
getAll() {
const dict = Object.create(null);
if (!this.root) {
return dict;
}
const xref = this.xref;
// Reading Name/Number tree.
const processed = new RefSet();
processed.put(this.root);
const queue = [this.root];
while (queue.length > 0) {
const obj = xref.fetchIfRef(queue.shift());
if (!isDict(obj)) {
continue;
}
if (obj.has('Kids')) {
const kids = obj.get('Kids');
for (let i = 0, ii = kids.length; i < ii; i++) {
const kid = kids[i];
if (processed.has(kid)) {
throw new FormatError(`Duplicate entry in "${this._type}" tree.`);
}
queue.push(kid);
processed.put(kid);
}
continue;
}
const entries = obj.get(this._type);
if (Array.isArray(entries)) {
for (let i = 0, ii = entries.length; i < ii; i += 2) {
dict[xref.fetchIfRef(entries[i])] = xref.fetchIfRef(entries[i + 1]);
}
}
}
return dict;
}
get(key) {
if (!this.root) {
return null;
}
const xref = this.xref;
let kidsOrEntries = xref.fetchIfRef(this.root);
let loopCount = 0;
const MAX_LEVELS = 10;
// Perform a binary search to quickly find the entry that
// contains the key we are looking for.
while (kidsOrEntries.has('Kids')) {
if (++loopCount > MAX_LEVELS) {
warn(`Search depth limit reached for "${this._type}" tree.`);
return null;
}
const kids = kidsOrEntries.get('Kids');
if (!Array.isArray(kids)) {
return null;
}
2015-02-03 00:12:52 +09:00
let l = 0, r = kids.length - 1;
while (l <= r) {
const m = (l + r) >> 1;
const kid = xref.fetchIfRef(kids[m]);
const limits = kid.get('Limits');
if (key < xref.fetchIfRef(limits[0])) {
r = m - 1;
} else if (key > xref.fetchIfRef(limits[1])) {
l = m + 1;
} else {
kidsOrEntries = xref.fetchIfRef(kids[m]);
break;
}
}
if (l > r) {
return null;
}
}
// If we get here, then we have found the right entry. Now go through the
// entries in the dictionary until we find the key we're looking for.
const entries = kidsOrEntries.get(this._type);
if (Array.isArray(entries)) {
// Perform a binary search to reduce the lookup time.
let l = 0, r = entries.length - 2;
while (l <= r) {
// Check only even indices (0, 2, 4, ...) because the
// odd indices contain the actual data.
const tmp = (l + r) >> 1, m = tmp + (tmp & 1);
const currentKey = xref.fetchIfRef(entries[m]);
if (key < currentKey) {
r = m - 2;
} else if (key > currentKey) {
l = m + 2;
} else {
return xref.fetchIfRef(entries[m + 1]);
}
}
// Fallback to an exhaustive search, in an attempt to handle corrupt
// PDF files where keys are not correctly ordered (fixes issue 10272).
info(`Falling back to an exhaustive search, for key "${key}", ` +
`in "${this._type}" tree.`);
for (let m = 0, mm = entries.length; m < mm; m += 2) {
const currentKey = xref.fetchIfRef(entries[m]);
if (currentKey === key) {
warn(`The "${key}" key was found at an incorrect, ` +
`i.e. out-of-order, position in "${this._type}" tree.`);
return xref.fetchIfRef(entries[m + 1]);
}
}
}
return null;
2015-12-26 01:35:21 +09:00
}
}
2015-12-26 01:35:21 +09:00
class NameTree extends NameOrNumberTree {
constructor(root, xref) {
super(root, xref, 'Names');
2015-12-26 01:35:21 +09:00
}
}
2015-12-26 01:35:21 +09:00
class NumberTree extends NameOrNumberTree {
constructor(root, xref) {
super(root, xref, 'Nums');
}
}
2015-12-26 01:35:21 +09:00
/**
2015-02-03 00:12:52 +09:00
* "A PDF file can refer to the contents of another file by using a File
* Specification (PDF 1.1)", see the spec (7.11) for more details.
* NOTE: Only embedded files are supported (as part of the attachments support)
2015-02-03 00:12:52 +09:00
* TODO: support the 'URL' file system (with caching if !/V), portable
* collections attributes and related files (/RF)
*/
var FileSpec = (function FileSpecClosure() {
function FileSpec(root, xref) {
if (!root || !isDict(root)) {
return;
}
this.xref = xref;
this.root = root;
if (root.has('FS')) {
this.fs = root.get('FS');
}
this.description = root.has('Desc') ?
stringToPDFString(root.get('Desc')) :
'';
if (root.has('RF')) {
warn('Related file specifications are not supported');
}
this.contentAvailable = true;
if (!root.has('EF')) {
this.contentAvailable = false;
warn('Non-embedded file specifications are not supported');
}
}
function pickPlatformItem(dict) {
// Look for the filename in this order:
// UF, F, Unix, Mac, DOS
if (dict.has('UF')) {
return dict.get('UF');
} else if (dict.has('F')) {
return dict.get('F');
} else if (dict.has('Unix')) {
return dict.get('Unix');
} else if (dict.has('Mac')) {
return dict.get('Mac');
} else if (dict.has('DOS')) {
return dict.get('DOS');
}
return null;
}
FileSpec.prototype = {
get filename() {
if (!this._filename && this.root) {
var filename = pickPlatformItem(this.root) || 'unnamed';
this._filename = stringToPDFString(filename).
replace(/\\\\/g, '\\').
replace(/\\\//g, '/').
replace(/\\/g, '/');
}
return this._filename;
},
get content() {
if (!this.contentAvailable) {
return null;
}
if (!this.contentRef && this.root) {
this.contentRef = pickPlatformItem(this.root.get('EF'));
}
var content = null;
if (this.contentRef) {
var xref = this.xref;
var fileObj = xref.fetchIfRef(this.contentRef);
if (fileObj && isStream(fileObj)) {
content = fileObj.getBytes();
} else {
warn('Embedded file specification points to non-existing/invalid ' +
'content');
}
} else {
warn('Embedded file specification does not have a content');
}
return content;
},
get serializable() {
return {
filename: this.filename,
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
content: this.content,
};
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
},
};
return FileSpec;
})();
/**
* A helper for loading missing data in `Dict` graphs. It traverses the graph
* depth first and queues up any objects that have missing data. Once it has
* has traversed as many objects that are available it attempts to bundle the
* missing data requests and then resume from the nodes that weren't ready.
*
* NOTE: It provides protection from circular references by keeping track of
* loaded references. However, you must be careful not to load any graphs
* that have references to the catalog or other pages since that will cause the
* entire PDF document object graph to be traversed.
*/
let ObjectLoader = (function() {
function mayHaveChildren(value) {
return isRef(value) || isDict(value) || Array.isArray(value) ||
isStream(value);
}
function addChildren(node, nodesToVisit) {
if (isDict(node) || isStream(node)) {
let dict = isDict(node) ? node : node.dict;
let dictKeys = dict.getKeys();
for (let i = 0, ii = dictKeys.length; i < ii; i++) {
let rawValue = dict.getRaw(dictKeys[i]);
if (mayHaveChildren(rawValue)) {
nodesToVisit.push(rawValue);
}
}
} else if (Array.isArray(node)) {
for (let i = 0, ii = node.length; i < ii; i++) {
let value = node[i];
if (mayHaveChildren(value)) {
nodesToVisit.push(value);
}
}
}
}
function ObjectLoader(dict, keys, xref) {
this.dict = dict;
this.keys = keys;
this.xref = xref;
this.refSet = null;
this.capability = null;
}
ObjectLoader.prototype = {
load() {
this.capability = createPromiseCapability();
// Don't walk the graph if all the data is already loaded.
if (!(this.xref.stream instanceof ChunkedStream) ||
this.xref.stream.getMissingChunks().length === 0) {
this.capability.resolve();
return this.capability.promise;
}
let { keys, dict, } = this;
this.refSet = new RefSet();
// Setup the initial nodes to visit.
let nodesToVisit = [];
for (let i = 0, ii = keys.length; i < ii; i++) {
let rawValue = dict.getRaw(keys[i]);
// Skip nodes that are guaranteed to be empty.
if (rawValue !== undefined) {
nodesToVisit.push(rawValue);
}
}
this._walk(nodesToVisit);
return this.capability.promise;
},
_walk(nodesToVisit) {
let nodesToRevisit = [];
let pendingRequests = [];
// DFS walk of the object graph.
while (nodesToVisit.length) {
let currentNode = nodesToVisit.pop();
// Only references or chunked streams can cause missing data exceptions.
if (isRef(currentNode)) {
// Skip nodes that have already been visited.
if (this.refSet.has(currentNode)) {
continue;
}
try {
this.refSet.put(currentNode);
currentNode = this.xref.fetch(currentNode);
} catch (ex) {
if (!(ex instanceof MissingDataException)) {
throw ex;
}
nodesToRevisit.push(currentNode);
pendingRequests.push({ begin: ex.begin, end: ex.end, });
}
}
if (currentNode && currentNode.getBaseStreams) {
let baseStreams = currentNode.getBaseStreams();
let foundMissingData = false;
for (let i = 0, ii = baseStreams.length; i < ii; i++) {
let stream = baseStreams[i];
if (stream.getMissingChunks && stream.getMissingChunks().length) {
foundMissingData = true;
pendingRequests.push({ begin: stream.start, end: stream.end, });
}
}
if (foundMissingData) {
nodesToRevisit.push(currentNode);
}
}
addChildren(currentNode, nodesToVisit);
}
if (pendingRequests.length) {
this.xref.stream.manager.requestRanges(pendingRequests).then(() => {
for (let i = 0, ii = nodesToRevisit.length; i < ii; i++) {
let node = nodesToRevisit[i];
// Remove any reference nodes from the current `RefSet` so they
// aren't skipped when we revist them.
if (isRef(node)) {
this.refSet.remove(node);
}
}
this._walk(nodesToRevisit);
}, this.capability.reject);
return;
}
// Everything is loaded.
this.refSet = null;
this.capability.resolve();
Fix inconsistent spacing and trailing commas in objects in `src/core/` files, so we can enable the `comma-dangle` and `object-curly-spacing` ESLint rules later on *Unfortunately this patch is fairly big, even though it only covers the `src/core` folder, but splitting it even further seemed difficult.* http://eslint.org/docs/rules/comma-dangle http://eslint.org/docs/rules/object-curly-spacing Given that we currently have quite inconsistent object formatting, fixing this in *one* big patch probably wouldn't be feasible (since I cannot imagine anyone wanting to review that); hence I've opted to try and do this piecewise instead. Please note: This patch was created automatically, using the ESLint --fix command line option. In a couple of places this caused lines to become too long, and I've fixed those manually; please refer to the interdiff below for the only hand-edits in this patch. ```diff diff --git a/src/core/evaluator.js b/src/core/evaluator.js index abab9027..dcd3594b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2785,7 +2785,8 @@ var EvaluatorPreprocessor = (function EvaluatorPreprocessorClosure() { t['Tz'] = { id: OPS.setHScale, numArgs: 1, variableArgs: false, }; t['TL'] = { id: OPS.setLeading, numArgs: 1, variableArgs: false, }; t['Tf'] = { id: OPS.setFont, numArgs: 2, variableArgs: false, }; - t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false, }; + t['Tr'] = { id: OPS.setTextRenderingMode, numArgs: 1, + variableArgs: false, }; t['Ts'] = { id: OPS.setTextRise, numArgs: 1, variableArgs: false, }; t['Td'] = { id: OPS.moveText, numArgs: 2, variableArgs: false, }; t['TD'] = { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false, }; diff --git a/src/core/jbig2.js b/src/core/jbig2.js index 5a17d482..71671541 100644 --- a/src/core/jbig2.js +++ b/src/core/jbig2.js @@ -123,19 +123,22 @@ var Jbig2Image = (function Jbig2ImageClosure() { { x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -2, y: 0, }, { x: -1, y: 0, }], [{ x: -3, y: -1, }, { x: -2, y: -1, }, { x: -1, y: -1, }, { x: 0, y: -1, }, - { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, { x: -1, y: 0, }] + { x: 1, y: -1, }, { x: -4, y: 0, }, { x: -3, y: 0, }, { x: -2, y: 0, }, + { x: -1, y: 0, }] ]; var RefinementTemplates = [ { coding: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, - { x: 1, y: 0, }, { x: -1, y: 1, }, { x: 0, y: 1, }, { x: 1, y: 1, }], + reference: [{ x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }, + { x: 0, y: 0, }, { x: 1, y: 0, }, { x: -1, y: 1, }, + { x: 0, y: 1, }, { x: 1, y: 1, }], }, { - coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, { x: -1, y: 0, }], - reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, { x: 1, y: 0, }, - { x: 0, y: 1, }, { x: 1, y: 1, }], + coding: [{ x: -1, y: -1, }, { x: 0, y: -1, }, { x: 1, y: -1, }, + { x: -1, y: 0, }], + reference: [{ x: 0, y: -1, }, { x: -1, y: 0, }, { x: 0, y: 0, }, + { x: 1, y: 0, }, { x: 0, y: 1, }, { x: 1, y: 1, }], } ]; ```
2017-06-02 18:16:24 +09:00
},
};
return ObjectLoader;
})();
export {
Catalog,
ObjectLoader,
XRef,
FileSpec,
};