When there is no tree, the tags for the new annotions are just put under the root element. When there is a tree, we insert the new tags at the right place in using the value of structTreeParentId (added in PR #16916).
916 lines
28 KiB
JavaScript
916 lines
28 KiB
JavaScript
/* Copyright 2012 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import {
|
|
AbortException,
|
|
assert,
|
|
getVerbosityLevel,
|
|
info,
|
|
InvalidPDFException,
|
|
isNodeJS,
|
|
MissingPDFException,
|
|
PasswordException,
|
|
PromiseCapability,
|
|
setVerbosityLevel,
|
|
stringToPDFString,
|
|
UnexpectedResponseException,
|
|
UnknownErrorException,
|
|
VerbosityLevel,
|
|
warn,
|
|
} from "../shared/util.js";
|
|
import {
|
|
arrayBuffersToBytes,
|
|
getNewAnnotationsMap,
|
|
XRefParseException,
|
|
} from "./core_utils.js";
|
|
import { Dict, Ref } from "./primitives.js";
|
|
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
|
|
import { AnnotationFactory } from "./annotation.js";
|
|
import { clearGlobalCaches } from "./cleanup_helper.js";
|
|
import { incrementalUpdate } from "./writer.js";
|
|
import { MessageHandler } from "../shared/message_handler.js";
|
|
import { PDFWorkerStream } from "./worker_stream.js";
|
|
import { StructTreeRoot } from "./struct_tree.js";
|
|
|
|
class WorkerTask {
|
|
constructor(name) {
|
|
this.name = name;
|
|
this.terminated = false;
|
|
this._capability = new PromiseCapability();
|
|
}
|
|
|
|
get finished() {
|
|
return this._capability.promise;
|
|
}
|
|
|
|
finish() {
|
|
this._capability.resolve();
|
|
}
|
|
|
|
terminate() {
|
|
this.terminated = true;
|
|
}
|
|
|
|
ensureNotTerminated() {
|
|
if (this.terminated) {
|
|
throw new Error("Worker task was terminated");
|
|
}
|
|
}
|
|
}
|
|
|
|
class WorkerMessageHandler {
|
|
static setup(handler, port) {
|
|
let testMessageProcessed = false;
|
|
handler.on("test", function (data) {
|
|
if (testMessageProcessed) {
|
|
return; // we already processed 'test' message once
|
|
}
|
|
testMessageProcessed = true;
|
|
|
|
// Ensure that `TypedArray`s can be sent to the worker.
|
|
handler.send("test", data instanceof Uint8Array);
|
|
});
|
|
|
|
handler.on("configure", function (data) {
|
|
setVerbosityLevel(data.verbosity);
|
|
});
|
|
|
|
handler.on("GetDocRequest", function (data) {
|
|
return WorkerMessageHandler.createDocumentHandler(data, port);
|
|
});
|
|
}
|
|
|
|
static createDocumentHandler(docParams, port) {
|
|
// This context is actually holds references on pdfManager and handler,
|
|
// until the latter is destroyed.
|
|
let pdfManager;
|
|
let terminated = false;
|
|
let cancelXHRs = null;
|
|
const WorkerTasks = new Set();
|
|
const verbosity = getVerbosityLevel();
|
|
|
|
const { docId, apiVersion } = docParams;
|
|
const workerVersion =
|
|
typeof PDFJSDev !== "undefined" && !PDFJSDev.test("TESTING")
|
|
? PDFJSDev.eval("BUNDLE_VERSION")
|
|
: null;
|
|
if (apiVersion !== workerVersion) {
|
|
throw new Error(
|
|
`The API version "${apiVersion}" does not match ` +
|
|
`the Worker version "${workerVersion}".`
|
|
);
|
|
}
|
|
|
|
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) {
|
|
// Fail early, and predictably, rather than having (some) fonts fail to
|
|
// load/render with slightly cryptic error messages in environments where
|
|
// the `Array.prototype` has been *incorrectly* extended.
|
|
//
|
|
// PLEASE NOTE: We do *not* want to slow down font parsing by adding
|
|
// `hasOwnProperty` checks all over the code-base.
|
|
const enumerableProperties = [];
|
|
for (const property in []) {
|
|
enumerableProperties.push(property);
|
|
}
|
|
if (enumerableProperties.length) {
|
|
throw new Error(
|
|
"The `Array.prototype` contains unexpected enumerable properties: " +
|
|
enumerableProperties.join(", ") +
|
|
"; thus breaking e.g. `for...in` iteration of `Array`s."
|
|
);
|
|
}
|
|
}
|
|
const workerHandlerName = docId + "_worker";
|
|
let handler = new MessageHandler(workerHandlerName, docId, port);
|
|
|
|
function ensureNotTerminated() {
|
|
if (terminated) {
|
|
throw new Error("Worker was terminated");
|
|
}
|
|
}
|
|
|
|
function startWorkerTask(task) {
|
|
WorkerTasks.add(task);
|
|
}
|
|
|
|
function finishWorkerTask(task) {
|
|
task.finish();
|
|
WorkerTasks.delete(task);
|
|
}
|
|
|
|
async function loadDocument(recoveryMode) {
|
|
await pdfManager.ensureDoc("checkHeader");
|
|
await pdfManager.ensureDoc("parseStartXRef");
|
|
await pdfManager.ensureDoc("parse", [recoveryMode]);
|
|
|
|
// Check that at least the first page can be successfully loaded,
|
|
// since otherwise the XRef table is definitely not valid.
|
|
await pdfManager.ensureDoc("checkFirstPage", [recoveryMode]);
|
|
// Check that the last page can be successfully loaded, to ensure that
|
|
// `numPages` is correct, and fallback to walking the entire /Pages-tree.
|
|
await pdfManager.ensureDoc("checkLastPage", [recoveryMode]);
|
|
|
|
const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
|
|
if (isPureXfa) {
|
|
const task = new WorkerTask("loadXfaFonts");
|
|
startWorkerTask(task);
|
|
await Promise.all([
|
|
pdfManager
|
|
.loadXfaFonts(handler, task)
|
|
.catch(reason => {
|
|
// Ignore errors, to allow the document to load.
|
|
})
|
|
.then(() => finishWorkerTask(task)),
|
|
pdfManager.loadXfaImages(),
|
|
]);
|
|
}
|
|
|
|
const [numPages, fingerprints] = await Promise.all([
|
|
pdfManager.ensureDoc("numPages"),
|
|
pdfManager.ensureDoc("fingerprints"),
|
|
]);
|
|
|
|
// Get htmlForXfa after numPages to avoid to create HTML twice.
|
|
const htmlForXfa = isPureXfa
|
|
? await pdfManager.ensureDoc("htmlForXfa")
|
|
: null;
|
|
|
|
return { numPages, fingerprints, htmlForXfa };
|
|
}
|
|
|
|
function getPdfManager({
|
|
data,
|
|
password,
|
|
disableAutoFetch,
|
|
rangeChunkSize,
|
|
length,
|
|
docBaseUrl,
|
|
enableXfa,
|
|
evaluatorOptions,
|
|
}) {
|
|
const pdfManagerArgs = {
|
|
source: null,
|
|
disableAutoFetch,
|
|
docBaseUrl,
|
|
docId,
|
|
enableXfa,
|
|
evaluatorOptions,
|
|
handler,
|
|
length,
|
|
password,
|
|
rangeChunkSize,
|
|
};
|
|
const pdfManagerCapability = new PromiseCapability();
|
|
let newPdfManager;
|
|
|
|
if (data) {
|
|
try {
|
|
pdfManagerArgs.source = data;
|
|
|
|
newPdfManager = new LocalPdfManager(pdfManagerArgs);
|
|
pdfManagerCapability.resolve(newPdfManager);
|
|
} catch (ex) {
|
|
pdfManagerCapability.reject(ex);
|
|
}
|
|
return pdfManagerCapability.promise;
|
|
}
|
|
|
|
let pdfStream,
|
|
cachedChunks = [];
|
|
try {
|
|
pdfStream = new PDFWorkerStream(handler);
|
|
} catch (ex) {
|
|
pdfManagerCapability.reject(ex);
|
|
return pdfManagerCapability.promise;
|
|
}
|
|
|
|
const fullRequest = pdfStream.getFullReader();
|
|
fullRequest.headersReady
|
|
.then(function () {
|
|
if (!fullRequest.isRangeSupported) {
|
|
return;
|
|
}
|
|
pdfManagerArgs.source = pdfStream;
|
|
pdfManagerArgs.length = fullRequest.contentLength;
|
|
// We don't need auto-fetch when streaming is enabled.
|
|
pdfManagerArgs.disableAutoFetch ||= fullRequest.isStreamingSupported;
|
|
|
|
newPdfManager = new NetworkPdfManager(pdfManagerArgs);
|
|
// There may be a chance that `newPdfManager` is not initialized for
|
|
// the first few runs of `readchunk` block of code. Be sure to send
|
|
// all cached chunks, if any, to chunked_stream via pdf_manager.
|
|
for (const chunk of cachedChunks) {
|
|
newPdfManager.sendProgressiveData(chunk);
|
|
}
|
|
|
|
cachedChunks = [];
|
|
pdfManagerCapability.resolve(newPdfManager);
|
|
cancelXHRs = null;
|
|
})
|
|
.catch(function (reason) {
|
|
pdfManagerCapability.reject(reason);
|
|
cancelXHRs = null;
|
|
});
|
|
|
|
let loaded = 0;
|
|
const flushChunks = function () {
|
|
const pdfFile = arrayBuffersToBytes(cachedChunks);
|
|
if (length && pdfFile.length !== length) {
|
|
warn("reported HTTP length is different from actual");
|
|
}
|
|
// the data is array, instantiating directly from it
|
|
try {
|
|
pdfManagerArgs.source = pdfFile;
|
|
|
|
newPdfManager = new LocalPdfManager(pdfManagerArgs);
|
|
pdfManagerCapability.resolve(newPdfManager);
|
|
} catch (ex) {
|
|
pdfManagerCapability.reject(ex);
|
|
}
|
|
cachedChunks = [];
|
|
};
|
|
new Promise(function (resolve, reject) {
|
|
const readChunk = function ({ value, done }) {
|
|
try {
|
|
ensureNotTerminated();
|
|
if (done) {
|
|
if (!newPdfManager) {
|
|
flushChunks();
|
|
}
|
|
cancelXHRs = null;
|
|
return;
|
|
}
|
|
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
|
|
assert(
|
|
value instanceof ArrayBuffer,
|
|
"readChunk (getPdfManager) - expected an ArrayBuffer."
|
|
);
|
|
}
|
|
loaded += value.byteLength;
|
|
|
|
if (!fullRequest.isStreamingSupported) {
|
|
handler.send("DocProgress", {
|
|
loaded,
|
|
total: Math.max(loaded, fullRequest.contentLength || 0),
|
|
});
|
|
}
|
|
|
|
if (newPdfManager) {
|
|
newPdfManager.sendProgressiveData(value);
|
|
} else {
|
|
cachedChunks.push(value);
|
|
}
|
|
fullRequest.read().then(readChunk, reject);
|
|
} catch (e) {
|
|
reject(e);
|
|
}
|
|
};
|
|
fullRequest.read().then(readChunk, reject);
|
|
}).catch(function (e) {
|
|
pdfManagerCapability.reject(e);
|
|
cancelXHRs = null;
|
|
});
|
|
|
|
cancelXHRs = function (reason) {
|
|
pdfStream.cancelAllRequests(reason);
|
|
};
|
|
|
|
return pdfManagerCapability.promise;
|
|
}
|
|
|
|
function setupDoc(data) {
|
|
function onSuccess(doc) {
|
|
ensureNotTerminated();
|
|
handler.send("GetDoc", { pdfInfo: doc });
|
|
}
|
|
|
|
function onFailure(ex) {
|
|
ensureNotTerminated();
|
|
|
|
if (ex instanceof PasswordException) {
|
|
const task = new WorkerTask(`PasswordException: response ${ex.code}`);
|
|
startWorkerTask(task);
|
|
|
|
handler
|
|
.sendWithPromise("PasswordRequest", ex)
|
|
.then(function ({ password }) {
|
|
finishWorkerTask(task);
|
|
pdfManager.updatePassword(password);
|
|
pdfManagerReady();
|
|
})
|
|
.catch(function () {
|
|
finishWorkerTask(task);
|
|
handler.send("DocException", ex);
|
|
});
|
|
} else if (
|
|
ex instanceof InvalidPDFException ||
|
|
ex instanceof MissingPDFException ||
|
|
ex instanceof UnexpectedResponseException ||
|
|
ex instanceof UnknownErrorException
|
|
) {
|
|
handler.send("DocException", ex);
|
|
} else {
|
|
handler.send(
|
|
"DocException",
|
|
new UnknownErrorException(ex.message, ex.toString())
|
|
);
|
|
}
|
|
}
|
|
|
|
function pdfManagerReady() {
|
|
ensureNotTerminated();
|
|
|
|
loadDocument(false).then(onSuccess, function (reason) {
|
|
ensureNotTerminated();
|
|
|
|
// Try again with recoveryMode == true
|
|
if (!(reason instanceof XRefParseException)) {
|
|
onFailure(reason);
|
|
return;
|
|
}
|
|
pdfManager.requestLoadedStream().then(function () {
|
|
ensureNotTerminated();
|
|
|
|
loadDocument(true).then(onSuccess, onFailure);
|
|
});
|
|
});
|
|
}
|
|
|
|
ensureNotTerminated();
|
|
|
|
getPdfManager(data)
|
|
.then(function (newPdfManager) {
|
|
if (terminated) {
|
|
// We were in a process of setting up the manager, but it got
|
|
// terminated in the middle.
|
|
newPdfManager.terminate(
|
|
new AbortException("Worker was terminated.")
|
|
);
|
|
throw new Error("Worker was terminated");
|
|
}
|
|
pdfManager = newPdfManager;
|
|
|
|
pdfManager.requestLoadedStream(/* noFetch = */ true).then(stream => {
|
|
handler.send("DataLoaded", { length: stream.bytes.byteLength });
|
|
});
|
|
})
|
|
.then(pdfManagerReady, onFailure);
|
|
}
|
|
|
|
handler.on("GetPage", function (data) {
|
|
return pdfManager.getPage(data.pageIndex).then(function (page) {
|
|
return Promise.all([
|
|
pdfManager.ensure(page, "rotate"),
|
|
pdfManager.ensure(page, "ref"),
|
|
pdfManager.ensure(page, "userUnit"),
|
|
pdfManager.ensure(page, "view"),
|
|
]).then(function ([rotate, ref, userUnit, view]) {
|
|
return {
|
|
rotate,
|
|
ref,
|
|
userUnit,
|
|
view,
|
|
};
|
|
});
|
|
});
|
|
});
|
|
|
|
handler.on("GetPageIndex", function (data) {
|
|
const pageRef = Ref.get(data.num, data.gen);
|
|
return pdfManager.ensureCatalog("getPageIndex", [pageRef]);
|
|
});
|
|
|
|
handler.on("GetDestinations", function (data) {
|
|
return pdfManager.ensureCatalog("destinations");
|
|
});
|
|
|
|
handler.on("GetDestination", function (data) {
|
|
return pdfManager.ensureCatalog("getDestination", [data.id]);
|
|
});
|
|
|
|
handler.on("GetPageLabels", function (data) {
|
|
return pdfManager.ensureCatalog("pageLabels");
|
|
});
|
|
|
|
handler.on("GetPageLayout", function (data) {
|
|
return pdfManager.ensureCatalog("pageLayout");
|
|
});
|
|
|
|
handler.on("GetPageMode", function (data) {
|
|
return pdfManager.ensureCatalog("pageMode");
|
|
});
|
|
|
|
handler.on("GetViewerPreferences", function (data) {
|
|
return pdfManager.ensureCatalog("viewerPreferences");
|
|
});
|
|
|
|
handler.on("GetOpenAction", function (data) {
|
|
return pdfManager.ensureCatalog("openAction");
|
|
});
|
|
|
|
handler.on("GetAttachments", function (data) {
|
|
return pdfManager.ensureCatalog("attachments");
|
|
});
|
|
|
|
handler.on("GetDocJSActions", function (data) {
|
|
return pdfManager.ensureCatalog("jsActions");
|
|
});
|
|
|
|
handler.on("GetPageJSActions", function ({ pageIndex }) {
|
|
return pdfManager.getPage(pageIndex).then(function (page) {
|
|
return pdfManager.ensure(page, "jsActions");
|
|
});
|
|
});
|
|
|
|
handler.on("GetOutline", function (data) {
|
|
return pdfManager.ensureCatalog("documentOutline");
|
|
});
|
|
|
|
handler.on("GetOptionalContentConfig", function (data) {
|
|
return pdfManager.ensureCatalog("optionalContentConfig");
|
|
});
|
|
|
|
handler.on("GetPermissions", function (data) {
|
|
return pdfManager.ensureCatalog("permissions");
|
|
});
|
|
|
|
handler.on("GetMetadata", function (data) {
|
|
return Promise.all([
|
|
pdfManager.ensureDoc("documentInfo"),
|
|
pdfManager.ensureCatalog("metadata"),
|
|
]);
|
|
});
|
|
|
|
handler.on("GetMarkInfo", function (data) {
|
|
return pdfManager.ensureCatalog("markInfo");
|
|
});
|
|
|
|
handler.on("GetData", function (data) {
|
|
return pdfManager.requestLoadedStream().then(function (stream) {
|
|
return stream.bytes;
|
|
});
|
|
});
|
|
|
|
handler.on("GetAnnotations", function ({ pageIndex, intent }) {
|
|
return pdfManager.getPage(pageIndex).then(function (page) {
|
|
const task = new WorkerTask(`GetAnnotations: page ${pageIndex}`);
|
|
startWorkerTask(task);
|
|
|
|
return page.getAnnotationsData(handler, task, intent).then(
|
|
data => {
|
|
finishWorkerTask(task);
|
|
return data;
|
|
},
|
|
reason => {
|
|
finishWorkerTask(task);
|
|
throw reason;
|
|
}
|
|
);
|
|
});
|
|
});
|
|
|
|
handler.on("GetFieldObjects", function (data) {
|
|
return pdfManager.ensureDoc("fieldObjects");
|
|
});
|
|
|
|
handler.on("HasJSActions", function (data) {
|
|
return pdfManager.ensureDoc("hasJSActions");
|
|
});
|
|
|
|
handler.on("GetCalculationOrderIds", function (data) {
|
|
return pdfManager.ensureDoc("calculationOrderIds");
|
|
});
|
|
|
|
handler.on(
|
|
"SaveDocument",
|
|
async function ({ isPureXfa, numPages, annotationStorage, filename }) {
|
|
const globalPromises = [
|
|
pdfManager.requestLoadedStream(),
|
|
pdfManager.ensureCatalog("acroForm"),
|
|
pdfManager.ensureCatalog("acroFormRef"),
|
|
pdfManager.ensureDoc("startXRef"),
|
|
pdfManager.ensureDoc("xref"),
|
|
pdfManager.ensureDoc("linearization"),
|
|
pdfManager.ensureCatalog("structTreeRoot"),
|
|
];
|
|
const promises = [];
|
|
|
|
const newAnnotationsByPage = !isPureXfa
|
|
? getNewAnnotationsMap(annotationStorage)
|
|
: null;
|
|
const [
|
|
stream,
|
|
acroForm,
|
|
acroFormRef,
|
|
startXRef,
|
|
xref,
|
|
linearization,
|
|
_structTreeRoot,
|
|
] = await Promise.all(globalPromises);
|
|
const catalogRef = xref.trailer.getRaw("Root") || null;
|
|
let structTreeRoot;
|
|
|
|
if (newAnnotationsByPage) {
|
|
if (!_structTreeRoot) {
|
|
if (
|
|
await StructTreeRoot.canCreateStructureTree({
|
|
catalogRef,
|
|
pdfManager,
|
|
newAnnotationsByPage,
|
|
})
|
|
) {
|
|
structTreeRoot = null;
|
|
}
|
|
} else if (
|
|
await _structTreeRoot.canUpdateStructTree({
|
|
pdfManager,
|
|
newAnnotationsByPage,
|
|
})
|
|
) {
|
|
structTreeRoot = _structTreeRoot;
|
|
}
|
|
|
|
const imagePromises = AnnotationFactory.generateImages(
|
|
annotationStorage.values(),
|
|
xref,
|
|
pdfManager.evaluatorOptions.isOffscreenCanvasSupported
|
|
);
|
|
const newAnnotationPromises =
|
|
structTreeRoot === undefined ? promises : [];
|
|
for (const [pageIndex, annotations] of newAnnotationsByPage) {
|
|
newAnnotationPromises.push(
|
|
pdfManager.getPage(pageIndex).then(page => {
|
|
const task = new WorkerTask(`Save (editor): page ${pageIndex}`);
|
|
return page
|
|
.saveNewAnnotations(handler, task, annotations, imagePromises)
|
|
.finally(function () {
|
|
finishWorkerTask(task);
|
|
});
|
|
})
|
|
);
|
|
}
|
|
if (structTreeRoot === null) {
|
|
// No structTreeRoot exists, so we need to create one.
|
|
promises.push(
|
|
Promise.all(newAnnotationPromises).then(async newRefs => {
|
|
await StructTreeRoot.createStructureTree({
|
|
newAnnotationsByPage,
|
|
xref,
|
|
catalogRef,
|
|
pdfManager,
|
|
newRefs,
|
|
});
|
|
return newRefs;
|
|
})
|
|
);
|
|
} else if (structTreeRoot) {
|
|
promises.push(
|
|
Promise.all(newAnnotationPromises).then(async newRefs => {
|
|
await structTreeRoot.updateStructureTree({
|
|
newAnnotationsByPage,
|
|
pdfManager,
|
|
newRefs,
|
|
});
|
|
return newRefs;
|
|
})
|
|
);
|
|
}
|
|
}
|
|
|
|
if (isPureXfa) {
|
|
promises.push(pdfManager.serializeXfaData(annotationStorage));
|
|
} else {
|
|
for (let pageIndex = 0; pageIndex < numPages; pageIndex++) {
|
|
promises.push(
|
|
pdfManager.getPage(pageIndex).then(function (page) {
|
|
const task = new WorkerTask(`Save: page ${pageIndex}`);
|
|
return page
|
|
.save(handler, task, annotationStorage)
|
|
.finally(function () {
|
|
finishWorkerTask(task);
|
|
});
|
|
})
|
|
);
|
|
}
|
|
}
|
|
const refs = await Promise.all(promises);
|
|
|
|
let newRefs = [];
|
|
let xfaData = null;
|
|
if (isPureXfa) {
|
|
xfaData = refs[0];
|
|
if (!xfaData) {
|
|
return stream.bytes;
|
|
}
|
|
} else {
|
|
newRefs = refs.flat(2);
|
|
|
|
if (newRefs.length === 0) {
|
|
// No new refs so just return the initial bytes
|
|
return stream.bytes;
|
|
}
|
|
}
|
|
|
|
const needAppearances =
|
|
acroFormRef &&
|
|
acroForm instanceof Dict &&
|
|
newRefs.some(ref => ref.needAppearances);
|
|
|
|
const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null;
|
|
let xfaDatasetsRef = null;
|
|
let hasXfaDatasetsEntry = false;
|
|
if (Array.isArray(xfa)) {
|
|
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
|
|
if (xfa[i] === "datasets") {
|
|
xfaDatasetsRef = xfa[i + 1];
|
|
hasXfaDatasetsEntry = true;
|
|
}
|
|
}
|
|
if (xfaDatasetsRef === null) {
|
|
xfaDatasetsRef = xref.getNewTemporaryRef();
|
|
}
|
|
} else if (xfa) {
|
|
// TODO: Support XFA streams.
|
|
warn("Unsupported XFA type.");
|
|
}
|
|
|
|
let newXrefInfo = Object.create(null);
|
|
if (xref.trailer) {
|
|
// Get string info from Info in order to compute fileId.
|
|
const infoObj = Object.create(null);
|
|
const xrefInfo = xref.trailer.get("Info") || null;
|
|
if (xrefInfo instanceof Dict) {
|
|
xrefInfo.forEach((key, value) => {
|
|
if (typeof value === "string") {
|
|
infoObj[key] = stringToPDFString(value);
|
|
}
|
|
});
|
|
}
|
|
|
|
newXrefInfo = {
|
|
rootRef: catalogRef,
|
|
encryptRef: xref.trailer.getRaw("Encrypt") || null,
|
|
newRef: xref.getNewTemporaryRef(),
|
|
infoRef: xref.trailer.getRaw("Info") || null,
|
|
info: infoObj,
|
|
fileIds: xref.trailer.get("ID") || null,
|
|
startXRef: linearization
|
|
? startXRef
|
|
: xref.lastXRefStreamPos ?? startXRef,
|
|
filename,
|
|
};
|
|
}
|
|
|
|
return incrementalUpdate({
|
|
originalData: stream.bytes,
|
|
xrefInfo: newXrefInfo,
|
|
newRefs,
|
|
xref,
|
|
hasXfa: !!xfa,
|
|
xfaDatasetsRef,
|
|
hasXfaDatasetsEntry,
|
|
needAppearances,
|
|
acroFormRef,
|
|
acroForm,
|
|
xfaData,
|
|
}).finally(() => {
|
|
xref.resetNewTemporaryRef();
|
|
});
|
|
}
|
|
);
|
|
|
|
handler.on("GetOperatorList", function (data, sink) {
|
|
const pageIndex = data.pageIndex;
|
|
pdfManager.getPage(pageIndex).then(function (page) {
|
|
const task = new WorkerTask(`GetOperatorList: page ${pageIndex}`);
|
|
startWorkerTask(task);
|
|
|
|
// NOTE: Keep this condition in sync with the `info` helper function.
|
|
const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;
|
|
|
|
// Pre compile the pdf page and fetch the fonts/images.
|
|
page
|
|
.getOperatorList({
|
|
handler,
|
|
sink,
|
|
task,
|
|
intent: data.intent,
|
|
cacheKey: data.cacheKey,
|
|
annotationStorage: data.annotationStorage,
|
|
})
|
|
.then(
|
|
function (operatorListInfo) {
|
|
finishWorkerTask(task);
|
|
|
|
if (start) {
|
|
info(
|
|
`page=${pageIndex + 1} - getOperatorList: time=` +
|
|
`${Date.now() - start}ms, len=${operatorListInfo.length}`
|
|
);
|
|
}
|
|
sink.close();
|
|
},
|
|
function (reason) {
|
|
finishWorkerTask(task);
|
|
if (task.terminated) {
|
|
return; // ignoring errors from the terminated thread
|
|
}
|
|
sink.error(reason);
|
|
|
|
// TODO: Should `reason` be re-thrown here (currently that casues
|
|
// "Uncaught exception: ..." messages in the console)?
|
|
}
|
|
);
|
|
});
|
|
});
|
|
|
|
handler.on("GetTextContent", function (data, sink) {
|
|
const { pageIndex, includeMarkedContent, disableNormalization } = data;
|
|
|
|
pdfManager.getPage(pageIndex).then(function (page) {
|
|
const task = new WorkerTask("GetTextContent: page " + pageIndex);
|
|
startWorkerTask(task);
|
|
|
|
// NOTE: Keep this condition in sync with the `info` helper function.
|
|
const start = verbosity >= VerbosityLevel.INFOS ? Date.now() : 0;
|
|
|
|
page
|
|
.extractTextContent({
|
|
handler,
|
|
task,
|
|
sink,
|
|
includeMarkedContent,
|
|
disableNormalization,
|
|
})
|
|
.then(
|
|
function () {
|
|
finishWorkerTask(task);
|
|
|
|
if (start) {
|
|
info(
|
|
`page=${pageIndex + 1} - getTextContent: time=` +
|
|
`${Date.now() - start}ms`
|
|
);
|
|
}
|
|
sink.close();
|
|
},
|
|
function (reason) {
|
|
finishWorkerTask(task);
|
|
if (task.terminated) {
|
|
return; // ignoring errors from the terminated thread
|
|
}
|
|
sink.error(reason);
|
|
|
|
// TODO: Should `reason` be re-thrown here (currently that casues
|
|
// "Uncaught exception: ..." messages in the console)?
|
|
}
|
|
);
|
|
});
|
|
});
|
|
|
|
handler.on("GetStructTree", function (data) {
|
|
return pdfManager.getPage(data.pageIndex).then(function (page) {
|
|
return pdfManager.ensure(page, "getStructTree");
|
|
});
|
|
});
|
|
|
|
handler.on("FontFallback", function (data) {
|
|
return pdfManager.fontFallback(data.id, handler);
|
|
});
|
|
|
|
handler.on("Cleanup", function (data) {
|
|
return pdfManager.cleanup(/* manuallyTriggered = */ true);
|
|
});
|
|
|
|
handler.on("Terminate", function (data) {
|
|
terminated = true;
|
|
|
|
const waitOn = [];
|
|
if (pdfManager) {
|
|
pdfManager.terminate(new AbortException("Worker was terminated."));
|
|
|
|
const cleanupPromise = pdfManager.cleanup();
|
|
waitOn.push(cleanupPromise);
|
|
|
|
pdfManager = null;
|
|
} else {
|
|
clearGlobalCaches();
|
|
}
|
|
if (cancelXHRs) {
|
|
cancelXHRs(new AbortException("Worker was terminated."));
|
|
}
|
|
|
|
for (const task of WorkerTasks) {
|
|
waitOn.push(task.finished);
|
|
task.terminate();
|
|
}
|
|
|
|
return Promise.all(waitOn).then(function () {
|
|
// Notice that even if we destroying handler, resolved response promise
|
|
// must be sent back.
|
|
handler.destroy();
|
|
handler = null;
|
|
});
|
|
});
|
|
|
|
handler.on("Ready", function (data) {
|
|
setupDoc(docParams);
|
|
docParams = null; // we don't need docParams anymore -- saving memory.
|
|
});
|
|
|
|
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
|
|
handler.on("GetXFADatasets", function (data) {
|
|
return pdfManager.ensureDoc("xfaDatasets");
|
|
});
|
|
handler.on("GetXRefPrevValue", function (data) {
|
|
return pdfManager
|
|
.ensureXRef("trailer")
|
|
.then(trailer => trailer.get("Prev"));
|
|
});
|
|
handler.on("GetAnnotArray", function (data) {
|
|
return pdfManager.getPage(data.pageIndex).then(function (page) {
|
|
return page.annotations.map(a => a.toString());
|
|
});
|
|
});
|
|
}
|
|
|
|
return workerHandlerName;
|
|
}
|
|
|
|
static initializeFromPort(port) {
|
|
const handler = new MessageHandler("worker", "main", port);
|
|
WorkerMessageHandler.setup(handler, port);
|
|
handler.send("ready", null);
|
|
}
|
|
}
|
|
|
|
function isMessagePort(maybePort) {
|
|
return (
|
|
typeof maybePort.postMessage === "function" && "onmessage" in maybePort
|
|
);
|
|
}
|
|
|
|
// Worker thread (and not Node.js)?
|
|
if (
|
|
typeof window === "undefined" &&
|
|
!isNodeJS &&
|
|
typeof self !== "undefined" &&
|
|
isMessagePort(self)
|
|
) {
|
|
WorkerMessageHandler.initializeFromPort(self);
|
|
}
|
|
|
|
export { WorkerMessageHandler, WorkerTask };
|