[api-minor] Use the Fetch API, when supported, to load PDF documents in Node.js environments

Given that modern Node.js versions now implement support for a fair number of "browser" APIs, we can utilize the standard Fetch API to load PDF documents that are specified via http/https URLs.

Please find compatibility information at:
 - https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API#browser_compatibility
 - https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch
 - https://developer.mozilla.org/en-US/docs/Web/API/Response#browser_compatibility
 - https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#response
This commit is contained in:
Jonas Jenwald 2024-02-21 16:04:13 +01:00
parent 72b8b29147
commit eded037d06
6 changed files with 123 additions and 54 deletions

View File

@ -419,7 +419,16 @@ function getDocument(src) {
PDFJSDev.test("GENERIC") && PDFJSDev.test("GENERIC") &&
isNodeJS isNodeJS
) { ) {
return new PDFNodeStream(params); const isFetchSupported = function () {
return (
typeof fetch !== "undefined" &&
typeof Response !== "undefined" &&
"body" in Response.prototype
);
};
return isFetchSupported() && isValidFetchUrl(params.url)
? new PDFFetchStream(params)
: new PDFNodeStream(params);
} }
return isValidFetchUrl(params.url) return isValidFetchUrl(params.url)
? new PDFFetchStream(params) ? new PDFFetchStream(params)
@ -762,6 +771,9 @@ class PDFDocumentProxy {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes. // For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => this._transport.getNetworkStreamName(),
});
Object.defineProperty(this, "getXFADatasets", { Object.defineProperty(this, "getXFADatasets", {
value: () => this._transport.getXFADatasets(), value: () => this._transport.getXFADatasets(),
}); });
@ -2344,6 +2356,9 @@ class WorkerTransport {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes. // For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => networkStream?.constructor?.name || null,
});
Object.defineProperty(this, "getXFADatasets", { Object.defineProperty(this, "getXFADatasets", {
value: () => value: () =>
this.messageHandler.sendWithPromise("GetXFADatasets", null), this.messageHandler.sendWithPromise("GetXFADatasets", null),

View File

@ -32,6 +32,7 @@ import {
import { import {
buildGetDocumentParams, buildGetDocumentParams,
CMAP_URL, CMAP_URL,
createTemporaryNodeServer,
DefaultFileReaderFactory, DefaultFileReaderFactory,
TEST_PDFS_PATH, TEST_PDFS_PATH,
} from "./test_utils.js"; } from "./test_utils.js";
@ -67,13 +68,27 @@ describe("api", function () {
buildGetDocumentParams(tracemonkeyFileName); buildGetDocumentParams(tracemonkeyFileName);
let CanvasFactory; let CanvasFactory;
let tempServer = null;
beforeAll(function () { beforeAll(function () {
CanvasFactory = new DefaultCanvasFactory(); CanvasFactory = new DefaultCanvasFactory();
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
}); });
afterAll(function () { afterAll(function () {
CanvasFactory = null; CanvasFactory = null;
if (isNodeJS) {
// Stop the server from accepting new connections once all tests have
// finished.
const { server } = tempServer;
server.close();
tempServer = null;
}
}); });
function waitSome(callback) { function waitSome(callback) {
@ -119,13 +134,10 @@ describe("api", function () {
}); });
it("creates pdf doc from URL-object", async function () { it("creates pdf doc from URL-object", async function () {
if (isNodeJS) { const urlObj = isNodeJS
pending("window.location is not supported in Node.js."); ? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`)
} : new URL(TEST_PDFS_PATH + basicApiFileName, window.location);
const urlObj = new URL(
TEST_PDFS_PATH + basicApiFileName,
window.location
);
const loadingTask = getDocument(urlObj); const loadingTask = getDocument(urlObj);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise; const pdfDocument = await loadingTask.promise;
@ -134,6 +146,9 @@ describe("api", function () {
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true); expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3); expect(pdfDocument.numPages).toEqual(3);
// Ensure that the Fetch API was used to load the PDF document.
expect(pdfDocument.getNetworkStreamName()).toEqual("PDFFetchStream");
await loadingTask.destroy(); await loadingTask.destroy();
}); });

View File

@ -21,6 +21,7 @@
"encodings_spec.js", "encodings_spec.js",
"evaluator_spec.js", "evaluator_spec.js",
"event_utils_spec.js", "event_utils_spec.js",
"fetch_stream_spec.js",
"font_substitutions_spec.js", "font_substitutions_spec.js",
"function_spec.js", "function_spec.js",
"message_handler_spec.js", "message_handler_spec.js",

View File

@ -13,16 +13,40 @@
* limitations under the License. * limitations under the License.
*/ */
import { AbortException } from "../../src/shared/util.js"; import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFFetchStream } from "../../src/display/fetch_stream.js"; import { PDFFetchStream } from "../../src/display/fetch_stream.js";
describe("fetch_stream", function () { describe("fetch_stream", function () {
const pdfUrl = new URL("../pdfs/tracemonkey.pdf", window.location).href; let tempServer = null;
// Returns the URL of the `tracemonkey.pdf` test file. In Node.js it is an
// http URL using the port of `tempServer`; otherwise it is resolved
// against `window.location`.
function getPdfUrl() {
  if (!isNodeJS) {
    const browserUrl = new URL("../pdfs/tracemonkey.pdf", window.location);
    return browserUrl.href;
  }
  const { port } = tempServer;
  return `http://127.0.0.1:${port}/tracemonkey.pdf`;
}
const pdfLength = 1016315; const pdfLength = 1016315;
beforeAll(function () {
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});
afterAll(function () {
if (isNodeJS) {
// Stop the server from accepting new connections once all tests have
// finished.
const { server } = tempServer;
server.close();
tempServer = null;
}
});
it("read with streaming", async function () { it("read with streaming", async function () {
const stream = new PDFFetchStream({ const stream = new PDFFetchStream({
url: pdfUrl, url: getPdfUrl(),
disableStream: false, disableStream: false,
disableRange: true, disableRange: true,
}); });
@ -57,7 +81,7 @@ describe("fetch_stream", function () {
it("read ranges with streaming", async function () { it("read ranges with streaming", async function () {
const rangeSize = 32768; const rangeSize = 32768;
const stream = new PDFFetchStream({ const stream = new PDFFetchStream({
url: pdfUrl, url: getPdfUrl(),
rangeChunkSize: rangeSize, rangeChunkSize: rangeSize,
disableStream: false, disableStream: false,
disableRange: false, disableRange: false,

View File

@ -14,6 +14,7 @@
*/ */
import { AbortException, isNodeJS } from "../../src/shared/util.js"; import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js"; import { PDFNodeStream } from "../../src/display/node_stream.js";
// Ensure that these tests only run in Node.js environments. // Ensure that these tests only run in Node.js environments.
@ -25,12 +26,10 @@ if (!isNodeJS) {
const path = await __non_webpack_import__("path"); const path = await __non_webpack_import__("path");
const url = await __non_webpack_import__("url"); const url = await __non_webpack_import__("url");
const http = await __non_webpack_import__("http");
const fs = await __non_webpack_import__("fs");
describe("node_stream", function () { describe("node_stream", function () {
let server = null; let tempServer = null;
let port = null;
const pdf = url.parse( const pdf = url.parse(
encodeURI( encodeURI(
"file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf") "file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf")
@ -39,50 +38,20 @@ describe("node_stream", function () {
const pdfLength = 1016315; const pdfLength = 1016315;
beforeAll(function () { beforeAll(function () {
// Create http server to serve pdf data for tests. tempServer = createTemporaryNodeServer();
server = http
.createServer((request, response) => {
const filePath = process.cwd() + "/test/pdfs" + request.url;
fs.lstat(filePath, (error, stat) => {
if (error) {
response.writeHead(404);
response.end(`File ${request.url} not found!`);
return;
}
if (!request.headers.range) {
const contentLength = stat.size;
const stream = fs.createReadStream(filePath);
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": contentLength,
"Accept-Ranges": "bytes",
});
stream.pipe(response);
} else {
const [start, end] = request.headers.range
.split("=")[1]
.split("-")
.map(x => Number(x));
const stream = fs.createReadStream(filePath, { start, end });
response.writeHead(206, {
"Content-Type": "application/pdf",
});
stream.pipe(response);
}
});
})
.listen(0); /* Listen on a random free port */
port = server.address().port;
}); });
afterAll(function () { afterAll(function () {
// Close the server from accepting new connections after all test finishes. // Close the server from accepting new connections after all test finishes.
const { server } = tempServer;
server.close(); server.close();
tempServer = null;
}); });
it("read both http(s) and filesystem pdf files", async function () { it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({ const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`, url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536, rangeChunkSize: 65536,
disableStream: true, disableStream: true,
disableRange: true, disableRange: true,
@ -144,7 +113,7 @@ describe("node_stream", function () {
it("read custom ranges for both http(s) and filesystem urls", async function () { it("read custom ranges for both http(s) and filesystem urls", async function () {
const rangeSize = 32768; const rangeSize = 32768;
const stream1 = new PDFNodeStream({ const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`, url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength, length: pdfLength,
rangeChunkSize: rangeSize, rangeChunkSize: rangeSize,
disableStream: true, disableStream: true,

View File

@ -13,15 +13,16 @@
* limitations under the License. * limitations under the License.
*/ */
import { assert, isNodeJS } from "../../src/shared/util.js";
import { NullStream, StringStream } from "../../src/core/stream.js"; import { NullStream, StringStream } from "../../src/core/stream.js";
import { Page, PDFDocument } from "../../src/core/document.js"; import { Page, PDFDocument } from "../../src/core/document.js";
import { isNodeJS } from "../../src/shared/util.js";
import { Ref } from "../../src/core/primitives.js"; import { Ref } from "../../src/core/primitives.js";
let fs; let fs, http;
if (isNodeJS) { if (isNodeJS) {
// Native packages. // Native packages.
fs = await __non_webpack_import__("fs"); fs = await __non_webpack_import__("fs");
http = await __non_webpack_import__("http");
} }
const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/"; const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
@ -144,10 +145,54 @@ function createIdFactory(pageIndex) {
return page._localIdFactory; return page._localIdFactory;
} }
/**
 * Parse a `Range` request header against a file of `size` bytes.
 *
 * Supports the single-range forms `bytes=start-end`, `bytes=start-` and
 * `bytes=-suffixLength`.
 *
 * @param {string} rangeHeader - The raw value of the `Range` header.
 * @param {number} size - The size of the file, in bytes.
 * @returns {Object|null} `null` when the header is not a single well-formed
 *   byte range (the caller then ignores the header), otherwise an object
 *   with a boolean `satisfiable` property and, when satisfiable, the
 *   inclusive `start` and `end` byte offsets.
 */
function parseByteRange(rangeHeader, size) {
  const match = /^bytes=(\d*)-(\d*)$/.exec(rangeHeader.trim());
  if (!match) {
    return null;
  }
  const [, first, last] = match;
  if (first === "" && last === "") {
    return null;
  }

  let start, end;
  if (first === "") {
    // Suffix range: the final `last` bytes of the file.
    const suffixLength = Number(last);
    if (suffixLength === 0) {
      return { satisfiable: false };
    }
    start = Math.max(size - suffixLength, 0);
    end = size - 1;
  } else {
    start = Number(first);
    if (last !== "" && Number(last) < start) {
      // The last position precedes the first one; the range is invalid.
      return null;
    }
    // An omitted end means "until the end of the file"; note that
    // `Number("")` is `0`, hence the empty string must be handled explicitly.
    end = last === "" ? size - 1 : Math.min(Number(last), size - 1);
  }
  if (start >= size) {
    return { satisfiable: false };
  }
  return { satisfiable: true, start, end };
}

/**
 * Start a temporary http server, listening on a random free port, that
 * serves the files found in the `test/pdfs` folder of the current working
 * directory. Single byte-range requests are answered with a 206 response.
 *
 * The caller is responsible for calling `server.close()` when done.
 *
 * @returns {Object} An object with the `server` instance and the `port`
 *   that it is listening on.
 */
function createTemporaryNodeServer() {
  assert(isNodeJS, "Should only be used in Node.js environments.");

  // Create http server to serve pdf data for tests.
  const server = http
    .createServer((request, response) => {
      // Only the path component identifies the file; a query string or
      // fragment must not become part of the file name. The URL parser also
      // collapses dot-segments, which keeps the path inside `test/pdfs`.
      let pathname;
      try {
        ({ pathname } = new URL(request.url, "http://127.0.0.1"));
      } catch {
        response.writeHead(400);
        response.end(`Invalid request URL ${request.url}!`);
        return;
      }
      const filePath = process.cwd() + "/test/pdfs" + pathname;

      // Use `stat` (rather than `lstat`) so that the reported size is the
      // one of the actual file, even when `filePath` is a symbolic link.
      fs.stat(filePath, (error, stat) => {
        if (error || !stat.isFile()) {
          response.writeHead(404);
          response.end(`File ${request.url} not found!`);
          return;
        }

        const sendFile = (statusCode, headers, options) => {
          const stream = fs.createReadStream(filePath, options);
          // Abort the response, instead of crashing the test process with
          // an unhandled "error" event, if reading the file fails.
          stream.on("error", reason => response.destroy(reason));
          response.writeHead(statusCode, headers);
          stream.pipe(response);
        };

        const rangeHeader = request.headers.range;
        const range = rangeHeader
          ? parseByteRange(rangeHeader, stat.size)
          : null;

        if (!range) {
          // No `Range` header, or one that cannot be interpreted (servers
          // are allowed to ignore those): send the entire file.
          sendFile(200, {
            "Content-Type": "application/pdf",
            "Content-Length": stat.size,
            "Accept-Ranges": "bytes",
          });
          return;
        }
        if (!range.satisfiable) {
          response.writeHead(416, {
            "Content-Range": `bytes */${stat.size}`,
          });
          response.end();
          return;
        }

        // Both `start` and `end` are inclusive, for the `Content-Range`
        // header as well as for `fs.createReadStream`.
        const { start, end } = range;
        sendFile(
          206,
          {
            "Content-Type": "application/pdf",
            "Content-Length": end - start + 1,
            "Content-Range": `bytes ${start}-${end}/${stat.size}`,
          },
          { start, end }
        );
      });
    })
    .listen(0); /* Listen on a random free port */

  return {
    server,
    port: server.address().port,
  };
}
export { export {
buildGetDocumentParams, buildGetDocumentParams,
CMAP_URL, CMAP_URL,
createIdFactory, createIdFactory,
createTemporaryNodeServer,
DefaultFileReaderFactory, DefaultFileReaderFactory,
STANDARD_FONT_DATA_URL, STANDARD_FONT_DATA_URL,
TEST_PDFS_PATH, TEST_PDFS_PATH,