[api-minor] Use the Fetch API, when supported, to load PDF documents in Node.js environments

Given that modern Node.js versions now implement support for a fair number of "browser" APIs, we can utilize the standard Fetch API to load PDF documents that are specified via http/https URLs.

Please find compatibility information at:
 - https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API#browser_compatibility
 - https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#fetch
 - https://developer.mozilla.org/en-US/docs/Web/API/Response#browser_compatibility
 - https://nodejs.org/dist/latest-v18.x/docs/api/globals.html#response
This commit is contained in:
Jonas Jenwald 2024-02-21 16:04:13 +01:00
parent 72b8b29147
commit eded037d06
6 changed files with 123 additions and 54 deletions

View File

@ -419,7 +419,16 @@ function getDocument(src) {
PDFJSDev.test("GENERIC") &&
isNodeJS
) {
return new PDFNodeStream(params);
const isFetchSupported = function () {
return (
typeof fetch !== "undefined" &&
typeof Response !== "undefined" &&
"body" in Response.prototype
);
};
return isFetchSupported() && isValidFetchUrl(params.url)
? new PDFFetchStream(params)
: new PDFNodeStream(params);
}
return isValidFetchUrl(params.url)
? new PDFFetchStream(params)
@ -762,6 +771,9 @@ class PDFDocumentProxy {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => this._transport.getNetworkStreamName(),
});
Object.defineProperty(this, "getXFADatasets", {
value: () => this._transport.getXFADatasets(),
});
@ -2344,6 +2356,9 @@ class WorkerTransport {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => networkStream?.constructor?.name || null,
});
Object.defineProperty(this, "getXFADatasets", {
value: () =>
this.messageHandler.sendWithPromise("GetXFADatasets", null),

View File

@ -32,6 +32,7 @@ import {
import {
buildGetDocumentParams,
CMAP_URL,
createTemporaryNodeServer,
DefaultFileReaderFactory,
TEST_PDFS_PATH,
} from "./test_utils.js";
@ -67,13 +68,27 @@ describe("api", function () {
buildGetDocumentParams(tracemonkeyFileName);
let CanvasFactory;
let tempServer = null;
beforeAll(function () {
CanvasFactory = new DefaultCanvasFactory();
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});
afterAll(function () {
CanvasFactory = null;
if (isNodeJS) {
// Close the server from accepting new connections after all test
// finishes.
const { server } = tempServer;
server.close();
tempServer = null;
}
});
function waitSome(callback) {
@ -119,13 +134,10 @@ describe("api", function () {
});
it("creates pdf doc from URL-object", async function () {
if (isNodeJS) {
pending("window.location is not supported in Node.js.");
}
const urlObj = new URL(
TEST_PDFS_PATH + basicApiFileName,
window.location
);
const urlObj = isNodeJS
? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`)
: new URL(TEST_PDFS_PATH + basicApiFileName, window.location);
const loadingTask = getDocument(urlObj);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise;
@ -134,6 +146,9 @@ describe("api", function () {
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3);
// Ensure that the Fetch API was used to load the PDF document.
expect(pdfDocument.getNetworkStreamName()).toEqual("PDFFetchStream");
await loadingTask.destroy();
});

View File

@ -21,6 +21,7 @@
"encodings_spec.js",
"evaluator_spec.js",
"event_utils_spec.js",
"fetch_stream_spec.js",
"font_substitutions_spec.js",
"function_spec.js",
"message_handler_spec.js",

View File

@ -13,16 +13,40 @@
* limitations under the License.
*/
import { AbortException } from "../../src/shared/util.js";
import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFFetchStream } from "../../src/display/fetch_stream.js";
describe("fetch_stream", function () {
const pdfUrl = new URL("../pdfs/tracemonkey.pdf", window.location).href;
let tempServer = null;
function getPdfUrl() {
return isNodeJS
? `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`
: new URL("../pdfs/tracemonkey.pdf", window.location).href;
}
const pdfLength = 1016315;
beforeAll(function () {
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});
afterAll(function () {
if (isNodeJS) {
// Close the server from accepting new connections after all test
// finishes.
const { server } = tempServer;
server.close();
tempServer = null;
}
});
it("read with streaming", async function () {
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
disableStream: false,
disableRange: true,
});
@ -57,7 +81,7 @@ describe("fetch_stream", function () {
it("read ranges with streaming", async function () {
const rangeSize = 32768;
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
rangeChunkSize: rangeSize,
disableStream: false,
disableRange: false,

View File

@ -14,6 +14,7 @@
*/
import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js";
// Ensure that these tests only run in Node.js environments.
@ -25,12 +26,10 @@ if (!isNodeJS) {
const path = await __non_webpack_import__("path");
const url = await __non_webpack_import__("url");
const http = await __non_webpack_import__("http");
const fs = await __non_webpack_import__("fs");
describe("node_stream", function () {
let server = null;
let port = null;
let tempServer = null;
const pdf = url.parse(
encodeURI(
"file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf")
@ -39,50 +38,20 @@ describe("node_stream", function () {
const pdfLength = 1016315;
beforeAll(function () {
// Create http server to serve pdf data for tests.
server = http
.createServer((request, response) => {
const filePath = process.cwd() + "/test/pdfs" + request.url;
fs.lstat(filePath, (error, stat) => {
if (error) {
response.writeHead(404);
response.end(`File ${request.url} not found!`);
return;
}
if (!request.headers.range) {
const contentLength = stat.size;
const stream = fs.createReadStream(filePath);
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": contentLength,
"Accept-Ranges": "bytes",
});
stream.pipe(response);
} else {
const [start, end] = request.headers.range
.split("=")[1]
.split("-")
.map(x => Number(x));
const stream = fs.createReadStream(filePath, { start, end });
response.writeHead(206, {
"Content-Type": "application/pdf",
});
stream.pipe(response);
}
});
})
.listen(0); /* Listen on a random free port */
port = server.address().port;
tempServer = createTemporaryNodeServer();
});
afterAll(function () {
// Close the server from accepting new connections after all test finishes.
const { server } = tempServer;
server.close();
tempServer = null;
});
it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
@ -144,7 +113,7 @@ describe("node_stream", function () {
it("read custom ranges for both http(s) and filesystem urls", async function () {
const rangeSize = 32768;
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength,
rangeChunkSize: rangeSize,
disableStream: true,

View File

@ -13,15 +13,16 @@
* limitations under the License.
*/
import { assert, isNodeJS } from "../../src/shared/util.js";
import { NullStream, StringStream } from "../../src/core/stream.js";
import { Page, PDFDocument } from "../../src/core/document.js";
import { isNodeJS } from "../../src/shared/util.js";
import { Ref } from "../../src/core/primitives.js";
let fs;
let fs, http;
if (isNodeJS) {
// Native packages.
fs = await __non_webpack_import__("fs");
http = await __non_webpack_import__("http");
}
const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
@ -144,10 +145,54 @@ function createIdFactory(pageIndex) {
return page._localIdFactory;
}
function createTemporaryNodeServer() {
assert(isNodeJS, "Should only be used in Node.js environments.");
// Create http server to serve pdf data for tests.
const server = http
.createServer((request, response) => {
const filePath = process.cwd() + "/test/pdfs" + request.url;
fs.lstat(filePath, (error, stat) => {
if (error) {
response.writeHead(404);
response.end(`File ${request.url} not found!`);
return;
}
if (!request.headers.range) {
const contentLength = stat.size;
const stream = fs.createReadStream(filePath);
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": contentLength,
"Accept-Ranges": "bytes",
});
stream.pipe(response);
} else {
const [start, end] = request.headers.range
.split("=")[1]
.split("-")
.map(x => Number(x));
const stream = fs.createReadStream(filePath, { start, end });
response.writeHead(206, {
"Content-Type": "application/pdf",
});
stream.pipe(response);
}
});
})
.listen(0); /* Listen on a random free port */
return {
server,
port: server.address().port,
};
}
export {
buildGetDocumentParams,
CMAP_URL,
createIdFactory,
createTemporaryNodeServer,
DefaultFileReaderFactory,
STANDARD_FONT_DATA_URL,
TEST_PDFS_PATH,