Move the `getPdfFilenameFromUrl` helper function from web/ui_utils.js into src/display/display_utils.js

It seems reasonable to place this alongside the *similar* `getFilenameFromUrl` helper function. This way, with the changes in the next patch, we also avoid having to expose the `isDataScheme` function in the API itself and we instead expose `getPdfFilenameFromUrl` in the API (which feels overall more appropriate).
This commit is contained in:
Jonas Jenwald 2021-03-16 11:56:31 +01:00
parent a164941351
commit bd9dee1544
8 changed files with 225 additions and 226 deletions

View File

@ -451,13 +451,23 @@ function addLinkAttributes(link, { url, target, rel, enabled = true } = {}) {
link.rel = typeof rel === "string" ? rel : DEFAULT_LINK_REL;
}
/**
 * Checks whether a URL uses the "data:" scheme, ignoring leading whitespace
 * and letter case.
 *
 * The string is scanned in place (rather than trimmed) so that no copy of a
 * potentially very long URL has to be created.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isDataScheme(url) {
  let start = 0;
  for (const end = url.length; start < end; start++) {
    // A character that is not removed by `trim()` is not whitespace.
    if (url[start].trim() !== "") {
      break;
    }
  }
  const scheme = url.substring(start, start + 5);
  return scheme.toLowerCase() === "data:";
}
/**
 * Checks whether a filename ends with the ".pdf" extension
 * (case-insensitive).
 *
 * @param {*} filename
 * @returns {boolean} `false` for any non-string value.
 */
function isPdfFile(filename) {
  if (typeof filename !== "string") {
    return false;
  }
  return /\.pdf$/i.test(filename);
}
/**
* Gets the file name from a given URL.
* Gets the filename from a given URL.
* @param {string} url
* @returns {string}
*/
function getFilenameFromUrl(url) {
const anchor = url.indexOf("#");
@ -469,6 +479,48 @@ function getFilenameFromUrl(url) {
return url.substring(url.lastIndexOf("/", end) + 1, end);
}
/**
 * Returns the filename or guessed filename from the url (see issue 3455).
 *
 * The path, the query string and the reference fragment are searched, in
 * that order, for the last "NAME.pdf" occurrence; the first part that
 * contains one provides the result.
 *
 * @param {string} url - The original PDF location.
 * @param {string} defaultFilename - The value returned if the filename is
 *   unknown, or the protocol is unsupported.
 * @returns {string} Guessed PDF filename.
 */
function getPdfFilenameFromUrl(url, defaultFilename = "document.pdf") {
  if (typeof url !== "string") {
    return defaultFilename;
  }
  if (isDataScheme(url)) {
    warn('getPdfFilenameFromUrl: ignore "data:"-URL for performance reasons.');
    return defaultFilename;
  }
  // Capture groups: 1 = PATH, 2 = QUERY ("?..."), 3 = REF ("#...");
  // the optional non-capturing prefix consumes the SCHEME and HOST.
  // The REF group uses `[\s\S]*` instead of `.*`, since `.` does not match
  // line terminators: with `.*` a URL such as "/file.pdf#page=2\n" did not
  // match at all, and reading the groups of the `null` result threw.
  const reURI = /^(?:(?:[^:]+:)?\/\/[^/]+)?([^?#]*)(\?[^#]*)?(#[\s\S]*)?$/;
  // Pattern to get last matching NAME.pdf
  const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i;
  const splitURI = reURI.exec(url);
  let suggestedFilename =
    reFilename.exec(splitURI[1]) ||
    reFilename.exec(splitURI[2]) ||
    reFilename.exec(splitURI[3]);
  if (suggestedFilename) {
    suggestedFilename = suggestedFilename[0];
    if (suggestedFilename.includes("%")) {
      // URL-encoded %2Fpath%2Fto%2Ffile.pdf should be file.pdf
      try {
        const decodedMatch = reFilename.exec(
          decodeURIComponent(suggestedFilename)
        );
        // The decoded text may no longer contain a valid name, e.g. for
        // "%2F.pdf"; in that case the still-encoded name is kept.
        if (decodedMatch) {
          suggestedFilename = decodedMatch[0];
        }
      } catch (ex) {
        // Deliberately ignored (extremely rare):
        // URIError "Malformed URI", e.g. for "%AA.pdf"; the still-encoded
        // name is kept.
      }
    }
  }
  return suggestedFilename || defaultFilename;
}
class StatTimer {
constructor() {
this.started = Object.create(null);
@ -655,6 +707,7 @@ export {
DOMCMapReaderFactory,
DOMSVGFactory,
getFilenameFromUrl,
getPdfFilenameFromUrl,
isFetchSupported,
isPdfFile,
isValidFetchUrl,

View File

@ -17,6 +17,7 @@
import {
addLinkAttributes,
getFilenameFromUrl,
getPdfFilenameFromUrl,
isFetchSupported,
isPdfFile,
isValidFetchUrl,
@ -129,6 +130,7 @@ export {
// From "./display/display_utils.js":
addLinkAttributes,
getFilenameFromUrl,
getPdfFilenameFromUrl,
isPdfFile,
LinkTarget,
loadScript,

View File

@ -17,9 +17,11 @@ import {
DOMCanvasFactory,
DOMSVGFactory,
getFilenameFromUrl,
getPdfFilenameFromUrl,
isValidFetchUrl,
PDFDateString,
} from "../../src/display/display_utils.js";
import { createObjectURL } from "../../src/shared/util.js";
import { isNodeJS } from "../../src/shared/is_node.js";
describe("display_utils", function () {
@ -192,6 +194,162 @@ describe("display_utils", function () {
});
});
// Specs for `getPdfFilenameFromUrl`: filename detection in the path, query
// string and hash of a URL, plus the fallback value used when no PDF
// filename can be found.
describe("getPdfFilenameFromUrl", function () {
it("gets PDF filename", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/file1.pdf")).toEqual("file1.pdf");
// Absolute URL
expect(
getPdfFilenameFromUrl("http://www.example.com/pdfs/file2.pdf")
).toEqual("file2.pdf");
});
it("gets fallback filename", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/file1.txt")).toEqual("document.pdf");
// Absolute URL
expect(
getPdfFilenameFromUrl("http://www.example.com/pdfs/file2.txt")
).toEqual("document.pdf");
});
it("gets custom fallback filename", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/file1.txt", "qwerty1.pdf")).toEqual(
"qwerty1.pdf"
);
// Absolute URL
expect(
getPdfFilenameFromUrl(
"http://www.example.com/pdfs/file2.txt",
"qwerty2.pdf"
)
).toEqual("qwerty2.pdf");
// An empty string should be a valid custom fallback filename.
expect(getPdfFilenameFromUrl("/pdfs/file3.txt", "")).toEqual("");
});
it("gets fallback filename when url is not a string", function () {
expect(getPdfFilenameFromUrl(null)).toEqual("document.pdf");
expect(getPdfFilenameFromUrl(null, "file.pdf")).toEqual("file.pdf");
});
it("gets PDF filename from URL containing leading/trailing whitespace", function () {
// Relative URL
expect(getPdfFilenameFromUrl(" /pdfs/file1.pdf ")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPdfFilenameFromUrl(" http://www.example.com/pdfs/file2.pdf ")
).toEqual("file2.pdf");
});
it("gets PDF filename from query string", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/pdfs.html?name=file1.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPdfFilenameFromUrl("http://www.example.com/pdfs/pdf.html?file2.pdf")
).toEqual("file2.pdf");
});
it("gets PDF filename from hash string", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/pdfs.html#name=file1.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPdfFilenameFromUrl("http://www.example.com/pdfs/pdf.html#file2.pdf")
).toEqual("file2.pdf");
});
// A filename in the path is expected to win over one in the query string or
// in the hash.
it("gets correct PDF filename when multiple ones are present", function () {
// Relative URL
expect(getPdfFilenameFromUrl("/pdfs/file1.pdf?name=file.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPdfFilenameFromUrl("http://www.example.com/pdfs/file2.pdf#file.pdf")
).toEqual("file2.pdf");
});
// The whole URL is percent-encoded here, so the filename only becomes
// visible once the input has been decoded.
it("gets PDF filename from URI-encoded data", function () {
const encodedUrl = encodeURIComponent(
"http://www.example.com/pdfs/file1.pdf"
);
expect(getPdfFilenameFromUrl(encodedUrl)).toEqual("file1.pdf");
const encodedUrlWithQuery = encodeURIComponent(
"http://www.example.com/pdfs/file.txt?file2.pdf"
);
expect(getPdfFilenameFromUrl(encodedUrlWithQuery)).toEqual("file2.pdf");
});
// Names containing a "%" that do not decode to a usable PDF filename are
// expected to be returned unchanged.
it("gets PDF filename from data mistaken for URI-encoded", function () {
expect(getPdfFilenameFromUrl("/pdfs/%AA.pdf")).toEqual("%AA.pdf");
expect(getPdfFilenameFromUrl("/pdfs/%2F.pdf")).toEqual("%2F.pdf");
});
it("gets PDF filename from (some) standard protocols", function () {
// HTTP
expect(getPdfFilenameFromUrl("http://www.example.com/file1.pdf")).toEqual(
"file1.pdf"
);
// HTTPS
expect(
getPdfFilenameFromUrl("https://www.example.com/file2.pdf")
).toEqual("file2.pdf");
// File
expect(getPdfFilenameFromUrl("file:///path/to/files/file3.pdf")).toEqual(
"file3.pdf"
);
// FTP
expect(getPdfFilenameFromUrl("ftp://www.example.com/file4.pdf")).toEqual(
"file4.pdf"
);
});
it('gets PDF filename from query string appended to "blob:" URL', function () {
// This spec is marked as pending when running in Node.js.
// NOTE(review): the message below reads "Blob in not supported"; it
// presumably should say "is" - confirm before changing the string.
if (isNodeJS) {
pending("Blob in not supported in Node.js.");
}
const typedArray = new Uint8Array([1, 2, 3, 4, 5]);
const blobUrl = createObjectURL(typedArray, "application/pdf");
// Sanity check to ensure that a "blob:" URL was returned.
expect(blobUrl.startsWith("blob:")).toEqual(true);
expect(getPdfFilenameFromUrl(blobUrl + "?file.pdf")).toEqual("file.pdf");
});
// For "data:" URLs the fallback filename is expected, even when a PDF
// filename is appended as a query string.
it('gets fallback filename from query string appended to "data:" URL', function () {
const typedArray = new Uint8Array([1, 2, 3, 4, 5]);
const dataUrl = createObjectURL(
typedArray,
"application/pdf",
/* forceDataSchema = */ true
);
// Sanity check to ensure that a "data:" URL was returned.
expect(dataUrl.startsWith("data:")).toEqual(true);
expect(getPdfFilenameFromUrl(dataUrl + "?file1.pdf")).toEqual(
"document.pdf"
);
// Should correctly detect a "data:" URL with leading whitespace.
expect(getPdfFilenameFromUrl(" " + dataUrl + "?file2.pdf")).toEqual(
"document.pdf"
);
});
});
describe("isValidFetchUrl", function () {
it("handles invalid Fetch URLs", function () {
expect(isValidFetchUrl(null)).toEqual(false);

View File

@ -18,7 +18,6 @@ import {
binarySearchFirstItem,
EventBus,
getPageSizeInches,
getPDFFileNameFromURL,
getVisibleElements,
isPortraitOrientation,
isValidRotation,
@ -26,7 +25,6 @@ import {
waitOnEventOrTimeout,
WaitOnType,
} from "../../web/ui_utils.js";
import { createObjectURL } from "../../src/shared/util.js";
import { isNodeJS } from "../../src/shared/is_node.js";
describe("ui_utils", function () {
@ -58,162 +56,6 @@ describe("ui_utils", function () {
});
});
// Specs for `getPDFFileNameFromURL`: filename detection in the path, query
// string and hash of a URL, plus the fallback value used when no PDF
// filename can be found.
describe("getPDFFileNameFromURL", function () {
it("gets PDF filename", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/file1.pdf")).toEqual("file1.pdf");
// Absolute URL
expect(
getPDFFileNameFromURL("http://www.example.com/pdfs/file2.pdf")
).toEqual("file2.pdf");
});
it("gets fallback filename", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/file1.txt")).toEqual("document.pdf");
// Absolute URL
expect(
getPDFFileNameFromURL("http://www.example.com/pdfs/file2.txt")
).toEqual("document.pdf");
});
it("gets custom fallback filename", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/file1.txt", "qwerty1.pdf")).toEqual(
"qwerty1.pdf"
);
// Absolute URL
expect(
getPDFFileNameFromURL(
"http://www.example.com/pdfs/file2.txt",
"qwerty2.pdf"
)
).toEqual("qwerty2.pdf");
// An empty string should be a valid custom fallback filename.
expect(getPDFFileNameFromURL("/pdfs/file3.txt", "")).toEqual("");
});
it("gets fallback filename when url is not a string", function () {
expect(getPDFFileNameFromURL(null)).toEqual("document.pdf");
expect(getPDFFileNameFromURL(null, "file.pdf")).toEqual("file.pdf");
});
it("gets PDF filename from URL containing leading/trailing whitespace", function () {
// Relative URL
expect(getPDFFileNameFromURL(" /pdfs/file1.pdf ")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPDFFileNameFromURL(" http://www.example.com/pdfs/file2.pdf ")
).toEqual("file2.pdf");
});
it("gets PDF filename from query string", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/pdfs.html?name=file1.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPDFFileNameFromURL("http://www.example.com/pdfs/pdf.html?file2.pdf")
).toEqual("file2.pdf");
});
it("gets PDF filename from hash string", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/pdfs.html#name=file1.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPDFFileNameFromURL("http://www.example.com/pdfs/pdf.html#file2.pdf")
).toEqual("file2.pdf");
});
// A filename in the path is expected to win over one in the query string or
// in the hash.
it("gets correct PDF filename when multiple ones are present", function () {
// Relative URL
expect(getPDFFileNameFromURL("/pdfs/file1.pdf?name=file.pdf")).toEqual(
"file1.pdf"
);
// Absolute URL
expect(
getPDFFileNameFromURL("http://www.example.com/pdfs/file2.pdf#file.pdf")
).toEqual("file2.pdf");
});
// The whole URL is percent-encoded here, so the filename only becomes
// visible once the input has been decoded.
it("gets PDF filename from URI-encoded data", function () {
const encodedUrl = encodeURIComponent(
"http://www.example.com/pdfs/file1.pdf"
);
expect(getPDFFileNameFromURL(encodedUrl)).toEqual("file1.pdf");
const encodedUrlWithQuery = encodeURIComponent(
"http://www.example.com/pdfs/file.txt?file2.pdf"
);
expect(getPDFFileNameFromURL(encodedUrlWithQuery)).toEqual("file2.pdf");
});
// Names containing a "%" that do not decode to a usable PDF filename are
// expected to be returned unchanged.
it("gets PDF filename from data mistaken for URI-encoded", function () {
expect(getPDFFileNameFromURL("/pdfs/%AA.pdf")).toEqual("%AA.pdf");
expect(getPDFFileNameFromURL("/pdfs/%2F.pdf")).toEqual("%2F.pdf");
});
it("gets PDF filename from (some) standard protocols", function () {
// HTTP
expect(getPDFFileNameFromURL("http://www.example.com/file1.pdf")).toEqual(
"file1.pdf"
);
// HTTPS
expect(
getPDFFileNameFromURL("https://www.example.com/file2.pdf")
).toEqual("file2.pdf");
// File
expect(getPDFFileNameFromURL("file:///path/to/files/file3.pdf")).toEqual(
"file3.pdf"
);
// FTP
expect(getPDFFileNameFromURL("ftp://www.example.com/file4.pdf")).toEqual(
"file4.pdf"
);
});
it('gets PDF filename from query string appended to "blob:" URL', function () {
// This spec is marked as pending when running in Node.js.
// NOTE(review): the message below reads "Blob in not supported"; it
// presumably should say "is" - confirm before changing the string.
if (isNodeJS) {
pending("Blob in not supported in Node.js.");
}
const typedArray = new Uint8Array([1, 2, 3, 4, 5]);
const blobUrl = createObjectURL(typedArray, "application/pdf");
// Sanity check to ensure that a "blob:" URL was returned.
expect(blobUrl.startsWith("blob:")).toEqual(true);
expect(getPDFFileNameFromURL(blobUrl + "?file.pdf")).toEqual("file.pdf");
});
// For "data:" URLs the fallback filename is expected, even when a PDF
// filename is appended as a query string.
it('gets fallback filename from query string appended to "data:" URL', function () {
const typedArray = new Uint8Array([1, 2, 3, 4, 5]);
const dataUrl = createObjectURL(
typedArray,
"application/pdf",
/* forceDataSchema = */ true
);
// Sanity check to ensure that a "data:" URL was returned.
expect(dataUrl.startsWith("data:")).toEqual(true);
expect(getPDFFileNameFromURL(dataUrl + "?file1.pdf")).toEqual(
"document.pdf"
);
// Should correctly detect a "data:" URL with leading whitespace.
expect(getPDFFileNameFromURL(" " + dataUrl + "?file2.pdf")).toEqual(
"document.pdf"
);
});
});
describe("EventBus", function () {
it("dispatch event", function () {
const eventBus = new EventBus();

View File

@ -22,7 +22,6 @@ import {
DEFAULT_SCALE_VALUE,
EventBus,
getActiveOrFocusedElement,
getPDFFileNameFromURL,
isValidRotation,
isValidScrollMode,
isValidSpreadMode,
@ -44,6 +43,7 @@ import {
createPromiseCapability,
getDocument,
getFilenameFromUrl,
getPdfFilenameFromUrl,
GlobalWorkerOptions,
InvalidPDFException,
isPdfFile,
@ -748,7 +748,7 @@ const PDFViewerApplication = {
setTitleUsingUrl(url = "") {
this.url = url;
this.baseUrl = url.split("#")[0];
let title = getPDFFileNameFromURL(url, "");
let title = getPdfFilenameFromUrl(url, "");
if (!title) {
try {
title = decodeURIComponent(getFilenameFromUrl(url)) || url;
@ -772,7 +772,7 @@ const PDFViewerApplication = {
get _docFilename() {
// Use `this.url` instead of `this.baseUrl` to perform filename detection
// based on the reference fragment as ultimate fallback if needed.
return this._contentDispositionFilename || getPDFFileNameFromURL(this.url);
return this._contentDispositionFilename || getPdfFilenameFromUrl(this.url);
},
/**

View File

@ -13,8 +13,7 @@
* limitations under the License.
*/
import { getPDFFileNameFromURL } from "./ui_utils.js";
import { loadScript } from "pdfjs-lib";
import { getPdfFilenameFromUrl, loadScript } from "pdfjs-lib";
async function docPropertiesLookup(pdfDocument) {
const url = "",
@ -37,7 +36,7 @@ async function docPropertiesLookup(pdfDocument) {
...info,
baseURL: baseUrl,
filesize: contentLength,
filename: contentDispositionFilename || getPDFFileNameFromURL(url),
filename: contentDispositionFilename || getPdfFilenameFromUrl(url),
metadata: metadata?.getRaw(),
authors: metadata?.get("dc:creator"),
numPages: pdfDocument.numPages,

View File

@ -13,12 +13,12 @@
* limitations under the License.
*/
import { createPromiseCapability, PDFDateString } from "pdfjs-lib";
import {
getPageSizeInches,
getPDFFileNameFromURL,
isPortraitOrientation,
} from "./ui_utils.js";
createPromiseCapability,
getPdfFilenameFromUrl,
PDFDateString,
} from "pdfjs-lib";
import { getPageSizeInches, isPortraitOrientation } from "./ui_utils.js";
const DEFAULT_FIELD_CONTENT = "-";
@ -140,7 +140,7 @@ class PDFDocumentProperties {
pageSize,
isLinearized,
] = await Promise.all([
contentDispositionFilename || getPDFFileNameFromURL(this.url),
contentDispositionFilename || getPdfFilenameFromUrl(this.url),
this._parseFileSize(contentLength),
this._parseDate(info.CreationDate),
this._parseDate(info.ModDate),

View File

@ -570,60 +570,6 @@ function noContextMenuHandler(evt) {
evt.preventDefault();
}
/**
 * Checks whether a URL starts with "data:", ignoring leading whitespace and
 * letter case. The string is scanned in place instead of being trimmed.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isDataSchema(url) {
  const length = url.length;
  let offset = 0;
  for (; offset < length; offset++) {
    // Stop at the first character that `trim()` does not remove.
    if (url[offset].trim() !== "") {
      break;
    }
  }
  const prefix = url.substring(offset, offset + 5);
  return prefix.toLowerCase() === "data:";
}
/**
 * Returns the filename or guessed filename from the url (see issue 3455).
 *
 * The path, the query string and the reference fragment are searched, in
 * that order, for the last "NAME.pdf" occurrence; the first part that
 * contains one provides the result.
 *
 * @param {string} url - The original PDF location.
 * @param {string} defaultFilename - The value returned if the filename is
 *   unknown, or the protocol is unsupported.
 * @returns {string} Guessed PDF filename.
 */
function getPDFFileNameFromURL(url, defaultFilename = "document.pdf") {
  if (typeof url !== "string") {
    return defaultFilename;
  }
  if (isDataSchema(url)) {
    console.warn(
      "getPDFFileNameFromURL: " +
        'ignoring "data:" URL for performance reasons.'
    );
    return defaultFilename;
  }
  // Capture groups: 1 = PATH, 2 = QUERY ("?..."), 3 = REF ("#...");
  // the optional non-capturing prefix consumes the SCHEME and HOST.
  // The REF group uses `[\s\S]*` instead of `.*`, since `.` does not match
  // line terminators: with `.*` a URL such as "/file.pdf#page=2\n" did not
  // match at all, and reading the groups of the `null` result threw.
  const reURI = /^(?:(?:[^:]+:)?\/\/[^/]+)?([^?#]*)(\?[^#]*)?(#[\s\S]*)?$/;
  // Pattern to get last matching NAME.pdf
  const reFilename = /[^/?#=]+\.pdf\b(?!.*\.pdf\b)/i;
  const splitURI = reURI.exec(url);
  let suggestedFilename =
    reFilename.exec(splitURI[1]) ||
    reFilename.exec(splitURI[2]) ||
    reFilename.exec(splitURI[3]);
  if (suggestedFilename) {
    suggestedFilename = suggestedFilename[0];
    if (suggestedFilename.includes("%")) {
      // URL-encoded %2Fpath%2Fto%2Ffile.pdf should be file.pdf
      try {
        const decodedMatch = reFilename.exec(
          decodeURIComponent(suggestedFilename)
        );
        // The decoded text may no longer contain a valid name, e.g. for
        // "%2F.pdf"; in that case the still-encoded name is kept.
        if (decodedMatch) {
          suggestedFilename = decodedMatch[0];
        }
      } catch (ex) {
        // Deliberately ignored (extremely rare):
        // URIError "Malformed URI", e.g. for "%AA.pdf"; the still-encoded
        // name is kept.
      }
    }
  }
  return suggestedFilename || defaultFilename;
}
function normalizeWheelEventDirection(evt) {
let delta = Math.hypot(evt.deltaX, evt.deltaY);
const angle = Math.atan2(evt.deltaY, evt.deltaX);
@ -1063,7 +1009,6 @@ export {
getActiveOrFocusedElement,
getOutputScale,
getPageSizeInches,
getPDFFileNameFromURL,
getVisibleElements,
isPortraitOrientation,
isValidRotation,