Change getPDFFileNameFromURL to ignore data: URLs for performance reasons (issue 8263)

The patch also changes the `defaultFilename` parameter to use the ES6 default parameter notation, and fixes the formatting of the JSDoc comment.

Finally, since `getPDFFileNameFromURL` currently has no unit-tests, a few basic ones are added to avoid regressions.
This commit is contained in:
Jonas Jenwald 2017-04-15 20:37:06 +02:00
parent 3888a993b1
commit 84472b30ee
2 changed files with 151 additions and 12 deletions

View File

@ -17,16 +17,20 @@
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('pdfjs-test/unit/ui_utils_spec', ['exports',
'pdfjs-web/ui_utils'], factory);
'pdfjs-web/ui_utils', 'pdfjs/shared/util'], factory);
} else if (typeof exports !== 'undefined') {
factory(exports, require('../../web/ui_utils.js'));
factory(exports, require('../../web/ui_utils.js'),
require('../../src/shared/util.js'));
} else {
factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils);
factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils,
root.pdfjsSharedUtil);
}
}(this, function (exports, webUiUtils) {
}(this, function (exports, webUiUtils, sharedUtil) {
var binarySearchFirstItem = webUiUtils.binarySearchFirstItem;
var getPDFFileNameFromURL = webUiUtils.getPDFFileNameFromURL;
var EventBus = webUiUtils.EventBus;
var createObjectURL = sharedUtil.createObjectURL;
describe('ui_utils', function() {
describe('binary search', function() {
@ -57,6 +61,130 @@ describe('ui_utils', function() {
});
});
// Unit-tests for `getPDFFileNameFromURL`. They cover extracting a ".pdf"
// filename from the path, query string, or hash of a URL, and the fallback
// filename that is expected when no such name is found (or when the URL is a
// "data:" URL).
describe('getPDFFileNameFromURL', function() {
it('gets PDF filename', function() {
// Relative URL
expect(getPDFFileNameFromURL('/pdfs/file1.pdf')).toEqual('file1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
'http://www.example.com/pdfs/file2.pdf')).toEqual('file2.pdf');
});
it('gets fallback filename', function() {
// Neither URL contains a ".pdf" filename, and no custom fallback is passed,
// so the built-in fallback 'document.pdf' is expected.
// Relative URL
expect(getPDFFileNameFromURL('/pdfs/file1.txt')).toEqual('document.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
'http://www.example.com/pdfs/file2.txt')).toEqual('document.pdf');
});
it('gets custom fallback filename', function() {
// Relative URL
expect(getPDFFileNameFromURL('/pdfs/file1.txt', 'qwerty1.pdf')).
toEqual('qwerty1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL('http://www.example.com/pdfs/file2.txt',
'qwerty2.pdf')).toEqual('qwerty2.pdf');
// An empty string should be a valid custom fallback filename.
expect(getPDFFileNameFromURL('/pdfs/file3.txt', '')).toEqual('');
});
it('gets PDF filename from URL containing leading/trailing whitespace',
function() {
// Relative URL
expect(getPDFFileNameFromURL(
' /pdfs/file1.pdf ')).toEqual('file1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
' http://www.example.com/pdfs/file2.pdf ')).toEqual('file2.pdf');
});
it('gets PDF filename from query string', function() {
// Relative URL
expect(getPDFFileNameFromURL(
'/pdfs/pdfs.html?name=file1.pdf')).toEqual('file1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
'http://www.example.com/pdfs/pdf.html?file2.pdf')).toEqual('file2.pdf');
});
it('gets PDF filename from hash string', function() {
// Relative URL
expect(getPDFFileNameFromURL(
'/pdfs/pdfs.html#name=file1.pdf')).toEqual('file1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
'http://www.example.com/pdfs/pdf.html#file2.pdf')).toEqual('file2.pdf');
});
it('gets correct PDF filename when multiple ones are present', function() {
// The filename found in the path is expected to take precedence over the
// ones found in the query string or the hash.
// Relative URL
expect(getPDFFileNameFromURL(
'/pdfs/file1.pdf?name=file.pdf')).toEqual('file1.pdf');
// Absolute URL
expect(getPDFFileNameFromURL(
'http://www.example.com/pdfs/file2.pdf#file.pdf')).toEqual('file2.pdf');
});
it('gets PDF filename from URI-encoded data', function() {
// `encodeURIComponent` escapes the ':', '/' and '?' separators, so the raw
// input passed in below contains no literal path or query delimiters.
var encodedUrl = encodeURIComponent(
'http://www.example.com/pdfs/file1.pdf');
expect(getPDFFileNameFromURL(encodedUrl)).toEqual('file1.pdf');
var encodedUrlWithQuery = encodeURIComponent(
'http://www.example.com/pdfs/file.txt?file2.pdf');
expect(getPDFFileNameFromURL(encodedUrlWithQuery)).toEqual('file2.pdf');
});
it('gets PDF filename from data mistaken for URI-encoded', function() {
// '%AA' is not a valid percent-encoded UTF-8 sequence, and '%2F' would
// decode to a '/' path separator; in both cases the raw, undecoded filename
// is expected to be returned.
expect(getPDFFileNameFromURL('/pdfs/%AA.pdf')).toEqual('%AA.pdf');
expect(getPDFFileNameFromURL('/pdfs/%2F.pdf')).toEqual('%2F.pdf');
});
it('gets PDF filename from (some) standard protocols', function() {
// HTTP
expect(getPDFFileNameFromURL('http://www.example.com/file1.pdf')).
toEqual('file1.pdf');
// HTTPS
expect(getPDFFileNameFromURL('https://www.example.com/file2.pdf')).
toEqual('file2.pdf');
// File
expect(getPDFFileNameFromURL('file:///path/to/files/file3.pdf')).
toEqual('file3.pdf');
// FTP
expect(getPDFFileNameFromURL('ftp://www.example.com/file4.pdf')).
toEqual('file4.pdf');
});
it('gets PDF filename from query string appended to "blob:" URL',
function() {
// Unlike "data:" URLs (see the next test), "blob:" URLs are still expected
// to be searched for a filename.
var typedArray = new Uint8Array([1, 2, 3, 4, 5]);
var blobUrl = createObjectURL(typedArray, 'application/pdf');
// Sanity check to ensure that a "blob:" URL was returned.
expect(blobUrl.indexOf('blob:') === 0).toEqual(true);
expect(getPDFFileNameFromURL(blobUrl + '?file.pdf')).toEqual('file.pdf');
});
it('gets fallback filename from query string appended to "data:" URL',
function() {
// "data:" URLs are expected to yield the fallback filename, even when a
// ".pdf" name is appended as a query string.
var typedArray = new Uint8Array([1, 2, 3, 4, 5]);
var dataUrl = createObjectURL(typedArray, 'application/pdf',
/* forceDataSchema = */ true);
// Sanity check to ensure that a "data:" URL was returned.
expect(dataUrl.indexOf('data:') === 0).toEqual(true);
expect(getPDFFileNameFromURL(dataUrl + '?file1.pdf')).
toEqual('document.pdf');
// Should correctly detect a "data:" URL with leading whitespace.
expect(getPDFFileNameFromURL(' ' + dataUrl + '?file2.pdf')).
toEqual('document.pdf');
});
});
describe('EventBus', function () {
it('dispatch event', function () {
var eventBus = new EventBus();

View File

@ -353,15 +353,26 @@ function noContextMenuHandler(e) {
e.preventDefault();
}
/**
 * Checks whether a URL uses the "data:" scheme, ignoring any leading
 * whitespace and the letter case of the scheme.
 *
 * Leading whitespace is skipped one character at a time, instead of trimming
 * the entire string, so that only the first few characters of the URL are
 * inspected and at most five characters are copied.
 *
 * @param {string} url - The URL to inspect.
 * @returns {boolean} `true` if `url` is a string starting with "data:";
 *   `false` otherwise, including for non-string values.
 */
function isDataSchema(url) {
  // Non-string values (e.g. `undefined` or `null`) can never be "data:" URLs;
  // bail out here rather than throwing on the property accesses below.
  if (typeof url !== 'string') {
    return false;
  }
  var i = 0, ii = url.length;
  // A single character is whitespace exactly when trimming it leaves ''.
  while (i < ii && url[i].trim() === '') {
    i++;
  }
  // `substring` is used instead of the deprecated `substr`; since `i` never
  // exceeds `url.length`, both select the same (at most) five characters.
  return url.substring(i, i + 5).toLowerCase() === 'data:';
}
/**
* Returns the filename or guessed filename from the url (see issue 3455).
* url {String} The original PDF location.
* defaultFilename {string} The value to return if the file name is unknown.
* @return {String} Guessed PDF file name.
* @param {string} url - The original PDF location.
* @param {string} defaultFilename - The value returned if the filename is
* unknown, or the protocol is unsupported.
* @returns {string} Guessed PDF filename.
*/
function getPDFFileNameFromURL(url, defaultFilename) {
if (typeof defaultFilename === 'undefined') {
defaultFilename = 'document.pdf';
function getPDFFileNameFromURL(url, defaultFilename = 'document.pdf') {
if (isDataSchema(url)) {
console.warn('getPDFFileNameFromURL: ' +
'ignoring "data:" URL for performance reasons.');
return defaultFilename;
}
var reURI = /^(?:(?:[^:]+:)?\/\/[^\/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/;
// SCHEME HOST 1.PATH 2.QUERY 3.REF
@ -369,8 +380,8 @@ function getPDFFileNameFromURL(url, defaultFilename) {
var reFilename = /[^\/?#=]+\.pdf\b(?!.*\.pdf\b)/i;
var splitURI = reURI.exec(url);
var suggestedFilename = reFilename.exec(splitURI[1]) ||
reFilename.exec(splitURI[2]) ||
reFilename.exec(splitURI[3]);
reFilename.exec(splitURI[2]) ||
reFilename.exec(splitURI[3]);
if (suggestedFilename) {
suggestedFilename = suggestedFilename[0];
if (suggestedFilename.indexOf('%') !== -1) {