From 84472b30ee072f6c369abc26049b189d42374b05 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 15 Apr 2017 20:37:06 +0200 Subject: [PATCH] Change `getPDFFileNameFromURL` to ignore `data:` URLs for performance reasons (issue 8263) The patch also changes the `defaultFilename` to use the ES6 default parameter notation, and fixes the formatting of the JSDoc comment. Finally, since `getPDFFileNameFromURL` currently has no unit-tests, a few basic ones are added to avoid regressions. --- test/unit/ui_utils_spec.js | 136 +++++++++++++++++++++++++++++++++++-- web/ui_utils.js | 27 +++++--- 2 files changed, 151 insertions(+), 12 deletions(-) diff --git a/test/unit/ui_utils_spec.js b/test/unit/ui_utils_spec.js index 820edc0bd..67b20489e 100644 --- a/test/unit/ui_utils_spec.js +++ b/test/unit/ui_utils_spec.js @@ -17,16 +17,20 @@ (function (root, factory) { if (typeof define === 'function' && define.amd) { define('pdfjs-test/unit/ui_utils_spec', ['exports', - 'pdfjs-web/ui_utils'], factory); + 'pdfjs-web/ui_utils', 'pdfjs/shared/util'], factory); } else if (typeof exports !== 'undefined') { - factory(exports, require('../../web/ui_utils.js')); + factory(exports, require('../../web/ui_utils.js'), + require('../../src/shared/util.js')); } else { - factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils); + factory((root.pdfjsTestUnitUiUtilsSpec = {}), root.pdfjsWebUiUtils, + root.pdfjsSharedUtil); } -}(this, function (exports, webUiUtils) { +}(this, function (exports, webUiUtils, sharedUtil) { var binarySearchFirstItem = webUiUtils.binarySearchFirstItem; +var getPDFFileNameFromURL = webUiUtils.getPDFFileNameFromURL; var EventBus = webUiUtils.EventBus; +var createObjectURL = sharedUtil.createObjectURL; describe('ui_utils', function() { describe('binary search', function() { @@ -57,6 +61,130 @@ describe('ui_utils', function() { }); }); + describe('getPDFFileNameFromURL', function() { + it('gets PDF filename', function() { + // Relative URL + expect(getPDFFileNameFromURL('/pdfs/file1.pdf')).toEqual('file1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + 'http://www.example.com/pdfs/file2.pdf')).toEqual('file2.pdf'); + }); + + it('gets fallback filename', function() { + // Relative URL + expect(getPDFFileNameFromURL('/pdfs/file1.txt')).toEqual('document.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + 'http://www.example.com/pdfs/file2.txt')).toEqual('document.pdf'); + }); + + it('gets custom fallback filename', function() { + // Relative URL + expect(getPDFFileNameFromURL('/pdfs/file1.txt', 'qwerty1.pdf')). + toEqual('qwerty1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL('http://www.example.com/pdfs/file2.txt', + 'qwerty2.pdf')).toEqual('qwerty2.pdf'); + + // An empty string should be a valid custom fallback filename. + expect(getPDFFileNameFromURL('/pdfs/file3.txt', '')).toEqual(''); + }); + + it('gets PDF filename from URL containing leading/trailing whitespace', + function() { + // Relative URL + expect(getPDFFileNameFromURL( + ' /pdfs/file1.pdf ')).toEqual('file1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + ' http://www.example.com/pdfs/file2.pdf ')).toEqual('file2.pdf'); + }); + + it('gets PDF filename from query string', function() { + // Relative URL + expect(getPDFFileNameFromURL( + '/pdfs/pdfs.html?name=file1.pdf')).toEqual('file1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + 'http://www.example.com/pdfs/pdf.html?file2.pdf')).toEqual('file2.pdf'); + }); + + it('gets PDF filename from hash string', function() { + // Relative URL + expect(getPDFFileNameFromURL( + '/pdfs/pdfs.html#name=file1.pdf')).toEqual('file1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + 'http://www.example.com/pdfs/pdf.html#file2.pdf')).toEqual('file2.pdf'); + }); + + it('gets correct PDF filename when multiple ones are present', function() { + // Relative URL + expect(getPDFFileNameFromURL( + '/pdfs/file1.pdf?name=file.pdf')).toEqual('file1.pdf'); + // Absolute URL + expect(getPDFFileNameFromURL( + 'http://www.example.com/pdfs/file2.pdf#file.pdf')).toEqual('file2.pdf'); + }); + + it('gets PDF filename from URI-encoded data', function() { + var encodedUrl = encodeURIComponent( + 'http://www.example.com/pdfs/file1.pdf'); + expect(getPDFFileNameFromURL(encodedUrl)).toEqual('file1.pdf'); + + var encodedUrlWithQuery = encodeURIComponent( + 'http://www.example.com/pdfs/file.txt?file2.pdf'); + expect(getPDFFileNameFromURL(encodedUrlWithQuery)).toEqual('file2.pdf'); + }); + + it('gets PDF filename from data mistaken for URI-encoded', function() { + expect(getPDFFileNameFromURL('/pdfs/%AA.pdf')).toEqual('%AA.pdf'); + + expect(getPDFFileNameFromURL('/pdfs/%2F.pdf')).toEqual('%2F.pdf'); + }); + + it('gets PDF filename from (some) standard protocols', function() { + // HTTP + expect(getPDFFileNameFromURL('http://www.example.com/file1.pdf')). + toEqual('file1.pdf'); + // HTTPS + expect(getPDFFileNameFromURL('https://www.example.com/file2.pdf')). + toEqual('file2.pdf'); + // File + expect(getPDFFileNameFromURL('file:///path/to/files/file3.pdf')). + toEqual('file3.pdf'); + // FTP + expect(getPDFFileNameFromURL('ftp://www.example.com/file4.pdf')). + toEqual('file4.pdf'); + }); + + it('gets PDF filename from query string appended to "blob:" URL', + function() { + var typedArray = new Uint8Array([1, 2, 3, 4, 5]); + var blobUrl = createObjectURL(typedArray, 'application/pdf'); + // Sanity check to ensure that a "blob:" URL was returned. + expect(blobUrl.indexOf('blob:') === 0).toEqual(true); + + expect(getPDFFileNameFromURL(blobUrl + '?file.pdf')).toEqual('file.pdf'); + }); + + it('gets fallback filename from query string appended to "data:" URL', + function() { + var typedArray = new Uint8Array([1, 2, 3, 4, 5]); + var dataUrl = createObjectURL(typedArray, 'application/pdf', + /* forceDataSchema = */ true); + // Sanity check to ensure that a "data:" URL was returned. + expect(dataUrl.indexOf('data:') === 0).toEqual(true); + + expect(getPDFFileNameFromURL(dataUrl + '?file1.pdf')). + toEqual('document.pdf'); + + // Should correctly detect a "data:" URL with leading whitespace. + expect(getPDFFileNameFromURL(' ' + dataUrl + '?file2.pdf')). + toEqual('document.pdf'); + }); + }); + describe('EventBus', function () { it('dispatch event', function () { var eventBus = new EventBus(); diff --git a/web/ui_utils.js b/web/ui_utils.js index 46eb349d0..432f3aa36 100644 --- a/web/ui_utils.js +++ b/web/ui_utils.js @@ -353,15 +353,26 @@ function noContextMenuHandler(e) { e.preventDefault(); } +function isDataSchema(url) { + var i = 0, ii = url.length; + while (i < ii && url[i].trim() === '') { + i++; + } + return url.substr(i, 5).toLowerCase() === 'data:'; +} + /** * Returns the filename or guessed filename from the url (see issue 3455). - * url {String} The original PDF location. - * defaultFilename {string} The value to return if the file name is unknown. - * @return {String} Guessed PDF file name. + * @param {string} url - The original PDF location. + * @param {string} defaultFilename - The value returned if the filename is + * unknown, or the protocol is unsupported. + * @returns {string} Guessed PDF filename. */ -function getPDFFileNameFromURL(url, defaultFilename) { - if (typeof defaultFilename === 'undefined') { - defaultFilename = 'document.pdf'; +function getPDFFileNameFromURL(url, defaultFilename = 'document.pdf') { + if (isDataSchema(url)) { + console.warn('getPDFFileNameFromURL: ' + + 'ignoring "data:" URL for performance reasons.'); + return defaultFilename; } var reURI = /^(?:(?:[^:]+:)?\/\/[^\/]+)?([^?#]*)(\?[^#]*)?(#.*)?$/; // SCHEME HOST 1.PATH 2.QUERY 3.REF @@ -369,8 +380,8 @@ function getPDFFileNameFromURL(url, defaultFilename) { var reFilename = /[^\/?#=]+\.pdf\b(?!.*\.pdf\b)/i; var splitURI = reURI.exec(url); var suggestedFilename = reFilename.exec(splitURI[1]) || - reFilename.exec(splitURI[2]) || - reFilename.exec(splitURI[3]); + reFilename.exec(splitURI[2]) || + reFilename.exec(splitURI[3]); if (suggestedFilename) { suggestedFilename = suggestedFilename[0]; if (suggestedFilename.indexOf('%') !== -1) {