Address the final round of review comments for Content-Disposition filename extraction

This patch updates the `IPDFStreamReader` interface (adding a `filename` getter) and ensures that the implementations in `network.js`, `fetch_stream.js`, `node_stream.js`, and `transport_stream.js` all match it.
The unit-tests are also adjusted to more closely replicate the behaviour of the actual `IPDFStreamReader` implementations.
Finally, this patch adjusts the use of the Content-Disposition filename when setting the title in the viewer, and adds `PDFDocumentProperties` support as well.
This commit is contained in:
Jonas Jenwald 2018-01-16 16:24:36 +01:00
parent eb1f6f4c24
commit 69a8336cf1
12 changed files with 151 additions and 121 deletions

View File

@ -33,7 +33,7 @@
"uglify-es": "^3.1.2", "uglify-es": "^3.1.2",
"vinyl": "^2.1.0", "vinyl": "^2.1.0",
"vinyl-fs": "^2.4.4", "vinyl-fs": "^2.4.4",
"webpack": "^3.10.0", "webpack": "^3.6.0",
"webpack-stream": "^4.0.0", "webpack-stream": "^4.0.0",
"wintersmith": "^2.4.1", "wintersmith": "^2.4.1",
"yargs": "^9.0.1" "yargs": "^9.0.1"

View File

@ -103,6 +103,16 @@ IPDFStreamReader.prototype = {
return null; return null;
}, },
/**
* Gets the Content-Disposition filename. It is defined after the headersReady
* promise is resolved.
* @returns {string|null} The filename, or `null` if the Content-Disposition
* header is missing/invalid.
*/
get filename() {
return null;
},
/** /**
* Gets PDF binary data length. It is defined after the headersReady promise * Gets PDF binary data length. It is defined after the headersReady promise
* is resolved. * is resolved.

View File

@ -1997,14 +1997,14 @@ var WorkerTransport = (function WorkerTransportClosure() {
getMetadata: function WorkerTransport_getMetadata() { getMetadata: function WorkerTransport_getMetadata() {
return this.messageHandler.sendWithPromise('GetMetadata', null). return this.messageHandler.sendWithPromise('GetMetadata', null).
then(function transportMetadata(results) { then((results) => {
return { return {
info: results[0], info: results[0],
metadata: (results[1] ? new Metadata(results[1]) : null), metadata: (results[1] ? new Metadata(results[1]) : null),
contentDispositionFileName: (this._fullReader ? contentDispositionFilename: (this._fullReader ?
this._fullReader.fileName : null), this._fullReader.filename : null),
}; };
}.bind(this)); });
}, },
getStats: function WorkerTransport_getStats() { getStats: function WorkerTransport_getStats() {

View File

@ -67,9 +67,9 @@ class PDFFetchStream {
class PDFFetchStreamReader { class PDFFetchStreamReader {
constructor(stream) { constructor(stream) {
this._stream = stream; this._stream = stream;
this._fileName = null;
this._reader = null; this._reader = null;
this._loaded = 0; this._loaded = 0;
this._filename = null;
let source = stream.source; let source = stream.source;
this._withCredentials = source.withCredentials; this._withCredentials = source.withCredentials;
this._contentLength = source.length; this._contentLength = source.length;
@ -104,7 +104,6 @@ class PDFFetchStreamReader {
const getResponseHeader = (name) => { const getResponseHeader = (name) => {
return response.headers.get(name); return response.headers.get(name);
}; };
let { allowRangeRequests, suggestedLength, } = let { allowRangeRequests, suggestedLength, } =
validateRangeRequestCapabilities({ validateRangeRequestCapabilities({
getResponseHeader, getResponseHeader,
@ -115,7 +114,8 @@ class PDFFetchStreamReader {
this._contentLength = suggestedLength; this._contentLength = suggestedLength;
this._isRangeSupported = allowRangeRequests; this._isRangeSupported = allowRangeRequests;
this._fileName = extractFilenameFromHeader(getResponseHeader);
this._filename = extractFilenameFromHeader(getResponseHeader);
// We need to stop reading when range is supported and streaming is // We need to stop reading when range is supported and streaming is
// disabled. // disabled.
@ -131,12 +131,12 @@ class PDFFetchStreamReader {
return this._headersCapability.promise; return this._headersCapability.promise;
} }
get contentLength() { get filename() {
return this._contentLength; return this._filename;
} }
get fileName() { get contentLength() {
return this._fileName; return this._contentLength;
} }
get isRangeSupported() { get isRangeSupported() {

View File

@ -341,7 +341,7 @@ function PDFNetworkStreamFullRequestReader(manager, source) {
this._requests = []; this._requests = [];
this._done = false; this._done = false;
this._storedError = undefined; this._storedError = undefined;
this._fileName = null; this._filename = null;
this.onProgress = null; this.onProgress = null;
} }
@ -371,6 +371,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
this._isRangeSupported = true; this._isRangeSupported = true;
} }
this._filename = extractFilenameFromHeader(getResponseHeader);
var networkManager = this._manager; var networkManager = this._manager;
if (networkManager.isStreamingRequest(fullRequestXhrId)) { if (networkManager.isStreamingRequest(fullRequestXhrId)) {
// We can continue fetching when progressive loading is enabled, // We can continue fetching when progressive loading is enabled,
@ -385,11 +387,6 @@ PDFNetworkStreamFullRequestReader.prototype = {
networkManager.abortRequest(fullRequestXhrId); networkManager.abortRequest(fullRequestXhrId);
} }
// Content-Disposition: attachment; filename=Naïve file.txt
if (networkManager.isPendingRequest(fullRequestXhrId)) {
this._fileName = extractFilenameFromHeader(getResponseHeader);
}
this._headersReceivedCapability.resolve(); this._headersReceivedCapability.resolve();
}, },
@ -438,8 +435,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
} }
}, },
get fileName() { get filename() {
return this._fileName; return this._filename;
}, },
get isRangeSupported() { get isRangeSupported() {

View File

@ -53,6 +53,18 @@ function validateRangeRequestCapabilities({ getResponseHeader, isHttp,
return returnValues; return returnValues;
} }
function extractFilenameFromHeader(getResponseHeader) {
const contentDisposition = getResponseHeader('Content-Disposition');
if (contentDisposition) {
let parts =
/.+;\s*filename=(?:'|")(.+\.pdf)(?:'|")/gi.exec(contentDisposition);
if (parts !== null && parts.length > 1) {
return getFilenameFromUrl(parts[1]);
}
}
return null;
}
function createResponseStatusError(status, url) { function createResponseStatusError(status, url) {
if (status === 404 || status === 0 && /^file:/.test(url)) { if (status === 404 || status === 0 && /^file:/.test(url)) {
return new MissingPDFException('Missing PDF "' + url + '".'); return new MissingPDFException('Missing PDF "' + url + '".');
@ -66,23 +78,9 @@ function validateResponseStatus(status) {
return status === 200 || status === 206; return status === 200 || status === 206;
} }
function extractFilenameFromHeader(getResponseHeader) {
const contentDisposition = getResponseHeader('Content-Disposition');
if (contentDisposition) {
let parts =
/.+;\s*filename=(?:"|')(.+\.pdf)(?:"|')/gi.exec(contentDisposition);
if (parts !== null && parts.length > 1) {
return getFilenameFromUrl(parts[1]);
}
}
return null;
}
export { export {
createResponseStatusError, createResponseStatusError,
extractFilenameFromHeader,
validateRangeRequestCapabilities, validateRangeRequestCapabilities,
validateResponseStatus, validateResponseStatus,
extractFilenameFromHeader,
}; };

View File

@ -76,11 +76,11 @@ class BaseFullReader {
this._done = false; this._done = false;
this._errored = false; this._errored = false;
this._reason = null; this._reason = null;
this._fileName = null;
this.onProgress = null; this.onProgress = null;
let source = stream.source; let source = stream.source;
this._contentLength = source.length; // optional this._contentLength = source.length; // optional
this._loaded = 0; this._loaded = 0;
this._filename = null;
this._disableRange = source.disableRange || false; this._disableRange = source.disableRange || false;
this._rangeChunkSize = source.rangeChunkSize; this._rangeChunkSize = source.rangeChunkSize;
@ -100,6 +100,10 @@ class BaseFullReader {
return this._headersCapability.promise; return this._headersCapability.promise;
} }
get filename() {
return this._filename;
}
get contentLength() { get contentLength() {
return this._contentLength; return this._contentLength;
} }
@ -112,10 +116,6 @@ class BaseFullReader {
return this._isStreamingSupported; return this._isStreamingSupported;
} }
get fileName() {
return this._fileName;
}
read() { read() {
return this._readCapability.promise.then(() => { return this._readCapability.promise.then(() => {
if (this._done) { if (this._done) {
@ -296,14 +296,13 @@ class PDFNodeStreamFullReader extends BaseFullReader {
// here: https://nodejs.org/api/http.html#http_message_headers. // here: https://nodejs.org/api/http.html#http_message_headers.
return this._readableStream.headers[name.toLowerCase()]; return this._readableStream.headers[name.toLowerCase()];
}; };
let { allowRangeRequests, suggestedLength, } = let { allowRangeRequests, suggestedLength, } =
validateRangeRequestCapabilities({ validateRangeRequestCapabilities({
getResponseHeader, getResponseHeader,
isHttp: stream.isHttp, isHttp: stream.isHttp,
rangeChunkSize: this._rangeChunkSize, rangeChunkSize: this._rangeChunkSize,
disableRange: this._disableRange, disableRange: this._disableRange,
}); });
if (allowRangeRequests) { if (allowRangeRequests) {
this._isRangeSupported = true; this._isRangeSupported = true;
@ -311,8 +310,7 @@ class PDFNodeStreamFullReader extends BaseFullReader {
// Setting right content length. // Setting right content length.
this._contentLength = suggestedLength; this._contentLength = suggestedLength;
// Setting the file name from the response header this._filename = extractFilenameFromHeader(getResponseHeader);
this._fileName = extractFilenameFromHeader(getResponseHeader);
}; };
this._request = null; this._request = null;

View File

@ -119,6 +119,7 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
function PDFDataTransportStreamReader(stream, queuedChunks) { function PDFDataTransportStreamReader(stream, queuedChunks) {
this._stream = stream; this._stream = stream;
this._done = false; this._done = false;
this._filename = null;
this._queuedChunks = queuedChunks || []; this._queuedChunks = queuedChunks || [];
this._requests = []; this._requests = [];
this._headersReady = Promise.resolve(); this._headersReady = Promise.resolve();
@ -143,6 +144,10 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
return this._headersReady; return this._headersReady;
}, },
get filename() {
return this._filename;
},
get isRangeSupported() { get isRangeSupported() {
return this._stream._isRangeSupported; return this._stream._isRangeSupported;
}, },

View File

@ -794,6 +794,7 @@ describe('api', function() {
expect(metadata.info['Title']).toEqual('Basic API Test'); expect(metadata.info['Title']).toEqual('Basic API Test');
expect(metadata.info['PDFFormatVersion']).toEqual('1.7'); expect(metadata.info['PDFFormatVersion']).toEqual('1.7');
expect(metadata.metadata.get('dc:title')).toEqual('Basic API Test'); expect(metadata.metadata.get('dc:title')).toEqual('Basic API Test');
expect(metadata.contentDispositionFilename).toEqual(null);
done(); done();
}).catch(function (reason) { }).catch(function (reason) {
done.fail(reason); done.fail(reason);

View File

@ -134,6 +134,84 @@ describe('network_utils', function() {
}); });
}); });
describe('extractFilenameFromHeader', function() {
it('returns null when content disposition header is blank', function() {
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return null;
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return undefined;
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return '';
}
})).toBeNull();
});
it('gets the filename from the response header', function() {
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'inline';
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'attachment';
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'attachment; filename="filename.pdf"';
}
})).toEqual('filename.pdf');
});
it('returns null when content disposition is form-data', function() {
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'form-data';
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'form-data; name="filename.pdf"';
}
})).toBeNull();
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'form-data; name="filename.pdf"; filename="file.pdf"';
}
})).toEqual('file.pdf');
});
it('only extracts filename with pdf extension', function () {
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'attachment; filename="filename.png"';
}
})).toBeNull();
});
it('extension validation is case insensitive', function () {
expect(extractFilenameFromHeader((headerName) => {
if (headerName === 'Content-Disposition') {
return 'form-data; name="fieldName"; filename="file.PdF"';
}
})).toEqual('file.PdF');
});
});
describe('createResponseStatusError', function() { describe('createResponseStatusError', function() {
it('handles missing PDF file responses', function() { it('handles missing PDF file responses', function() {
expect(createResponseStatusError(404, 'https://foo.com/bar.pdf')).toEqual( expect(createResponseStatusError(404, 'https://foo.com/bar.pdf')).toEqual(
@ -175,62 +253,4 @@ describe('network_utils', function() {
expect(validateResponseStatus(undefined)).toEqual(false); expect(validateResponseStatus(undefined)).toEqual(false);
}); });
}); });
describe('extractFilenameFromHeader', function () {
it('returns null when content disposition header is blank', function() {
expect(extractFilenameFromHeader(function() {
return null;
})).toBeNull();
expect(extractFilenameFromHeader(function() {
return undefined;
})).toBeNull();
expect(extractFilenameFromHeader(function() {
return '';
})).toBeNull();
});
it('gets the filename from the response header', function () {
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: inline';
})).toBeNull();
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: attachment';
})).toBeNull();
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: attachment; filename="filename.pdf"';
})).toBe('filename.pdf');
});
it('returns null when content disposition is form-data', function () {
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: form-data';
})).toBeNull();
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: form-data; name="filename"';
})).toBeNull();
expect(extractFilenameFromHeader(function () {
return 'Content-Disposition: form-data; ' +
'name="filename"; filename="file.pdf"';
})).toBe('file.pdf');
});
it('Only extracts file names with pdf extension', function () {
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: attachment; filename="filename.png"';
})).toBeNull();
});
it('Extension validation is case insensitive', function () {
expect(extractFilenameFromHeader(function() {
return 'Content-Disposition: form-data; ' +
'name="fieldName"; filename="file.PdF"';
})).toBe('file.PdF');
});
});
}); });

View File

@ -154,7 +154,7 @@ let PDFViewerApplication = {
baseUrl: '', baseUrl: '',
externalServices: DefaultExternalServices, externalServices: DefaultExternalServices,
_boundEvents: {}, _boundEvents: {},
contentDispositionFileName: null, contentDispositionFilename: null,
// Called once when the document is loaded. // Called once when the document is loaded.
initialize(appConfig) { initialize(appConfig) {
@ -679,7 +679,7 @@ let PDFViewerApplication = {
this.downloadComplete = false; this.downloadComplete = false;
this.url = ''; this.url = '';
this.baseUrl = ''; this.baseUrl = '';
this.contentDispositionFileName = null; this.contentDispositionFilename = null;
this.pdfSidebar.reset(); this.pdfSidebar.reset();
this.pdfOutlineViewer.reset(); this.pdfOutlineViewer.reset();
@ -803,7 +803,7 @@ let PDFViewerApplication = {
let url = this.baseUrl; let url = this.baseUrl;
// Use this.url instead of this.baseUrl to perform filename detection based // Use this.url instead of this.baseUrl to perform filename detection based
// on the reference fragment as ultimate fallback if needed. // on the reference fragment as ultimate fallback if needed.
let filename = this.contentDispositionFileName || let filename = this.contentDispositionFilename ||
getPDFFileNameFromURL(this.url); getPDFFileNameFromURL(this.url);
let downloadManager = this.downloadManager; let downloadManager = this.downloadManager;
downloadManager.onerror = (err) => { downloadManager.onerror = (err) => {
@ -1157,10 +1157,10 @@ let PDFViewerApplication = {
}); });
pdfDocument.getMetadata().then( pdfDocument.getMetadata().then(
({ info, metadata, contentDispositionFileName, }) => { ({ info, metadata, contentDispositionFilename, }) => {
this.documentInfo = info; this.documentInfo = info;
this.metadata = metadata; this.metadata = metadata;
this.contentDispositionFileName = contentDispositionFileName; this.contentDispositionFilename = contentDispositionFilename;
// Provides some basic debug information // Provides some basic debug information
console.log('PDF ' + pdfDocument.fingerprint + ' [' + console.log('PDF ' + pdfDocument.fingerprint + ' [' +
@ -1183,11 +1183,10 @@ let PDFViewerApplication = {
} }
if (pdfTitle) { if (pdfTitle) {
this.setTitle(pdfTitle + ' - ' + document.title); this.setTitle(
} `${pdfTitle} - ${contentDispositionFilename || document.title}`);
} else if (contentDispositionFilename) {
if (!pdfTitle && contentDispositionFileName) { this.setTitle(contentDispositionFilename);
this.setTitle(contentDispositionFileName);
} }
if (info.IsAcroFormPresent) { if (info.IsAcroFormPresent) {

View File

@ -71,24 +71,26 @@ class PDFDocumentProperties {
return; return;
} }
// Get the document properties. // Get the document properties.
this.pdfDocument.getMetadata().then(({ info, metadata, }) => { this.pdfDocument.getMetadata().then(
({ info, metadata, contentDispositionFilename, }) => {
return Promise.all([ return Promise.all([
info, info,
metadata, metadata,
contentDispositionFilename || getPDFFileNameFromURL(this.url),
this._parseFileSize(this.maybeFileSize), this._parseFileSize(this.maybeFileSize),
this._parseDate(info.CreationDate), this._parseDate(info.CreationDate),
this._parseDate(info.ModDate) this._parseDate(info.ModDate)
]); ]);
}).then(([info, metadata, fileSize, creationDate, modificationDate]) => { }).then(([info, metadata, fileName, fileSize, creationDate, modDate]) => {
freezeFieldData({ freezeFieldData({
'fileName': getPDFFileNameFromURL(this.url), 'fileName': fileName,
'fileSize': fileSize, 'fileSize': fileSize,
'title': info.Title, 'title': info.Title,
'author': info.Author, 'author': info.Author,
'subject': info.Subject, 'subject': info.Subject,
'keywords': info.Keywords, 'keywords': info.Keywords,
'creationDate': creationDate, 'creationDate': creationDate,
'modificationDate': modificationDate, 'modificationDate': modDate,
'creator': info.Creator, 'creator': info.Creator,
'producer': info.Producer, 'producer': info.Producer,
'version': info.PDFFormatVersion, 'version': info.PDFFormatVersion,