Address the final round of review comments for Content-Disposition filename extraction

This patch updates the `IPDFStreamReader` interface and ensures that the implementations in `network.js`, `fetch_stream.js`, `node_stream.js`, and `transport_stream.js` all match that interface.
The unit-tests are also adjusted to more closely replicate the behaviour of the actual `IPDFStreamReader` implementations.
Finally, this patch adjusts the use of the Content-Disposition filename when setting the title in the viewer, and adds `PDFDocumentProperties` support as well.
This commit is contained in:
Jonas Jenwald 2018-01-16 16:24:36 +01:00
parent eb1f6f4c24
commit 69a8336cf1
12 changed files with 151 additions and 121 deletions

View File

@ -33,7 +33,7 @@
"uglify-es": "^3.1.2",
"vinyl": "^2.1.0",
"vinyl-fs": "^2.4.4",
"webpack": "^3.10.0",
"webpack": "^3.6.0",
"webpack-stream": "^4.0.0",
"wintersmith": "^2.4.1",
"yargs": "^9.0.1"

View File

@ -103,6 +103,16 @@ IPDFStreamReader.prototype = {
return null;
},
/**
* Gets the filename taken from the Content-Disposition response header. It is
* only meaningful after the headersReady promise is resolved.
* This interface-level getter itself always yields `null`.
* @returns {string|null} The filename, or `null` if the Content-Disposition
* header is missing/invalid.
*/
get filename() {
return null;
},
/**
* Gets PDF binary data length. It is defined after the headersReady promise
* is resolved.

View File

@ -1997,14 +1997,14 @@ var WorkerTransport = (function WorkerTransportClosure() {
getMetadata: function WorkerTransport_getMetadata() {
return this.messageHandler.sendWithPromise('GetMetadata', null).
then(function transportMetadata(results) {
then((results) => {
return {
info: results[0],
metadata: (results[1] ? new Metadata(results[1]) : null),
contentDispositionFileName: (this._fullReader ?
this._fullReader.fileName : null),
contentDispositionFilename: (this._fullReader ?
this._fullReader.filename : null),
};
}.bind(this));
});
},
getStats: function WorkerTransport_getStats() {

View File

@ -67,9 +67,9 @@ class PDFFetchStream {
class PDFFetchStreamReader {
constructor(stream) {
this._stream = stream;
this._fileName = null;
this._reader = null;
this._loaded = 0;
this._filename = null;
let source = stream.source;
this._withCredentials = source.withCredentials;
this._contentLength = source.length;
@ -104,7 +104,6 @@ class PDFFetchStreamReader {
const getResponseHeader = (name) => {
return response.headers.get(name);
};
let { allowRangeRequests, suggestedLength, } =
validateRangeRequestCapabilities({
getResponseHeader,
@ -115,7 +114,8 @@ class PDFFetchStreamReader {
this._contentLength = suggestedLength;
this._isRangeSupported = allowRangeRequests;
this._fileName = extractFilenameFromHeader(getResponseHeader);
this._filename = extractFilenameFromHeader(getResponseHeader);
// We need to stop reading when range is supported and streaming is
// disabled.
@ -131,12 +131,12 @@ class PDFFetchStreamReader {
return this._headersCapability.promise;
}
get contentLength() {
return this._contentLength;
get filename() {
return this._filename;
}
get fileName() {
return this._fileName;
get contentLength() {
return this._contentLength;
}
get isRangeSupported() {

View File

@ -341,7 +341,7 @@ function PDFNetworkStreamFullRequestReader(manager, source) {
this._requests = [];
this._done = false;
this._storedError = undefined;
this._fileName = null;
this._filename = null;
this.onProgress = null;
}
@ -371,6 +371,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
this._isRangeSupported = true;
}
this._filename = extractFilenameFromHeader(getResponseHeader);
var networkManager = this._manager;
if (networkManager.isStreamingRequest(fullRequestXhrId)) {
// We can continue fetching when progressive loading is enabled,
@ -385,11 +387,6 @@ PDFNetworkStreamFullRequestReader.prototype = {
networkManager.abortRequest(fullRequestXhrId);
}
// Content-Disposition: attachment; filename=Naïve file.txt
if (networkManager.isPendingRequest(fullRequestXhrId)) {
this._fileName = extractFilenameFromHeader(getResponseHeader);
}
this._headersReceivedCapability.resolve();
},
@ -438,8 +435,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
}
},
get fileName() {
return this._fileName;
get filename() {
return this._filename;
},
get isRangeSupported() {

View File

@ -53,6 +53,18 @@ function validateRangeRequestCapabilities({ getResponseHeader, isHttp,
return returnValues;
}
/**
 * Extracts the PDF filename advertised by a `Content-Disposition` header.
 *
 * Only a quoted `filename="..."` / `filename='...'` parameter that follows a
 * `;` separator and whose value ends in `.pdf` (compared case-insensitively)
 * is accepted.
 *
 * @param {function} getResponseHeader - Called with the header name
 *   'Content-Disposition'; expected to return the header value, or a falsy
 *   value when the header is absent.
 * @returns {string|null} The quoted value after being passed through
 *   `getFilenameFromUrl`, or `null` when no suitable filename is present.
 */
function extractFilenameFromHeader(getResponseHeader) {
  const contentDisposition = getResponseHeader('Content-Disposition');
  if (!contentDisposition) {
    return null;
  }
  // The value has to be closed by the same quote character that opened it
  // (`\1`) and may not contain that character. This keeps the capture inside
  // a single quoted parameter, rather than greedily running on to the last
  // `.pdf"` found anywhere in the header.
  const parts = /.+;\s*filename=(['"])((?:(?!\1).)+\.pdf)\1/i.exec(
    contentDisposition);
  if (parts === null) {
    return null;
  }
  return getFilenameFromUrl(parts[2]);
}
function createResponseStatusError(status, url) {
if (status === 404 || status === 0 && /^file:/.test(url)) {
return new MissingPDFException('Missing PDF "' + url + '".');
@ -66,23 +78,9 @@ function validateResponseStatus(status) {
return status === 200 || status === 206;
}
/**
 * Extracts the PDF filename advertised by a `Content-Disposition` header.
 *
 * Only a quoted `filename="..."` / `filename='...'` parameter that follows a
 * `;` separator and whose value ends in `.pdf` (compared case-insensitively)
 * is accepted.
 *
 * @param {function} getResponseHeader - Called with the header name
 *   'Content-Disposition'; expected to return the header value, or a falsy
 *   value when the header is absent.
 * @returns {string|null} The quoted value after being passed through
 *   `getFilenameFromUrl`, or `null` when no suitable filename is present.
 */
function extractFilenameFromHeader(getResponseHeader) {
  const contentDisposition = getResponseHeader('Content-Disposition');
  if (!contentDisposition) {
    return null;
  }
  // The value has to be closed by the same quote character that opened it
  // (`\1`) and may not contain that character. This keeps the capture inside
  // a single quoted parameter, rather than greedily running on to the last
  // `.pdf"` found anywhere in the header.
  const parts = /.+;\s*filename=(['"])((?:(?!\1).)+\.pdf)\1/i.exec(
    contentDisposition);
  if (parts === null) {
    return null;
  }
  return getFilenameFromUrl(parts[2]);
}
export {
createResponseStatusError,
extractFilenameFromHeader,
validateRangeRequestCapabilities,
validateResponseStatus,
extractFilenameFromHeader,
};

View File

@ -76,11 +76,11 @@ class BaseFullReader {
this._done = false;
this._errored = false;
this._reason = null;
this._fileName = null;
this.onProgress = null;
let source = stream.source;
this._contentLength = source.length; // optional
this._loaded = 0;
this._filename = null;
this._disableRange = source.disableRange || false;
this._rangeChunkSize = source.rangeChunkSize;
@ -100,6 +100,10 @@ class BaseFullReader {
return this._headersCapability.promise;
}
/**
 * The Content-Disposition filename held in `this._filename`; the constructor
 * initialises that field to `null`.
 */
get filename() {
return this._filename;
}
/**
 * The data length held in `this._contentLength`; the constructor seeds it
 * from the optional `source.length`.
 */
get contentLength() {
return this._contentLength;
}
@ -112,10 +116,6 @@ class BaseFullReader {
return this._isStreamingSupported;
}
/**
 * Camel-case `fileName` accessor; returns `this._fileName`.
 * NOTE(review): a lower-case `filename` getter backed by `this._filename`
 * also appears in this class -- confirm which spelling callers rely on.
 */
get fileName() {
return this._fileName;
}
read() {
return this._readCapability.promise.then(() => {
if (this._done) {
@ -296,14 +296,13 @@ class PDFNodeStreamFullReader extends BaseFullReader {
// here: https://nodejs.org/api/http.html#http_message_headers.
return this._readableStream.headers[name.toLowerCase()];
};
let { allowRangeRequests, suggestedLength, } =
validateRangeRequestCapabilities({
getResponseHeader,
isHttp: stream.isHttp,
rangeChunkSize: this._rangeChunkSize,
disableRange: this._disableRange,
});
validateRangeRequestCapabilities({
getResponseHeader,
isHttp: stream.isHttp,
rangeChunkSize: this._rangeChunkSize,
disableRange: this._disableRange,
});
if (allowRangeRequests) {
this._isRangeSupported = true;
@ -311,8 +310,7 @@ class PDFNodeStreamFullReader extends BaseFullReader {
// Setting right content length.
this._contentLength = suggestedLength;
// Setting the file name from the response header
this._fileName = extractFilenameFromHeader(getResponseHeader);
this._filename = extractFilenameFromHeader(getResponseHeader);
};
this._request = null;

View File

@ -119,6 +119,7 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
function PDFDataTransportStreamReader(stream, queuedChunks) {
this._stream = stream;
this._done = false;
this._filename = null;
this._queuedChunks = queuedChunks || [];
this._requests = [];
this._headersReady = Promise.resolve();
@ -143,6 +144,10 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
return this._headersReady;
},
/**
 * Returns `this._filename`, which the reader's constructor initialises to
 * `null`; no other assignment to it is visible in this excerpt.
 */
get filename() {
return this._filename;
},
/**
 * Forwards the `_isRangeSupported` flag of the stream this reader was
 * created for.
 */
get isRangeSupported() {
return this._stream._isRangeSupported;
},

View File

@ -794,6 +794,7 @@ describe('api', function() {
expect(metadata.info['Title']).toEqual('Basic API Test');
expect(metadata.info['PDFFormatVersion']).toEqual('1.7');
expect(metadata.metadata.get('dc:title')).toEqual('Basic API Test');
expect(metadata.contentDispositionFilename).toEqual(null);
done();
}).catch(function (reason) {
done.fail(reason);

View File

@ -134,6 +134,84 @@ describe('network_utils', function() {
});
});
describe('extractFilenameFromHeader', function() {
  // Builds a `getResponseHeader` stand-in that answers only for the
  // 'Content-Disposition' header and yields `undefined` for any other name.
  function contentDispositionOnly(headerValue) {
    return function(headerName) {
      return (headerName === 'Content-Disposition' ? headerValue : undefined);
    };
  }

  it('returns null when content disposition header is blank', function() {
    const fromNull = extractFilenameFromHeader(contentDispositionOnly(null));
    expect(fromNull).toBeNull();

    const fromUndefined =
      extractFilenameFromHeader(contentDispositionOnly(undefined));
    expect(fromUndefined).toBeNull();

    const fromEmpty = extractFilenameFromHeader(contentDispositionOnly(''));
    expect(fromEmpty).toBeNull();
  });

  it('gets the filename from the response header', function() {
    // A bare disposition type carries no filename parameter.
    const fromInline =
      extractFilenameFromHeader(contentDispositionOnly('inline'));
    expect(fromInline).toBeNull();

    const fromAttachment =
      extractFilenameFromHeader(contentDispositionOnly('attachment'));
    expect(fromAttachment).toBeNull();

    const fromQuoted = extractFilenameFromHeader(
      contentDispositionOnly('attachment; filename="filename.pdf"'));
    expect(fromQuoted).toEqual('filename.pdf');
  });

  it('returns null when content disposition is form-data', function() {
    const fromBare =
      extractFilenameFromHeader(contentDispositionOnly('form-data'));
    expect(fromBare).toBeNull();

    // The `name` parameter alone must not be mistaken for a filename.
    const fromNameOnly = extractFilenameFromHeader(
      contentDispositionOnly('form-data; name="filename.pdf"'));
    expect(fromNameOnly).toBeNull();

    // An explicit `filename` parameter is still honoured for form-data.
    const fromNameAndFilename = extractFilenameFromHeader(
      contentDispositionOnly(
        'form-data; name="filename.pdf"; filename="file.pdf"'));
    expect(fromNameAndFilename).toEqual('file.pdf');
  });

  it('only extracts filename with pdf extension', function () {
    const fromPng = extractFilenameFromHeader(
      contentDispositionOnly('attachment; filename="filename.png"'));
    expect(fromPng).toBeNull();
  });

  it('extension validation is case insensitive', function () {
    const fromMixedCase = extractFilenameFromHeader(
      contentDispositionOnly(
        'form-data; name="fieldName"; filename="file.PdF"'));
    expect(fromMixedCase).toEqual('file.PdF');
  });
});
describe('createResponseStatusError', function() {
it('handles missing PDF file responses', function() {
expect(createResponseStatusError(404, 'https://foo.com/bar.pdf')).toEqual(
@ -175,62 +253,4 @@ describe('network_utils', function() {
expect(validateResponseStatus(undefined)).toEqual(false);
});
});
describe('extractFilenameFromHeader', function () {
  // Produces a header getter that disregards the requested header name and
  // always hands back `rawHeader`.
  const fixedHeader = (rawHeader) => {
    return function() {
      return rawHeader;
    };
  };

  it('returns null when content disposition header is blank', function() {
    const fromNull = extractFilenameFromHeader(fixedHeader(null));
    expect(fromNull).toBeNull();

    const fromUndefined = extractFilenameFromHeader(fixedHeader(undefined));
    expect(fromUndefined).toBeNull();

    const fromEmpty = extractFilenameFromHeader(fixedHeader(''));
    expect(fromEmpty).toBeNull();
  });

  it('gets the filename from the response header', function () {
    // Note that these values include the 'Content-Disposition: ' prefix.
    const fromInline =
      extractFilenameFromHeader(fixedHeader('Content-Disposition: inline'));
    expect(fromInline).toBeNull();

    const fromAttachment = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: attachment'));
    expect(fromAttachment).toBeNull();

    const fromQuoted = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: attachment; filename="filename.pdf"'));
    expect(fromQuoted).toBe('filename.pdf');
  });

  it('returns null when content disposition is form-data', function () {
    const fromBare = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: form-data'));
    expect(fromBare).toBeNull();

    const fromNameOnly = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: form-data; name="filename"'));
    expect(fromNameOnly).toBeNull();

    // An explicit `filename` parameter is still picked up for form-data.
    const fromNameAndFilename = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: form-data; ' +
                  'name="filename"; filename="file.pdf"'));
    expect(fromNameAndFilename).toBe('file.pdf');
  });

  it('Only extracts file names with pdf extension', function () {
    const fromPng = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: attachment; filename="filename.png"'));
    expect(fromPng).toBeNull();
  });

  it('Extension validation is case insensitive', function () {
    const fromMixedCase = extractFilenameFromHeader(
      fixedHeader('Content-Disposition: form-data; ' +
                  'name="fieldName"; filename="file.PdF"'));
    expect(fromMixedCase).toBe('file.PdF');
  });
});
});

View File

@ -154,7 +154,7 @@ let PDFViewerApplication = {
baseUrl: '',
externalServices: DefaultExternalServices,
_boundEvents: {},
contentDispositionFileName: null,
contentDispositionFilename: null,
// Called once when the document is loaded.
initialize(appConfig) {
@ -679,7 +679,7 @@ let PDFViewerApplication = {
this.downloadComplete = false;
this.url = '';
this.baseUrl = '';
this.contentDispositionFileName = null;
this.contentDispositionFilename = null;
this.pdfSidebar.reset();
this.pdfOutlineViewer.reset();
@ -803,7 +803,7 @@ let PDFViewerApplication = {
let url = this.baseUrl;
// Use this.url instead of this.baseUrl to perform filename detection based
// on the reference fragment as ultimate fallback if needed.
let filename = this.contentDispositionFileName ||
let filename = this.contentDispositionFilename ||
getPDFFileNameFromURL(this.url);
let downloadManager = this.downloadManager;
downloadManager.onerror = (err) => {
@ -1157,10 +1157,10 @@ let PDFViewerApplication = {
});
pdfDocument.getMetadata().then(
({ info, metadata, contentDispositionFileName, }) => {
({ info, metadata, contentDispositionFilename, }) => {
this.documentInfo = info;
this.metadata = metadata;
this.contentDispositionFileName = contentDispositionFileName;
this.contentDispositionFilename = contentDispositionFilename;
// Provides some basic debug information
console.log('PDF ' + pdfDocument.fingerprint + ' [' +
@ -1183,11 +1183,10 @@ let PDFViewerApplication = {
}
if (pdfTitle) {
this.setTitle(pdfTitle + ' - ' + document.title);
}
if (!pdfTitle && contentDispositionFileName) {
this.setTitle(contentDispositionFileName);
this.setTitle(
`${pdfTitle} - ${contentDispositionFilename || document.title}`);
} else if (contentDispositionFilename) {
this.setTitle(contentDispositionFilename);
}
if (info.IsAcroFormPresent) {

View File

@ -71,24 +71,26 @@ class PDFDocumentProperties {
return;
}
// Get the document properties.
this.pdfDocument.getMetadata().then(({ info, metadata, }) => {
this.pdfDocument.getMetadata().then(
({ info, metadata, contentDispositionFilename, }) => {
return Promise.all([
info,
metadata,
contentDispositionFilename || getPDFFileNameFromURL(this.url),
this._parseFileSize(this.maybeFileSize),
this._parseDate(info.CreationDate),
this._parseDate(info.ModDate)
]);
}).then(([info, metadata, fileSize, creationDate, modificationDate]) => {
}).then(([info, metadata, fileName, fileSize, creationDate, modDate]) => {
freezeFieldData({
'fileName': getPDFFileNameFromURL(this.url),
'fileName': fileName,
'fileSize': fileSize,
'title': info.Title,
'author': info.Author,
'subject': info.Subject,
'keywords': info.Keywords,
'creationDate': creationDate,
'modificationDate': modificationDate,
'modificationDate': modDate,
'creator': info.Creator,
'producer': info.Producer,
'version': info.PDFFormatVersion,