Address the final round of review comments for Content-Disposition filename extraction
This patch updates the `IPDFStreamReader` interface and ensures that the interfaces and implementations in `network.js`, `fetch_stream.js`, `node_stream.js`, and `transport_stream.js` all match. The unit-tests are also adjusted to more closely replicate the behaviour of the real `IPDFStreamReader` implementations. Finally, this patch adjusts how the Content-Disposition filename is used when setting the title in the viewer, and adds support for it in `PDFDocumentProperties` as well.
This commit is contained in:
parent
eb1f6f4c24
commit
69a8336cf1
@ -33,7 +33,7 @@
|
||||
"uglify-es": "^3.1.2",
|
||||
"vinyl": "^2.1.0",
|
||||
"vinyl-fs": "^2.4.4",
|
||||
"webpack": "^3.10.0",
|
||||
"webpack": "^3.6.0",
|
||||
"webpack-stream": "^4.0.0",
|
||||
"wintersmith": "^2.4.1",
|
||||
"yargs": "^9.0.1"
|
||||
|
@ -103,6 +103,16 @@ IPDFStreamReader.prototype = {
|
||||
return null;
|
||||
},
|
||||
|
||||
/**
|
||||
* Gets the Content-Disposition filename. It is defined after the headersReady
|
||||
* promise is resolved.
|
||||
* @returns {string|null} The filename, or `null` if the Content-Disposition
|
||||
* header is missing/invalid.
|
||||
*/
|
||||
get filename() {
|
||||
return null;
|
||||
},
|
||||
|
||||
/**
|
||||
* Gets PDF binary data length. It is defined after the headersReady promise
|
||||
* is resolved.
|
||||
|
@ -1997,14 +1997,14 @@ var WorkerTransport = (function WorkerTransportClosure() {
|
||||
|
||||
getMetadata: function WorkerTransport_getMetadata() {
|
||||
return this.messageHandler.sendWithPromise('GetMetadata', null).
|
||||
then(function transportMetadata(results) {
|
||||
then((results) => {
|
||||
return {
|
||||
info: results[0],
|
||||
metadata: (results[1] ? new Metadata(results[1]) : null),
|
||||
contentDispositionFileName: (this._fullReader ?
|
||||
this._fullReader.fileName : null),
|
||||
contentDispositionFilename: (this._fullReader ?
|
||||
this._fullReader.filename : null),
|
||||
};
|
||||
}.bind(this));
|
||||
});
|
||||
},
|
||||
|
||||
getStats: function WorkerTransport_getStats() {
|
||||
|
@ -67,9 +67,9 @@ class PDFFetchStream {
|
||||
class PDFFetchStreamReader {
|
||||
constructor(stream) {
|
||||
this._stream = stream;
|
||||
this._fileName = null;
|
||||
this._reader = null;
|
||||
this._loaded = 0;
|
||||
this._filename = null;
|
||||
let source = stream.source;
|
||||
this._withCredentials = source.withCredentials;
|
||||
this._contentLength = source.length;
|
||||
@ -104,7 +104,6 @@ class PDFFetchStreamReader {
|
||||
const getResponseHeader = (name) => {
|
||||
return response.headers.get(name);
|
||||
};
|
||||
|
||||
let { allowRangeRequests, suggestedLength, } =
|
||||
validateRangeRequestCapabilities({
|
||||
getResponseHeader,
|
||||
@ -115,7 +114,8 @@ class PDFFetchStreamReader {
|
||||
|
||||
this._contentLength = suggestedLength;
|
||||
this._isRangeSupported = allowRangeRequests;
|
||||
this._fileName = extractFilenameFromHeader(getResponseHeader);
|
||||
|
||||
this._filename = extractFilenameFromHeader(getResponseHeader);
|
||||
|
||||
// We need to stop reading when range is supported and streaming is
|
||||
// disabled.
|
||||
@ -131,12 +131,12 @@ class PDFFetchStreamReader {
|
||||
return this._headersCapability.promise;
|
||||
}
|
||||
|
||||
get contentLength() {
|
||||
return this._contentLength;
|
||||
get filename() {
|
||||
return this._filename;
|
||||
}
|
||||
|
||||
get fileName() {
|
||||
return this._fileName;
|
||||
get contentLength() {
|
||||
return this._contentLength;
|
||||
}
|
||||
|
||||
get isRangeSupported() {
|
||||
|
@ -341,7 +341,7 @@ function PDFNetworkStreamFullRequestReader(manager, source) {
|
||||
this._requests = [];
|
||||
this._done = false;
|
||||
this._storedError = undefined;
|
||||
this._fileName = null;
|
||||
this._filename = null;
|
||||
|
||||
this.onProgress = null;
|
||||
}
|
||||
@ -371,6 +371,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
|
||||
this._isRangeSupported = true;
|
||||
}
|
||||
|
||||
this._filename = extractFilenameFromHeader(getResponseHeader);
|
||||
|
||||
var networkManager = this._manager;
|
||||
if (networkManager.isStreamingRequest(fullRequestXhrId)) {
|
||||
// We can continue fetching when progressive loading is enabled,
|
||||
@ -385,11 +387,6 @@ PDFNetworkStreamFullRequestReader.prototype = {
|
||||
networkManager.abortRequest(fullRequestXhrId);
|
||||
}
|
||||
|
||||
// Content-Disposition: attachment; filename=Naïve file.txt
|
||||
if (networkManager.isPendingRequest(fullRequestXhrId)) {
|
||||
this._fileName = extractFilenameFromHeader(getResponseHeader);
|
||||
}
|
||||
|
||||
this._headersReceivedCapability.resolve();
|
||||
},
|
||||
|
||||
@ -438,8 +435,8 @@ PDFNetworkStreamFullRequestReader.prototype = {
|
||||
}
|
||||
},
|
||||
|
||||
get fileName() {
|
||||
return this._fileName;
|
||||
get filename() {
|
||||
return this._filename;
|
||||
},
|
||||
|
||||
get isRangeSupported() {
|
||||
|
@ -53,6 +53,18 @@ function validateRangeRequestCapabilities({ getResponseHeader, isHttp,
|
||||
return returnValues;
|
||||
}
|
||||
|
||||
/**
 * Extracts a PDF filename from the `Content-Disposition` response header.
 *
 * Only a quoted `filename` parameter that follows at least one other
 * `;`-separated token and whose value ends in `.pdf` (case-insensitively) is
 * accepted. The closing quote must be the same character as the opening one,
 * and that character may not occur inside the value, so the match can never
 * run across into a later parameter of the header.
 *
 * @param {function} getResponseHeader - Called with the header name
 *   ('Content-Disposition'); expected to return the header value, or a falsy
 *   value when the header is absent.
 * @returns {string|null} The filename, or `null` if the header is
 *   missing/invalid.
 */
function extractFilenameFromHeader(getResponseHeader) {
  const contentDisposition = getResponseHeader('Content-Disposition');
  if (!contentDisposition) {
    return null;
  }
  // Group 1 is the opening quote, group 2 the quoted value. `(?!\1).` keeps
  // the value from containing its own delimiter; the trailing `\1` requires
  // a matching closing quote. No `g` flag: the pattern is used exactly once.
  const filenamePattern = /.+;\s*filename=(['"])((?:(?!\1).)+\.pdf)\1/i;
  const parts = filenamePattern.exec(contentDisposition);
  if (parts === null) {
    return null;
  }
  return getFilenameFromUrl(parts[2]);
}
|
||||
|
||||
function createResponseStatusError(status, url) {
|
||||
if (status === 404 || status === 0 && /^file:/.test(url)) {
|
||||
return new MissingPDFException('Missing PDF "' + url + '".');
|
||||
@ -66,23 +78,9 @@ function validateResponseStatus(status) {
|
||||
return status === 200 || status === 206;
|
||||
}
|
||||
|
||||
function extractFilenameFromHeader(getResponseHeader) {
|
||||
const contentDisposition = getResponseHeader('Content-Disposition');
|
||||
|
||||
if (contentDisposition) {
|
||||
let parts =
|
||||
/.+;\s*filename=(?:"|')(.+\.pdf)(?:"|')/gi.exec(contentDisposition);
|
||||
if (parts !== null && parts.length > 1) {
|
||||
return getFilenameFromUrl(parts[1]);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export {
|
||||
createResponseStatusError,
|
||||
extractFilenameFromHeader,
|
||||
validateRangeRequestCapabilities,
|
||||
validateResponseStatus,
|
||||
extractFilenameFromHeader,
|
||||
};
|
||||
|
@ -76,11 +76,11 @@ class BaseFullReader {
|
||||
this._done = false;
|
||||
this._errored = false;
|
||||
this._reason = null;
|
||||
this._fileName = null;
|
||||
this.onProgress = null;
|
||||
let source = stream.source;
|
||||
this._contentLength = source.length; // optional
|
||||
this._loaded = 0;
|
||||
this._filename = null;
|
||||
|
||||
this._disableRange = source.disableRange || false;
|
||||
this._rangeChunkSize = source.rangeChunkSize;
|
||||
@ -100,6 +100,10 @@ class BaseFullReader {
|
||||
return this._headersCapability.promise;
|
||||
}
|
||||
|
||||
get filename() {
|
||||
return this._filename;
|
||||
}
|
||||
|
||||
get contentLength() {
|
||||
return this._contentLength;
|
||||
}
|
||||
@ -112,10 +116,6 @@ class BaseFullReader {
|
||||
return this._isStreamingSupported;
|
||||
}
|
||||
|
||||
get fileName() {
|
||||
return this._fileName;
|
||||
}
|
||||
|
||||
read() {
|
||||
return this._readCapability.promise.then(() => {
|
||||
if (this._done) {
|
||||
@ -296,14 +296,13 @@ class PDFNodeStreamFullReader extends BaseFullReader {
|
||||
// here: https://nodejs.org/api/http.html#http_message_headers.
|
||||
return this._readableStream.headers[name.toLowerCase()];
|
||||
};
|
||||
|
||||
let { allowRangeRequests, suggestedLength, } =
|
||||
validateRangeRequestCapabilities({
|
||||
getResponseHeader,
|
||||
isHttp: stream.isHttp,
|
||||
rangeChunkSize: this._rangeChunkSize,
|
||||
disableRange: this._disableRange,
|
||||
});
|
||||
validateRangeRequestCapabilities({
|
||||
getResponseHeader,
|
||||
isHttp: stream.isHttp,
|
||||
rangeChunkSize: this._rangeChunkSize,
|
||||
disableRange: this._disableRange,
|
||||
});
|
||||
|
||||
if (allowRangeRequests) {
|
||||
this._isRangeSupported = true;
|
||||
@ -311,8 +310,7 @@ class PDFNodeStreamFullReader extends BaseFullReader {
|
||||
// Setting right content length.
|
||||
this._contentLength = suggestedLength;
|
||||
|
||||
// Setting the file name from the response header
|
||||
this._fileName = extractFilenameFromHeader(getResponseHeader);
|
||||
this._filename = extractFilenameFromHeader(getResponseHeader);
|
||||
};
|
||||
|
||||
this._request = null;
|
||||
|
@ -119,6 +119,7 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
|
||||
function PDFDataTransportStreamReader(stream, queuedChunks) {
|
||||
this._stream = stream;
|
||||
this._done = false;
|
||||
this._filename = null;
|
||||
this._queuedChunks = queuedChunks || [];
|
||||
this._requests = [];
|
||||
this._headersReady = Promise.resolve();
|
||||
@ -143,6 +144,10 @@ var PDFDataTransportStream = (function PDFDataTransportStreamClosure() {
|
||||
return this._headersReady;
|
||||
},
|
||||
|
||||
get filename() {
|
||||
return this._filename;
|
||||
},
|
||||
|
||||
get isRangeSupported() {
|
||||
return this._stream._isRangeSupported;
|
||||
},
|
||||
|
@ -794,6 +794,7 @@ describe('api', function() {
|
||||
expect(metadata.info['Title']).toEqual('Basic API Test');
|
||||
expect(metadata.info['PDFFormatVersion']).toEqual('1.7');
|
||||
expect(metadata.metadata.get('dc:title')).toEqual('Basic API Test');
|
||||
expect(metadata.contentDispositionFilename).toEqual(null);
|
||||
done();
|
||||
}).catch(function (reason) {
|
||||
done.fail(reason);
|
||||
|
@ -134,6 +134,84 @@ describe('network_utils', function() {
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractFilenameFromHeader', function() {
|
||||
it('returns null when content disposition header is blank', function() {
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return null;
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return undefined;
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return '';
|
||||
}
|
||||
})).toBeNull();
|
||||
});
|
||||
|
||||
it('gets the filename from the response header', function() {
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'inline';
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'attachment';
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'attachment; filename="filename.pdf"';
|
||||
}
|
||||
})).toEqual('filename.pdf');
|
||||
});
|
||||
|
||||
it('returns null when content disposition is form-data', function() {
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'form-data';
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'form-data; name="filename.pdf"';
|
||||
}
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'form-data; name="filename.pdf"; filename="file.pdf"';
|
||||
}
|
||||
})).toEqual('file.pdf');
|
||||
});
|
||||
|
||||
it('only extracts filename with pdf extension', function () {
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'attachment; filename="filename.png"';
|
||||
}
|
||||
})).toBeNull();
|
||||
});
|
||||
|
||||
it('extension validation is case insensitive', function () {
|
||||
expect(extractFilenameFromHeader((headerName) => {
|
||||
if (headerName === 'Content-Disposition') {
|
||||
return 'form-data; name="fieldName"; filename="file.PdF"';
|
||||
}
|
||||
})).toEqual('file.PdF');
|
||||
});
|
||||
});
|
||||
|
||||
describe('createResponseStatusError', function() {
|
||||
it('handles missing PDF file responses', function() {
|
||||
expect(createResponseStatusError(404, 'https://foo.com/bar.pdf')).toEqual(
|
||||
@ -175,62 +253,4 @@ describe('network_utils', function() {
|
||||
expect(validateResponseStatus(undefined)).toEqual(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractFilenameFromHeader', function () {
|
||||
it('returns null when content disposition header is blank', function() {
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return null;
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return undefined;
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return '';
|
||||
})).toBeNull();
|
||||
});
|
||||
|
||||
it('gets the filename from the response header', function () {
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: inline';
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: attachment';
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: attachment; filename="filename.pdf"';
|
||||
})).toBe('filename.pdf');
|
||||
});
|
||||
|
||||
it('returns null when content disposition is form-data', function () {
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: form-data';
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: form-data; name="filename"';
|
||||
})).toBeNull();
|
||||
|
||||
expect(extractFilenameFromHeader(function () {
|
||||
return 'Content-Disposition: form-data; ' +
|
||||
'name="filename"; filename="file.pdf"';
|
||||
})).toBe('file.pdf');
|
||||
});
|
||||
|
||||
it('Only extracts file names with pdf extension', function () {
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: attachment; filename="filename.png"';
|
||||
})).toBeNull();
|
||||
});
|
||||
|
||||
it('Extension validation is case insensitive', function () {
|
||||
expect(extractFilenameFromHeader(function() {
|
||||
return 'Content-Disposition: form-data; ' +
|
||||
'name="fieldName"; filename="file.PdF"';
|
||||
})).toBe('file.PdF');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
19
web/app.js
19
web/app.js
@ -154,7 +154,7 @@ let PDFViewerApplication = {
|
||||
baseUrl: '',
|
||||
externalServices: DefaultExternalServices,
|
||||
_boundEvents: {},
|
||||
contentDispositionFileName: null,
|
||||
contentDispositionFilename: null,
|
||||
|
||||
// Called once when the document is loaded.
|
||||
initialize(appConfig) {
|
||||
@ -679,7 +679,7 @@ let PDFViewerApplication = {
|
||||
this.downloadComplete = false;
|
||||
this.url = '';
|
||||
this.baseUrl = '';
|
||||
this.contentDispositionFileName = null;
|
||||
this.contentDispositionFilename = null;
|
||||
|
||||
this.pdfSidebar.reset();
|
||||
this.pdfOutlineViewer.reset();
|
||||
@ -803,7 +803,7 @@ let PDFViewerApplication = {
|
||||
let url = this.baseUrl;
|
||||
// Use this.url instead of this.baseUrl to perform filename detection based
|
||||
// on the reference fragment as ultimate fallback if needed.
|
||||
let filename = this.contentDispositionFileName ||
|
||||
let filename = this.contentDispositionFilename ||
|
||||
getPDFFileNameFromURL(this.url);
|
||||
let downloadManager = this.downloadManager;
|
||||
downloadManager.onerror = (err) => {
|
||||
@ -1157,10 +1157,10 @@ let PDFViewerApplication = {
|
||||
});
|
||||
|
||||
pdfDocument.getMetadata().then(
|
||||
({ info, metadata, contentDispositionFileName, }) => {
|
||||
({ info, metadata, contentDispositionFilename, }) => {
|
||||
this.documentInfo = info;
|
||||
this.metadata = metadata;
|
||||
this.contentDispositionFileName = contentDispositionFileName;
|
||||
this.contentDispositionFilename = contentDispositionFilename;
|
||||
|
||||
// Provides some basic debug information
|
||||
console.log('PDF ' + pdfDocument.fingerprint + ' [' +
|
||||
@ -1183,11 +1183,10 @@ let PDFViewerApplication = {
|
||||
}
|
||||
|
||||
if (pdfTitle) {
|
||||
this.setTitle(pdfTitle + ' - ' + document.title);
|
||||
}
|
||||
|
||||
if (!pdfTitle && contentDispositionFileName) {
|
||||
this.setTitle(contentDispositionFileName);
|
||||
this.setTitle(
|
||||
`${pdfTitle} - ${contentDispositionFilename || document.title}`);
|
||||
} else if (contentDispositionFilename) {
|
||||
this.setTitle(contentDispositionFilename);
|
||||
}
|
||||
|
||||
if (info.IsAcroFormPresent) {
|
||||
|
@ -71,24 +71,26 @@ class PDFDocumentProperties {
|
||||
return;
|
||||
}
|
||||
// Get the document properties.
|
||||
this.pdfDocument.getMetadata().then(({ info, metadata, }) => {
|
||||
this.pdfDocument.getMetadata().then(
|
||||
({ info, metadata, contentDispositionFilename, }) => {
|
||||
return Promise.all([
|
||||
info,
|
||||
metadata,
|
||||
contentDispositionFilename || getPDFFileNameFromURL(this.url),
|
||||
this._parseFileSize(this.maybeFileSize),
|
||||
this._parseDate(info.CreationDate),
|
||||
this._parseDate(info.ModDate)
|
||||
]);
|
||||
}).then(([info, metadata, fileSize, creationDate, modificationDate]) => {
|
||||
}).then(([info, metadata, fileName, fileSize, creationDate, modDate]) => {
|
||||
freezeFieldData({
|
||||
'fileName': getPDFFileNameFromURL(this.url),
|
||||
'fileName': fileName,
|
||||
'fileSize': fileSize,
|
||||
'title': info.Title,
|
||||
'author': info.Author,
|
||||
'subject': info.Subject,
|
||||
'keywords': info.Keywords,
|
||||
'creationDate': creationDate,
|
||||
'modificationDate': modificationDate,
|
||||
'modificationDate': modDate,
|
||||
'creator': info.Creator,
|
||||
'producer': info.Producer,
|
||||
'version': info.PDFFormatVersion,
|
||||
|
Loading…
x
Reference in New Issue
Block a user