Re-uses logic from 9f5fcae11c/extension/content-disposition.js,
which is already covered by tests: 6f3bbb8bbf
212 lines
7.6 KiB
212 lines
7.6 KiB
/* Copyright 2017 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// This getFilenameFromContentDispositionHeader function is adapted from an
// external extension/content-disposition.js implementation (the source URL is
// missing from this copy), with the following changes:
// - Modified to conform to PDF.js's coding style.
// - Support UTF-8 decoding when TextDecoder is unsupported.
// - Replace Array.from with Array + loop for compat with old browsers.
// - Replace "startsWith" with other string method for compat with old browsers.
// - Move return to the end of the function to prevent Babel from dropping the
//   function declarations.
/**
 * Extract file name from the Content-Disposition HTTP response header.
 *
 * The following parameters are tried in order; the first one present wins:
 *  1. `filename*=ext-value` (RFC 5987, referenced by RFC 6266).
 *  2. `filename*N=` / `filename*N*=` continuations (RFC 2231 section 3).
 *  3. `filename=value`.
 * Parameter names are matched case-insensitively, and a value may be either
 * a token or a quoted-string (which may contain spaces and semicolons).
 *
 * @param {string} contentDisposition
 * @return {string} Filename, if found in the Content-Disposition header.
 *   An empty string when no filename parameter is present.
 */
function getFilenameFromContentDispositionHeader(contentDisposition) {
  // Set to false by textdecode() once a value has been successfully decoded
  // with an explicit charset, so that fixupEncoding() does not decode twice.
  let needsEncodingFixup = true;

  // filename*=ext-value ("ext-value" from RFC 5987, referenced by RFC 6266).
  let tmp = toParamRegExp('filename\\*', 'i').exec(contentDisposition);
  if (tmp) {
    tmp = tmp[1];
    let filename = rfc2616unquote(tmp);
    filename = unescape(filename);
    filename = rfc5987decode(filename);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }

  // Continuations (RFC 2231 section 3, referenced by RFC 5987 section 3.1).
  // filename*n*=part
  // filename*n=part
  tmp = rfc2231getparam(contentDisposition);
  if (tmp) {
    // The joined value may still be an RFC 2047 encoded-word.
    let filename = rfc2047decode(tmp);
    return fixupEncoding(filename);
  }

  // filename=value (RFC 5987, section 4.1).
  tmp = toParamRegExp('filename', 'i').exec(contentDisposition);
  if (tmp) {
    tmp = tmp[1];
    let filename = rfc2616unquote(tmp);
    filename = rfc2047decode(filename);
    return fixupEncoding(filename);
  }

  // After this line there are only function declarations. We cannot put
  // "return" here for readability because babel would then drop the function
  // declarations...

  /**
   * Build the RegExp that matches `<attribute> = <value>` at the start of the
   * header or after a ";". The last capture group is the raw value.
   * @param {string} attributePattern - RegExp source for the attribute name.
   * @param {string} flags - RegExp flags.
   * @return {RegExp}
   */
  function toParamRegExp(attributePattern, flags) {
    return new RegExp(
      '(?:^|;)\\s*' + attributePattern + '\\s*=\\s*' +
      // Captures: value = token | quoted-string
      // (RFC 2616, section 3.6 and referenced by RFC 6266 4.1)
      '(' +
        '[^";\\s][^;\\s]*' +
      '|' +
        '"(?:[^"\\\\]|\\\\"?)+"?' +
      ')', flags);
  }

  /**
   * Decode a "binary string" (one char per byte) using the given charset.
   * The input is returned unchanged when it is empty, contains characters
   * above U+00FF, or cannot be decoded.
   * @param {string} encoding - Charset label; falsy means "do not decode".
   * @param {string} value
   * @return {string}
   */
  function textdecode(encoding, value) {
    if (encoding) {
      // Only strings consisting solely of byte-sized chars can be converted
      // to a byte array; anything else is assumed to be decoded already.
      if (!/^[\x00-\xFF]+$/.test(value)) {
        return value;
      }
      try {
        let decoder = new TextDecoder(encoding, { fatal: true, });
        let bytes = new Uint8Array(value.length);
        for (let i = 0; i < value.length; ++i) {
          bytes[i] = value.charCodeAt(i);
        }
        value = decoder.decode(bytes);
        needsEncodingFixup = false;
      } catch (e) {
        // TextDecoder constructor threw - unrecognized encoding.
        // Or TextDecoder API is not available.
        // Or the bytes are not valid in the given encoding (fatal: true).
        if (/^utf-?8$/i.test(encoding)) {
          // UTF-8 is commonly used, try to support it in another way:
          try {
            value = decodeURIComponent(escape(value));
            needsEncodingFixup = false;
          } catch (err) {
            // Malformed UTF-8 (URIError): keep the undecoded value instead
            // of letting the exception abort the whole lookup.
          }
        }
      }
    }
    return value;
  }

  /**
   * Last-resort decoding: when no charset-aware decoding succeeded and the
   * value contains bytes in the 0x80-0xFF range, try to read it as UTF-8.
   * @param {string} value
   * @return {string}
   */
  function fixupEncoding(value) {
    if (needsEncodingFixup && /[\x80-\xff]/.test(value)) {
      // Maybe multi-byte UTF-8.
      return textdecode('utf-8', value);
    }
    return value;
  }

  /**
   * Collect and join the RFC 2231 continuation parameters
   * (filename*0, filename*1*, ...).
   * @param {string} contentDisposition
   * @return {string} The joined value, or '' when there is no filename*0.
   */
  function rfc2231getparam(contentDisposition) {
    let matches = [], match;
    // Iterate over all filename*n= and filename*n*= with n being an integer
    // of at least zero. Any non-zero number must not start with '0'.
    let iter = toParamRegExp('filename\\*((?!0\\d)\\d+)(\\*?)', 'ig');
    while ((match = iter.exec(contentDisposition)) !== null) {
      let [, n, quot, part] = match;
      n = parseInt(n, 10);
      if (n in matches) {
        // Ignore anything after the invalid second filename*0.
        if (n === 0) {
          break;
        }
        continue;
      }
      matches[n] = [quot, part];
    }
    let parts = [];
    for (let n = 0; n < matches.length; ++n) {
      if (!(n in matches)) {
        // Numbers must be consecutive. Truncate when there is a hole.
        break;
      }
      let [quot, part] = matches[n];
      part = rfc2616unquote(part);
      if (quot) {
        // A trailing "*" marks the section as percent-encoded.
        part = unescape(part);
        if (n === 0) {
          // Only the first section carries the charset'language' prefix.
          part = rfc5987decode(part);
        }
      }
      parts.push(part);
    }
    return parts.join('');
  }

  /**
   * Strip the surrounding quotes of a quoted-string and resolve its
   * backslash escapes. Values that do not start with '"' are returned as-is.
   * @param {string} value
   * @return {string}
   */
  function rfc2616unquote(value) {
    if (value.charAt(0) === '"') {
      let parts = value.slice(1).split('\\"');
      // Find the first unescaped " and terminate there.
      for (let i = 0; i < parts.length; ++i) {
        let quotindex = parts[i].indexOf('"');
        if (quotindex !== -1) {
          parts[i] = parts[i].slice(0, quotindex);
          parts.length = i + 1; // Truncates and stop the iteration.
        }
        parts[i] = parts[i].replace(/\\(.)/g, '$1');
      }
      value = parts.join('"');
    }
    return value;
  }

  /**
   * Decode an "ext-value" from RFC 5987: charset'language'value.
   * @param {string} extvalue - Already percent-decoded to a binary string.
   * @return {string}
   */
  function rfc5987decode(extvalue) {
    let encodingend = extvalue.indexOf('\'');
    if (encodingend === -1) {
      // Some servers send "filename*=" without encoding'language' prefix.
      // Let's accept the value like Firefox (57) (Chrome 62 rejects it).
      return extvalue;
    }
    let encoding = extvalue.slice(0, encodingend);
    let langvalue = extvalue.slice(encodingend + 1);
    // Ignore language (RFC 5987 section 3.2.1, and RFC 6266 section 4.1 ).
    let value = langvalue.replace(/^[^']*'/, '');
    return textdecode(encoding, value);
  }

  /**
   * Decode RFC 2047 encoded-words ("=?charset?Q|B?text?=") in the value.
   * @param {string} value
   * @return {string}
   */
  function rfc2047decode(value) {
    // RFC 2047-decode the result. Firefox tried to drop support for it, but
    // backed out because some servers use it.
    // We are more strict and only recognize RFC 2047-encoding if the value
    // starts with "=?", since then it is likely that the full value is
    // RFC 2047-encoded.

    // Firefox also decodes words even where RFC 2047 section 5 states:
    // "An 'encoded-word' MUST NOT appear within a 'quoted-string'."
    if (value.slice(0, 2) !== '=?' || /[\x00-\x19\x80-\xff]/.test(value)) {
      return value;
    }
    // RFC 2047, section 2.4
    // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
    // charset = token (but let's restrict to characters that denote a
    //           possibly valid encoding).
    // encoding = q or b
    // encoded-text = any printable ASCII character other than ? or space.
    //                ... but Firefox permits ? and space.
    return value.replace(/=\?([\w-]*)\?([QqBb])\?((?:[^?]|\?(?!=))*)\?=/g,
      function(_, charset, encoding, text) {
        if (encoding === 'q' || encoding === 'Q') {
          // RFC 2047 section 4.2.
          text = text.replace(/_/g, ' ');
          text = text.replace(/=([0-9a-fA-F]{2})/g, function(_, hex) {
            return String.fromCharCode(parseInt(hex, 16));
          });
          return textdecode(charset, text);
        } // else encoding is b or B - base64 (RFC 2047 section 4.1)
        try {
          text = atob(text);
        } catch (e) {
          // Not valid base64: fall through with the text as received.
        }
        // Honour the declared charset, exactly like the "Q" branch does.
        return textdecode(charset, text);
      });
  }

  return '';
}
export {