Fix multibyte decoding in content_disposition.js

I made some mistakes when trying to make the content_disposition.js
compatible with non-modern browsers (IE/Edge).
Notably, text decoding was usually skipped because of the inverted
logical check at the top of `textdecode`.

I verified that this new version works as expected, as follows:

1. Visit 55c71eb44e/test/
   and get  test-content-disposition.js
   also get test-content-disposition.node.js if using Node.js,
     or get test-content-disposition.html if you use a browser.
2. Modify `test-content-disposition.node.js` (or the HTML file) and
   change `../extension/content-disposition.js` to `PDFJS-content_disposition.js`
3. Copy the `getFilenameFromContentDispositionHeader` function from
   `content_disposition.js` (i.e. the file without the trailing exports)
   and save it as `PDFJS-content_disposition.js`.
4. Run the tests (`node test-content-disposition.node.js` or by opening
   `test-content-disposition.html` in a browser).
5. Confirm that there are no failures: "Finished all tests (0 failures)"

The code has a best-efforts fallback for Microsoft Edge, which lacks the
TextDecoder API. The fallback only supports the common UTF-8 encoding.
To simulate this in a test, modify `PDFJS-content_disposition.js` and
deliberately throw an error before `new TextDecoder`. There will be two
failures because we don't want to include too much code to support text
decoding for non-UTF-8 encodings in Edge

```
test-content-disposition.js:265 Assertion failed: Input: attachment; filename*=ISO-8859-1''%c3%a4
Expected: "ä"
Actual  : "ä"
test-content-disposition.js:268 Assertion failed: Input: attachment; filename*=ISO-8859-1''%e2%82%ac
Expected: "€"
Actual  : "€"
```
This commit is contained in:
Rob Wu 2018-06-03 15:03:00 +02:00
parent 2921cc0eba
commit e992480baa

View File

@ -78,24 +78,27 @@ function getFilenameFromContentDispositionHeader(contentDisposition) {
}
function textdecode(encoding, value) {
if (encoding) {
if (!/^[^\x00-\xFF]+$/.test(value)) {
if (!/^[\x00-\xFF]+$/.test(value)) {
return value;
}
try {
let decoder = new TextDecoder(encoding, { fatal: true, });
let bytes = new Array(value.length);
for (let i = 0; i < value.length; ++i) {
bytes[i] = value.charCodeAt(0);
bytes[i] = value.charCodeAt(i);
}
value = decoder.decode(new Uint8Array(bytes));
needsEncodingFixup = false;
} catch (e) {
// TextDecoder constructor threw - unrecognized encoding.
// Or TextDecoder API is not available.
// Or TextDecoder API is not available (in IE / Edge).
if (/^utf-?8$/i.test(encoding)) {
// UTF-8 is commonly used, try to support it in another way:
value = decodeURIComponent(escape(value));
needsEncodingFixup = false;
try {
value = decodeURIComponent(escape(value));
needsEncodingFixup = false;
} catch (err) {
}
}
}
}