Merge pull request #8979 from timvandermeij/downloads
Transform Web Archive URLs to avoid downloading an HTML page instead of the PDF file
This commit is contained in:
commit
f9ce904fb7
@ -22,7 +22,22 @@ var crypto = require('crypto');
|
||||
var http = require('http');
|
||||
var https = require('https');
|
||||
|
||||
/**
 * Rewrites a Web Archive snapshot URL so that the archived file itself is
 * requested.
 *
 * Web Archive URLs need to be transformed to add `if_` after the ID.
 * Without this, an HTML page containing an iframe with the PDF file
 * will be served instead (issue 8920).
 *
 * @param {string} url - The URL that is about to be downloaded.
 * @returns {string} The URL with `if_` inserted after the numeric ID when it
 *   has the form `http(s)://web.archive.org/web/<digits>/http(s)://...`;
 *   otherwise `url` is returned unchanged.
 */
function rewriteWebArchiveUrl(url) {
  // The pattern is anchored and only needs to match once, so no `g` flag is
  // used; this also keeps the regex free of `lastIndex` state between calls.
  // URLs that already carry `if_` do not match, because the digits must be
  // immediately followed by `/`.
  var webArchiveRegex =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/;
  var urlParts = webArchiveRegex.exec(url);
  if (!urlParts) {
    return url;
  }
  // urlParts[1]: archive prefix, urlParts[2]: numeric ID,
  // urlParts[3]: the archived resource's own URL (with leading slash).
  return urlParts[1] + urlParts[2] + 'if_' + urlParts[3];
}
|
||||
|
||||
function downloadFile(file, url, callback, redirects) {
|
||||
url = rewriteWebArchiveUrl(url);
|
||||
|
||||
var completed = false;
|
||||
var protocol = /^https:\/\//.test(url) ? https : http;
|
||||
protocol.get(url, function (response) {
|
||||
|
@ -1 +1 @@
|
||||
http://web.archive.org/web/20150212141833/http://geothermal.inel.gov/publications/future_of_geothermal_energy.pdf
|
||||
https://web.archive.org/web/20170930174755/https://www.pdf-archive.com/2017/09/30/future-of-geothermal-energy/future-of-geothermal-energy.pdf
|
||||
|
@ -1 +1 @@
|
||||
http://web.archive.org/save/_embed/http://210.243.166.143/prob1.pdf
|
||||
https://web.archive.org/web/20170930161657/http://210.243.166.143/prob1.pdf
|
||||
|
Loading…
x
Reference in New Issue
Block a user