Merge pull request #8979 from timvandermeij/downloads
Transform Web Archive URLs to avoid downloading an HTML page instead of the PDF file
This commit is contained in:
commit
f9ce904fb7
@ -22,7 +22,22 @@ var crypto = require('crypto');
|
|||||||
var http = require('http');
|
var http = require('http');
|
||||||
var https = require('https');
|
var https = require('https');
|
||||||
|
|
||||||
|
/**
 * Rewrite a Web Archive URL so that `if_` directly follows the numeric ID.
 *
 * Without the `if_` suffix, an HTML page containing an iframe with the PDF
 * file is served instead of the PDF file itself (issue 8920).
 *
 * @param {string} url - The URL that is about to be downloaded.
 * @returns {string} The rewritten URL when `url` has the form
 *   `http(s)://web.archive.org/web/<digits>/http(s)://...`; otherwise `url`
 *   is returned unchanged.
 */
function rewriteWebArchiveUrl(url) {
  // Capture groups: (1) archive prefix, (2) numeric ID, (3) archived URL.
  var archivePattern =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/g;
  var match = archivePattern.exec(url);

  // Anything that is not a plain Web Archive snapshot URL passes through.
  if (!match) {
    return url;
  }

  var archivePrefix = match[1];
  var snapshotId = match[2];
  var archivedUrl = match[3];
  return archivePrefix + snapshotId + 'if_' + archivedUrl;
}
|
||||||
|
|
||||||
function downloadFile(file, url, callback, redirects) {
|
function downloadFile(file, url, callback, redirects) {
|
||||||
|
url = rewriteWebArchiveUrl(url);
|
||||||
|
|
||||||
var completed = false;
|
var completed = false;
|
||||||
var protocol = /^https:\/\//.test(url) ? https : http;
|
var protocol = /^https:\/\//.test(url) ? https : http;
|
||||||
protocol.get(url, function (response) {
|
protocol.get(url, function (response) {
|
||||||
|
@ -1 +1 @@
|
|||||||
http://web.archive.org/web/20150212141833/http://geothermal.inel.gov/publications/future_of_geothermal_energy.pdf
|
https://web.archive.org/web/20170930174755/https://www.pdf-archive.com/2017/09/30/future-of-geothermal-energy/future-of-geothermal-energy.pdf
|
||||||
|
@ -1 +1 @@
|
|||||||
http://web.archive.org/save/_embed/http://210.243.166.143/prob1.pdf
|
https://web.archive.org/web/20170930161657/http://210.243.166.143/prob1.pdf
|
||||||
|
Loading…
x
Reference in New Issue
Block a user