pdf.js/test/downloadutils.js
Tim van der Meij 99430225b0
Drop obsolete logic from the downloadFile function in test/downloadutils.js
This code is old and predates the improvements we made to the test
manifest to only contain working URLs (either Web Archive or
GitHub/Bugzilla links), so the fallback logic to try the Web Archive is
no longer necessary. This greatly simplifies the function and also
makes sure that we fail directly in case a bad URL is added to the
manifest, instead of having it work "accidentally" because of this
logic, since we want the manifest to be correct at all times (and
otherwise fail loudly).
2021-05-22 14:45:42 +02:00

182 lines
4.8 KiB
JavaScript

/*
* Copyright 2014 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* eslint-disable no-var */
"use strict";
var fs = require("fs");
var crypto = require("crypto");
var http = require("http");
var https = require("https");
/**
 * Inserts the `if_` marker after the snapshot ID of a Web Archive URL.
 *
 * Without the marker the Web Archive serves an HTML page that embeds the
 * PDF file in an iframe instead of the file itself (issue 8920).
 *
 * @param {string} url - The URL to inspect.
 * @returns {string} The rewritten URL, or `url` unchanged when it does not
 *   match the `web.archive.org/web/<digits>/<http(s) URL>` shape.
 */
function rewriteWebArchiveUrl(url) {
  const match =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/g.exec(url);
  if (match === null) {
    return url;
  }
  const [, archivePrefix, snapshotId, archivedTarget] = match;
  return `${archivePrefix}${snapshotId}if_${archivedTarget}`;
}
/**
 * Downloads `url` into `file`, following HTTP redirects.
 *
 * @param {string} file - Path of the file to write the response body to.
 * @param {string} url - The `http://` or `https://` URL to fetch. Web Archive
 *   URLs are passed through `rewriteWebArchiveUrl` first.
 * @param {function} callback - Called without arguments once the file has
 *   been written, or with an error (a string such as `"HTTP 404"` or an
 *   `Error` object) when the download failed.
 * @param {number} [redirects] - Number of redirects followed so far; used by
 *   the recursive calls, callers normally omit it.
 */
function downloadFile(file, url, callback, redirects) {
  url = rewriteWebArchiveUrl(url);

  const protocol = /^https:\/\//.test(url) ? https : http;
  protocol
    .get(url, function (response) {
      const status = response.statusCode;

      if (status === 301 || status === 302 || status === 307 || status === 308) {
        // The redirect body is of no interest; drain it so that the
        // underlying socket is released.
        response.resume();

        if (redirects > 10) {
          callback("Too many redirects");
          // Stop here: without this the chain would continue and the
          // callback could be invoked a second time.
          return;
        }
        const location = response.headers.location;
        if (!location) {
          callback("HTTP " + status + " without Location header");
          return;
        }
        let redirectTo;
        try {
          // `Location` may be relative, resolve it against the current URL.
          redirectTo = new URL(location, url).href;
        } catch (err) {
          callback(err);
          return;
        }
        downloadFile(file, redirectTo, callback, (redirects || 0) + 1);
        return;
      }

      if (status !== 200) {
        response.resume();
        callback("HTTP " + status);
        return;
      }

      const stream = fs.createWriteStream(file);
      stream.on("error", function (err) {
        callback(err);
      });
      stream.on("finish", function () {
        callback();
      });
      // `pipe` does not forward source errors, so a connection dropped
      // mid-download is routed through the write stream's error handler;
      // otherwise the callback would never be invoked.
      response.on("error", function (err) {
        stream.destroy(err);
      });
      response.pipe(stream);
    })
    .on("error", function (err) {
      callback(err);
    });
}
/**
 * Downloads, one after another, every manifest entry that is marked as a
 * link and whose file does not exist on disk yet.
 *
 * The URL of an entry is read from the `<file>.link` file next to it. When a
 * download fails, an empty `<file>` is created and the error text is stored
 * in `<file>.error`; processing then continues with the next entry.
 *
 * @param {Array<Object>} manifest - Manifest entries; `item.file` and
 *   `item.link` are read.
 * @param {function} callback - Called without arguments once all downloads
 *   have been attempted.
 */
function downloadManifestFiles(manifest, callback) {
  const links = manifest
    .filter(function (item) {
      return item.link && !fs.existsSync(item.file);
    })
    .map(function (item) {
      const file = item.file;
      // Strip trailing whitespace, e.g. the newline at the end of the file.
      const url = fs
        .readFileSync(file + ".link")
        .toString()
        .replace(/\s+$/, "");
      return { file, url };
    });
  let i = 0;

  function downloadNext() {
    if (i >= links.length) {
      callback();
      return;
    }
    const file = links[i].file;
    const url = links[i].url;
    console.log("Downloading " + url + " to " + file + "...");
    downloadFile(file, url, function (err) {
      if (err) {
        console.error("Error during downloading of " + url + ": " + err);
        fs.writeFileSync(file, ""); // making it empty file
        // `err` may be an Error object, which `writeFileSync` rejects;
        // always store its string form.
        fs.writeFileSync(file + ".error", String(err));
      }
      i++;
      downloadNext();
    });
  }

  downloadNext();
}
/**
 * Computes the MD5 digest of a file by streaming its contents.
 *
 * @param {string} file - Path of the file to hash.
 * @param {function} callback - Called with `(err)` when reading fails, or
 *   with `(null, digest)` where `digest` is the hex-encoded MD5 hash.
 */
function calculateMD5(file, callback) {
  const digester = crypto.createHash("md5");

  fs.createReadStream(file)
    .on("data", chunk => {
      digester.update(chunk);
    })
    .on("error", readError => {
      callback(readError);
    })
    .on("end", () => {
      callback(null, digester.digest("hex"));
    });
}
/**
 * Checks every manifest entry, in order, against its recorded MD5 hash.
 *
 * An entry counts as failed when a `<file>.error` marker exists, when the
 * file cannot be read, when the entry has no `md5` value, or when the
 * computed hash differs from `item.md5`. A warning is printed for each
 * failed entry.
 *
 * @param {Array<Object>} manifest - Manifest entries; `item.file` and
 *   `item.md5` are read.
 * @param {function} callback - Called once with `true` if any entry failed
 *   and `false` otherwise.
 */
function verifyManifestFiles(manifest, callback) {
  let position = 0;
  let anyFailure = false;

  // Prints a warning on stderr and records the failure.
  const fail = message => {
    console.error(message);
    anyFailure = true;
  };

  // Moves on to the entry after the current one.
  const proceed = () => {
    position++;
    visit();
  };

  function visit() {
    if (position >= manifest.length) {
      callback(anyFailure);
      return;
    }

    const entry = manifest[position];
    if (fs.existsSync(entry.file + ".error")) {
      fail(
        `WARNING: File was not downloaded. See "${entry.file}.error" file.`
      );
      proceed();
      return;
    }

    calculateMD5(entry.file, (readError, computed) => {
      if (readError) {
        console.log(
          `WARNING: Unable to open file for reading "${readError}".`
        );
        anyFailure = true;
      } else if (!entry.md5) {
        fail(
          `WARNING: Missing md5 for file "${entry.file}". ` +
            `Hash for current file is "${computed}"`
        );
      } else if (computed !== entry.md5) {
        fail(
          `WARNING: MD5 of file "${entry.file}" does not match file. ` +
            `Expected "${entry.md5}" computed "${computed}"`
        );
      }
      proceed();
    });
  }

  visit();
}
// Module interface: the two manifest-level entry points are exported here.
exports.downloadManifestFiles = downloadManifestFiles;
exports.verifyManifestFiles = verifyManifestFiles;