Download large file with node.js avoiding high memory consumption
I'm trying to create a file downloader as a background service, but when a large file is scheduled, it is first held entirely in memory and only written to disk at the end of the download.
How can I make the file be written to disk gradually, to preserve memory, considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
// Question's original script: downloads one file over HTTP and writes it to
// the current directory under the last path segment of the URL.
// `path` and `events` are loaded here but not referenced in this snippet.
var sys = require("sys"),
    http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    events = require("events");

var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";

// Host to connect to, and the local file name (last segment of the URL path).
var host = url.parse(downloadfile).hostname
var filename = url.parse(downloadfile).pathname.split("/").pop()

var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();

// Running byte count, printed once per second by the interval below.
// The interval is never cleared in this snippet.
var dlprogress = 0;
setInterval(function () {
    sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);

request.addListener('response', function (response) {
    response.setEncoding('binary')
    sys.puts("File size: " + response.headers['content-length'] + " bytes.")
    // Every chunk is appended to this string, so the whole response body is
    // held in memory until the 'end' event fires.
    var body = '';
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        body += chunk;
    });
    response.addListener("end", function() {
        // The complete body is written to disk in one synchronous call.
        fs.writeFileSync(filename, body, 'binary');
        sys.puts("After download finished");
    });
});
Answers
I changed the callback to:
// Write each chunk of the response to disk as it arrives instead of
// accumulating the whole body in memory.
request.addListener('response', function (response) {
    // NOTE: append mode ('a') is kept as-is; if `filename` already exists,
    // the downloaded bytes are added after its current content.
    var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
    sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");

    // Without a listener, a write-stream 'error' event would be thrown as an
    // uncaught exception.
    downloadfile.addListener('error', function (err) {
        sys.puts("Error writing " + filename + ": " + err.message);
    });

    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        // Pass the encoding as a plain argument; the previous
        // `encoding='binary'` also assigned an implicit global variable.
        var flushed = downloadfile.write(chunk, 'binary');
        if (!flushed) {
            // write() returned false: the stream's internal buffer is full.
            // Stop reading from the network until the disk catches up, so
            // memory use stays bounded even when the disk is the slow side.
            response.pause();
            downloadfile.once('drain', function () {
                response.resume();
            });
        }
    });

    response.addListener("end", function () {
        downloadfile.end();
        sys.puts("Finished downloading " + filename);
    });
});
This worked perfectly.
Does the request package work for your use case?
it lets you do things like this:
// Start the download, then stream its body straight into the target file.
var download = request(downloadurl);
download.pipe(fs.createWriteStream(downloadtohere));
Take a look at http-request:
// shorthand syntax, buffered response
function printBufferedResponse(error, response) {
    if (error) {
        throw error;
    }
    console.log(response.code, response.headers, response.buffer.toString());
}

http.get('http://localhost/get', printBufferedResponse);

// save the response to 'myfile.bin' with a progress callback
var downloadOptions = {
    url: 'http://localhost/get',
    progress: function (current, total) {
        console.log('downloaded %d bytes from %d', current, total);
    }
};

http.get(downloadOptions, 'myfile.bin', function (error, response) {
    if (error) {
        throw error;
    }
    console.log(response.code, response.headers, response.file);
});
When downloading a large file, please use fs.write and not fs.writeFile, because writeFile overwrites the file's previous content on every call.
/**
 * Downloads the resource described by the outer `options` object and streams
 * it to 'sai.tar.gz', reporting progress through `sendstatus` and completion
 * through `sendendstatus`.
 *
 * @param {*} res - Handle passed through unchanged to `sendstatus(res, size)`
 *   and `sendendstatus(res)`.
 */
function downloadfile(res) {
    // Bytes received so far for this download. Declared locally so the
    // counter starts at zero and is not shared between concurrent downloads.
    var size = 0;

    var requestserver = http.request(options, function (r) {
        console.log('STATUS: ' + r.statusCode);
        console.log('HEADERS: ' + JSON.stringify(r.headers));

        // A write stream queues writes in order and applies backpressure.
        // Issuing overlapping fs.write() calls on one descriptor is unsafe,
        // and closing it on 'end' could race with writes still in flight.
        var file = fs.createWriteStream('sai.tar.gz');

        file.on('error', function (err) {
            console.error('error writing sai.tar.gz: ' + err.message);
            // Stop pulling data from the server once it cannot be stored.
            r.destroy();
        });

        // 'finish' fires after every chunk has been flushed, so completion
        // is only reported once the data has actually been written.
        file.on('finish', function () {
            sendendstatus(res);
        });

        r.on('data', function (chunk) {
            size += chunk.length;
            console.log(size+'bytes received');
            sendstatus(res,size);
        });

        r.on('end', function () {
            console.log('\nended from server');
        });

        // pipe() writes the chunks, pauses the response while the disk is
        // busy, and ends the file stream when the response ends.
        r.pipe(file);
    });

    requestserver.on('error', function (err) {
        console.error('download request failed: ' + err.message);
    });

    // http.request() does not send anything until end() is called.
    requestserver.end();
}
Instead of holding the content in memory in the "data" event listener, you should write each chunk to the file in append mode.
Use streams like Carter Cole suggested. Here is a more complete example
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');

var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);

// Keep a handle on the request itself. pipe() returns its destination, so
// assigning the result of request(...).pipe(...) would give the file stream
// and the 'data' listener below would be attached to the wrong object.
var r = request(downloadURL);

r.on('data', function(data) {
    inspect('binary data received');
});

// pipe() does not forward errors from the source to the destination, so
// network failures have to be observed on the request.
r.on('error', function (err) {
    inspect(err, 'error downloading file');
});

// Stream the response body into the file as it arrives.
r.pipe(downloadFile);

downloadFile.on('end', function () {
    inspect(downloadPath, 'file downloaded to path');
});

downloadFile.on('error', function (err) {
    inspect(err, 'error downloading file');
});
You may need to install the required modules, which you can do with: npm install filed request eyespect temp