Streaming Large Files in Node.js Efficiently
#nodejs
#streams
#performance
#http
Building systems that move multi‑GB files without melting servers is a lot easier when you lean on Node.js streams. This guide shows how to serve and receive large files efficiently, handle backpressure, implement range requests, and avoid common pitfalls.
What you’ll learn:
- Why streaming beats reading the whole file
- Backpressure and highWaterMark basics
- Production‑ready file download server (with Range support)
- Streaming uploads safely
- Useful transforms (gzip, hashing, throttling)
- Tuning and operational tips
Why stream?
- Constant memory: Streams process data in chunks, so a 20 GB file does not allocate 20 GB of RAM.
- Backpressure-aware: Streams slow down when the destination can’t keep up.
- Early data: Clients start receiving bytes immediately, improving perceived latency.
- Resumable: Range requests allow clients to resume downloads instead of restarting.
Streams and backpressure in a sentence
Readable streams produce chunks; writable streams consume them only as fast as they can flush. When writable.write(chunk) returns false, pause the readable and wait for the writable's 'drain' event before resuming. stream.pipeline() wires this up for you and guarantees teardown on error.
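For contrast, here is roughly what pipeline saves you from writing by hand. A minimal sketch of manual backpressure handling, assuming you already have a readable and a writable stream (copyWithBackpressure is an illustrative name, not a library API):

const { createReadStream } = require('fs');

function copyWithBackpressure(readable, writable) {
  readable.on('data', (chunk) => {
    // write() returns false when the destination's internal buffer is full
    if (!writable.write(chunk)) {
      readable.pause(); // stop producing
      writable.once('drain', () => readable.resume()); // resume once the buffer empties
    }
  });
  readable.on('end', () => writable.end());
  readable.on('error', (err) => writable.destroy(err));
  writable.on('error', () => readable.destroy());
}

// e.g. copyWithBackpressure(createReadStream('large.bin'), res);

In practice, prefer stream.pipeline(): it does this plus error propagation and cleanup in one call.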
Serving large files (the baseline)
The simplest, safe pattern is fs.createReadStream piped to the HTTP response using stream.pipeline. This automatically handles backpressure and ensures resources are cleaned up on error.
// server-basic.js
const http = require('http');
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream');
const BASE_DIR = path.resolve(__dirname, 'public'); // serve files from ./public
function safeResolve(base, requestedPath) {
  const resolved = path.resolve(base, '.' + requestedPath);
  // Compare against base + separator so a sibling directory like ../public-evil can't slip past
  if (resolved !== base && !resolved.startsWith(base + path.sep)) throw new Error('Path traversal');
  return resolved;
}
const server = http.createServer(async (req, res) => {
  try {
    const url = new URL(req.url, `http://${req.headers.host}`);
    const filePath = safeResolve(BASE_DIR, url.pathname === '/' ? '/large.bin' : url.pathname);
    const stat = await fs.promises.stat(filePath);
    res.statusCode = 200;
    res.setHeader('Content-Type', 'application/octet-stream');
    res.setHeader('Content-Length', stat.size);
    res.setHeader('Accept-Ranges', 'bytes'); // advertise range support
    res.setHeader('Cache-Control', 'public, max-age=31536000, immutable');
    const fileStream = fs.createReadStream(filePath);
    pipeline(fileStream, res, (err) => {
      if (err) {
        // If connection aborts, pipeline receives an error. Just log.
        console.error('Stream failed:', err.message);
      }
    });
  } catch (e) {
    res.statusCode = e.code === 'ENOENT' ? 404 : 500;
    res.end('Error');
  }
});
server.listen(8080, () => console.log('Listening on http://localhost:8080'));
Notes:
- pipeline handles backpressure and ensures fileStream and res are destroyed on error.
- Always validate and confine requested paths to prevent directory traversal.
- For static assets, add Content-Disposition or strong cache headers as needed.
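For example, to prompt a "save as" download with a suggested name (the filename here is just a placeholder):

res.setHeader('Content-Disposition', 'attachment; filename="large.bin"');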
Adding HTTP Range requests (resume and partial downloads)
Support for Range: bytes=start-end lets clients resume downloads and seek within media. Implementing this well reduces wasted bandwidth.
// server-range.js
const http = require('http');
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream');
const BASE_DIR = path.resolve(__dirname, 'public');
function safeResolve(base, requestedPath) {
  const resolved = path.resolve(base, '.' + requestedPath);
  // Compare against base + separator so a sibling directory like ../public-evil can't slip past
  if (resolved !== base && !resolved.startsWith(base + path.sep)) throw new Error('Path traversal');
  return resolved;
}
function parseRange(header, size) {
  // Returns { start, end } or null if invalid
  if (!header || !header.startsWith('bytes=')) return null;
  const rangeSpec = header.slice(6).trim();
  // Support common forms: "start-end", "start-", "-suffixLength"
  const [startStr, endStr] = rangeSpec.split('-', 2);
  let start, end;
  if (startStr === '') {
    // suffix: "-N" -> last N bytes
    const suffixLen = Number(endStr);
    if (!Number.isFinite(suffixLen) || suffixLen <= 0) return null;
    start = Math.max(size - suffixLen, 0);
    end = size - 1;
  } else {
    start = Number(startStr);
    if (!Number.isFinite(start) || start < 0) return null;
    if (endStr && endStr !== '') {
      end = Number(endStr);
      if (!Number.isFinite(end) || end < start) return null;
      end = Math.min(end, size - 1);
    } else {
      end = size - 1;
    }
  }
  if (start >= size) return null;
  return { start, end };
}
const server = http.createServer(async (req, res) => {
  try {
    const url = new URL(req.url, `http://${req.headers.host}`);
    const filePath = safeResolve(BASE_DIR, url.pathname === '/' ? '/large.bin' : url.pathname);
    const stat = await fs.promises.stat(filePath);
    const total = stat.size;
    res.setHeader('Accept-Ranges', 'bytes');
    res.setHeader('Cache-Control', 'public, max-age=31536000, immutable');
    res.setHeader('Content-Type', 'application/octet-stream');
    const rangeHeader = req.headers.range;
    const range = parseRange(rangeHeader, total);
    if (range) {
      const { start, end } = range;
      const chunkSize = end - start + 1;
      res.statusCode = 206;
      res.setHeader('Content-Range', `bytes ${start}-${end}/${total}`);
      res.setHeader('Content-Length', chunkSize);
      const stream = fs.createReadStream(filePath, { start, end });
      pipeline(stream, res, (err) => err && console.error('206 stream error:', err.message));
    } else if (rangeHeader) {
      // Malformed or unsatisfiable
      res.statusCode = 416;
      res.setHeader('Content-Range', `bytes */${total}`);
      res.end();
    } else {
      res.statusCode = 200;
      res.setHeader('Content-Length', total);
      const stream = fs.createReadStream(filePath);
      pipeline(stream, res, (err) => err && console.error('200 stream error:', err.message));
    }
  } catch (e) {
    res.statusCode = e.code === 'ENOENT' ? 404 : 500;
    res.end('Error');
  }
});
server.listen(8080, () => console.log('Range server on http://localhost:8080'));
Test it:
- curl -I http://localhost:8080/large.bin
- curl -H "Range: bytes=0-999" http://localhost:8080/large.bin --output part1.bin
- curl -H "Range: bytes=-1024" http://localhost:8080/large.bin --output tail.bin
Production tips:
- Always set Accept-Ranges: bytes.
- Return 206 with Content-Range and Content-Length for partial responses; 416 for unsatisfiable ranges.
- Many CDNs and browsers rely on correct range support for media scrubbing and resuming.
Streaming uploads without buffering entire bodies
For large uploads, parse multipart form data as a stream and write directly to disk or cloud storage.
Example with Busboy (streams file parts):
// upload-stream.js
const http = require('http');
const fs = require('fs');
const path = require('path');
const Busboy = require('busboy');
const { pipeline } = require('stream');
const UPLOAD_DIR = path.resolve(__dirname, 'uploads');
fs.mkdirSync(UPLOAD_DIR, { recursive: true }); // make sure the destination directory exists
const server = http.createServer((req, res) => {
  if (req.method !== 'POST') {
    res.writeHead(405).end('Use POST');
    return;
  }
  const bb = Busboy({
    headers: req.headers,
    limits: {
      fileSize: 1024 * 1024 * 1024, // 1 GB limit
      files: 1
    }
  });
  let sawFile = false;
  bb.on('file', (name, file, info) => {
    sawFile = true;
    const { filename } = info;
    const destPath = path.join(UPLOAD_DIR, path.basename(filename));
    const out = fs.createWriteStream(destPath, { flags: 'wx' }); // fail if exists
    let truncated = false;
    file.on('limit', () => { truncated = true; }); // fileSize limit hit; busboy truncates the stream
    pipeline(file, out, (err) => {
      if (res.writableEnded) return;
      if (err) {
        console.error('Upload failed:', err.message);
        // Ensure request is aborted so remaining data isn't read
        req.unpipe(bb);
        res.writeHead(500).end('Upload failed');
      } else if (truncated) {
        fs.unlink(destPath, () => {}); // discard the partial file
        res.writeHead(413).end('File too large');
      } else {
        res.writeHead(201).end('OK');
      }
    });
  });
  bb.on('error', (err) => {
    console.error('Busboy error:', err.message);
    if (!res.writableEnded) res.writeHead(400).end('Bad upload');
  });
  bb.on('partsLimit', () => {
    res.writeHead(413).end('Too many parts');
  });
  bb.on('filesLimit', () => {
    res.writeHead(413).end('Too many files');
  });
  bb.on('fieldsLimit', () => {
    res.writeHead(413).end('Too many fields');
  });
  bb.on('close', () => {
    // 'close' fires once busboy has consumed the whole request
    if (!sawFile && !res.writableEnded) {
      res.writeHead(400).end('No file received');
    }
  });
  req.pipe(bb);
});
server.listen(8081, () => console.log('Upload server on http://localhost:8081'));
Notes:
- pipeline ensures the write stream closes on error.
- Use flags: 'wx' to prevent overwriting existing files.
- Apply limits (size, file count) to mitigate abuse.
- For cloud storage, pipe directly to the SDK stream (e.g., AWS SDK v3 Upload supports streams); see the sketch below.
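As a sketch of that last point, assuming the @aws-sdk/client-s3 and @aws-sdk/lib-storage packages and a placeholder bucket name, you can hand the incoming file stream straight to Upload instead of writing to disk:

// s3-upload-sketch.js (illustrative; bucket and key are placeholders)
const { S3Client } = require('@aws-sdk/client-s3');
const { Upload } = require('@aws-sdk/lib-storage');

async function uploadStreamToS3(fileStream, key) {
  const upload = new Upload({
    client: new S3Client({}), // region and credentials come from the environment
    params: { Bucket: 'my-bucket', Key: key, Body: fileStream },
    queueSize: 4, // concurrent part uploads
    partSize: 8 * 1024 * 1024 // 8 MB parts
  });
  upload.on('httpUploadProgress', (p) => console.log('uploaded bytes:', p.loaded));
  return upload.done();
}

Inside the Busboy 'file' handler you would call uploadStreamToS3(file, path.basename(filename)) and respond once the returned promise resolves.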
Useful transforms: compression, hashing, throttling
- Compression: For text assets, you can compress on the fly. Avoid compressing formats already compressed (zip, mp4, jpg).
const fs = require('fs');
const zlib = require('zlib');
const { pipeline } = require('stream');
// Set Content-Encoding: br (after checking Accept-Encoding) and omit Content-Length
res.setHeader('Content-Encoding', 'br');
pipeline(fs.createReadStream(src), zlib.createBrotliCompress(), res, cb);
- Hashing on the fly: Compute checksums without a second pass.
const fs = require('fs');
const crypto = require('crypto');
const { PassThrough, pipeline } = require('stream');
const tee = new PassThrough(); // duplicate stream
const hash = crypto.createHash('sha256');
pipeline(
  fs.createReadStream(file),
  tee,
  (err) => err && console.error(err)
);
// Both consumers see every chunk; backpressure follows the slower of the two
tee.pipe(hash).on('finish', () => {
  console.log('sha256:', hash.read().toString('hex')); // or hash.digest('hex') if using update/end
});
tee.pipe(res);
- Throttling: Limit throughput for fairness or testing.
const { Transform } = require('stream');
function throttle(bytesPerSec) {
  let tokens = bytesPerSec;
  // Refill the token bucket once per second; unref so the timer doesn't keep the process alive
  const refill = setInterval(() => (tokens = bytesPerSec), 1000);
  refill.unref();
  return new Transform({
    transform(chunk, enc, cb) {
      const send = (buf) => {
        if (tokens <= 0) return setTimeout(() => send(buf), 50); // wait for the next refill
        const allow = Math.min(tokens, buf.length);
        tokens -= allow;
        this.push(buf.subarray(0, allow));
        if (allow < buf.length) setTimeout(() => send(buf.subarray(allow)), 50);
        else cb();
      };
      send(chunk);
    },
    flush(cb) {
      clearInterval(refill);
      cb();
    }
  });
}
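Wire it into a pipeline like any other transform; for example, to cap a download at roughly 512 KB/s:

pipeline(fs.createReadStream(filePath), throttle(512 * 1024), res, (err) => err && console.error(err));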
Tuning for performance and stability
- Prefer stream.pipeline: It wires backpressure and cleanup correctly.
- Tune highWaterMark:
  - Readable (fs.createReadStream): defaults to 64 KB. For very fast disks or networks, increasing to 256 KB–1 MB may help throughput at the cost of memory per connection.
  - Writable (HTTP response): buffering is managed by Node, and pipeline handles its backpressure for you.
- Concurrency control: Limit concurrent downloads/uploads to protect disk and network. Use a semaphore or queue (a minimal sketch follows this list).
- File descriptor limits: Raise ulimit -n in production to accommodate many concurrent open files.
- Caching:
  - Set Cache-Control, ETag, and Last-Modified for static assets.
  - For large media, immutable URLs + long max-age work well.
- Content-Type: Serve an accurate Content-Type to help clients. Use a MIME library or a simple extension map.
- Abort handling: Clients drop connections. With pipeline, streams are destroyed automatically; still log and monitor aborted transfers.
- Security:
  - Sanitize and constrain paths.
  - Don’t leak absolute paths or stack traces.
  - For authenticated downloads, stream from a verified, pre-authorized path.
- Observability:
  - Log bytes sent, duration, and whether a range was used.
  - Track error rates (EPIPE, ECONNRESET, 416).
  - Monitor disk I/O, open FDs, and heap usage.
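The concurrency bullet can be as simple as a counting semaphore. A minimal sketch (createSemaphore and the limit of 50 are illustrative, not a library API):

// Cap the number of simultaneous streaming transfers
function createSemaphore(max) {
  let active = 0;
  const waiting = [];
  return {
    async acquire() {
      if (active < max) { active++; return; }
      // Wait for a slot; release() hands it over without touching the count
      await new Promise((resolve) => waiting.push(resolve));
    },
    release() {
      const next = waiting.shift();
      if (next) next(); // pass the slot straight to the next waiter
      else active--;
    }
  };
}

const downloads = createSemaphore(50);

// In the request handler:
// await downloads.acquire();
// pipeline(fs.createReadStream(filePath), res, () => downloads.release());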
Common pitfalls
- Reading the whole file with fs.readFile then res.end(buf): spikes memory and puts pressure on the garbage collector.
- Manually looping read()/write() without honoring write() backpressure: can overwhelm memory.
- Missing range support for large media downloads: prevents resume and scrubbing.
- Not setting Content-Length: the response falls back to chunked transfer encoding, and some clients handle downloads worse without a known size.
Quick checklist
- Use fs.createReadStream + stream.pipeline.
- Implement Range for big downloads.
- Constrain paths and set correct headers.
- Add upload limits and stream to destination.
- Monitor and tune highWaterMark, concurrency, and file descriptor limits.
Further reading:
- Node.js stream docs: https://nodejs.org/api/stream.html
- fs streams: https://nodejs.org/api/fs.html#fs_fs_createreadstreampath-options
- HTTP spec for range requests (RFC 9110): https://www.rfc-editor.org/rfc/rfc9110#name-range-requests
With these patterns, you can move very large files through your Node.js services with constant memory, robust error handling, and a great client experience.