Description
This error was reproduced using node.js v16.15.0.
Reproduction
Here is a minimal example to reproduce the above-mentioned memory leak:
const http = require('http');
const { createProxyServer } = require('http-proxy');
const proxy = createProxyServer({ target: 'http://127.0.0.1:5000' });
http
.createServer((req, res) => {
proxy.web(req, res);
})
.listen(7000)
.once('listening', () => {
console.log('server is listenning');
});
I used http-server to create an upstream server:
npx http-server . -p 5000
Memory leak appears when client sends a http request, but then closes connection prematurely after receiving HTTP headers without waiting for HTTP body.
I used cURL to simulate this behavior:
curl http://localhost:7000/15142e1388c685f57c58e0babbede1f1.jpg
Memory leak could be reproduces with cURL when requesting any binary file from a TTY terminal, since cURL doesn't support printing binary data to terminals and just closes the connection.
Cause
In my example memory leak appears on the following line:
but any .pipe
call could cause it.
The problem lies in how Stream#pipe works.
Let's say we create a pipe with the following call:
source.pipe(target);
If source
stream is closed or emits an error event, .pipe
will automatically propagate this state to target
stream.
But if target
stream is closed or emits an error event, then the source
stream is not destroyed. It remains open and active, buffering all incoming data as described in stream buffering docs.
Theoretically, this behavior should not prevent source
stream from being garbage collected after target
stream is destroyed, but for some reason it does.
Ways to fix it
I found at least three ways to fix this memory leak, so for now I'll just describe them here.
Manually handling events on target
stream
This is the most straight forward solution.
Here is a code I used to fix http-proxy
memory leak in my project:
proxy.on('proxyRes', (proxyRes, req, res) => {
const cleanup = (err) => {
// cleanup event listeners to allow clean garbage collection
proxyRes.removeListener('error', cleanup);
proxyRes.removeListener('close', cleanup);
res.removeListener('error', cleanup);
res.removeListener('close', cleanup);
// destroy all source streams to propagate the caught event backward
req.destroy(err);
proxyRes.destroy(err);
};
proxyRes.once('error', cleanup);
proxyRes.once('close', cleanup);
res.once('error', cleanup);
res.once('close', cleanup);
});
But to properly address the issue similar handlers should be attached to every target
stream for every .pipe
call.
The actual code could be simpler since:
- it should be enough to listen only for
'close'
events since they are always emitted after'error'
events (unlessemitClose
isfalse
) - it's probably not necessary to call
removeListener
manually, because those listeners should not prevent streams from being garbage collected (but who knows, I certainly don't want to test it)
Using new stream.pipeline
method instead of a legacy pipe
stream.pipeline handles events significantly better then good old .pipe
.
Here is a patch for lib/http-proxy/passes/web-incoming.js
which I tested myself:
diff --git a/lib/http-proxy/passes/web-incoming.js b/lib/http-proxy/passes/web-incoming.js
index 7ae7355..50547ab 100644
--- a/lib/http-proxy/passes/web-incoming.js
+++ b/lib/http-proxy/passes/web-incoming.js
@@ -1,5 +1,6 @@
var httpNative = require('http'),
httpsNative = require('https'),
+ pipeline = require('stream').pipeline,
web_o = require('./web-outgoing'),
common = require('../common'),
followRedirects = require('follow-redirects');
@@ -118,7 +119,7 @@ module.exports = {
req.on('error', forwardError);
forwardReq.on('error', forwardError);
- (options.buffer || req).pipe(forwardReq);
+ pipeline(options.buffer || req, forwardReq, () => {})
if(!options.target) { return res.end(); }
}
@@ -167,7 +168,7 @@ module.exports = {
}
}
- (options.buffer || req).pipe(proxyReq);
+ pipeline(options.buffer || req, proxyReq, () => {})
proxyReq.on('response', function(proxyRes) {
if(server) { server.emit('proxyRes', proxyRes, req, res); }
@@ -184,7 +185,7 @@ module.exports = {
if (server) server.emit('end', req, res, proxyRes);
});
// We pipe to the response unless its expected to be handled by the user
- if (!options.selfHandleResponse) proxyRes.pipe(res);
+ if (!options.selfHandleResponse) pipeline(proxyRes, res, () => {});
} else {
if (server) server.emit('end', req, res, proxyRes);
}
There are some .pipe
calls in other files which probably should be examined as well.
The only problem here is that stream.pipeline
is not available for old node.js versions, because it was introduced in node.js v10.0.0
Using some third-party library like pumpify
pumpify is a third party solution for piping streams which, unlike build-in .pipe
, handles all events correctly.
Here is a patch for lib/http-proxy/passes/web-incoming.js
which I tested myself:
diff --git a/lib/http-proxy/passes/web-incoming.js b/lib/http-proxy/passes/web-incoming.js
index 7ae7355..6703537 100644
--- a/lib/http-proxy/passes/web-incoming.js
+++ b/lib/http-proxy/passes/web-incoming.js
@@ -1,5 +1,6 @@
var httpNative = require('http'),
httpsNative = require('https'),
+ pumpify = require('pumpify'),
web_o = require('./web-outgoing'),
common = require('../common'),
followRedirects = require('follow-redirects');
@@ -118,7 +119,7 @@ module.exports = {
req.on('error', forwardError);
forwardReq.on('error', forwardError);
- (options.buffer || req).pipe(forwardReq);
+ pumpify(options.buffer || req, forwardReq)
if(!options.target) { return res.end(); }
}
@@ -167,7 +168,7 @@ module.exports = {
}
}
- (options.buffer || req).pipe(proxyReq);
+ pumpify(options.buffer || req, proxyReq)
proxyReq.on('response', function(proxyRes) {
if(server) { server.emit('proxyRes', proxyRes, req, res); }
@@ -184,7 +185,7 @@ module.exports = {
if (server) server.emit('end', req, res, proxyRes);
});
// We pipe to the response unless its expected to be handled by the user
- if (!options.selfHandleResponse) proxyRes.pipe(res);
+ if (!options.selfHandleResponse) pumpify(proxyRes, res);
} else {
if (server) server.emit('end', req, res, proxyRes);
}
pumpify
doesn't rely on new node.js features, so it should work fine with node.js v8.x.x.
Conclusion
This is a serious issue which should be addressed and fixed.
I think that it would be better to use either stream.pipeline
or pumpify
, but all three proposed solutions should work fine.
Personally I prefer stream.pipeline
since it's a build-in tool, but to use it you'll have to deprecate node.js v8.x.x.
So it's probably better to keep current node.js versions support and use pumpify
instead.
I could prepare a PR myself, but fixing this memory leak should be pretty straightforward with the data and examples I provided here.