16
16
start_link (Pool , ListenOpts , AcceptorOpts ) ->
17
17
gen_server :start_link (? MODULE , [Pool , ListenOpts , AcceptorOpts ], []).
18
18
19
-
20
19
% % gen_server api
21
20
22
21
init ([Pool , ListenOpts , PoolOpts ]) ->
23
- % % Trapping exit so can close socket in terminate/2
24
- _ = process_flag (trap_exit , true ),
25
22
Port = maps :get (port , ListenOpts , 8080 ),
26
23
IPAddress = maps :get (ip , ListenOpts , {0 , 0 , 0 , 0 }),
27
24
AcceptorPoolSize = maps :get (size , PoolOpts , 10 ),
28
25
SocketOpts = maps :get (socket_options , ListenOpts , [{reuseaddr , true },
29
26
{nodelay , true },
27
+ {reuseaddr , true },
30
28
{backlog , 32768 },
31
29
{keepalive , true }]),
30
+ % % Trapping exit so can close socket in terminate/2
31
+ _ = process_flag (trap_exit , true ),
32
32
Opts = [{active , false }, {mode , binary }, {packet , raw }, {ip , IPAddress } | SocketOpts ],
33
33
case gen_tcp :listen (Port , Opts ) of
34
34
{ok , Socket } ->
35
35
% % acceptor could close the socket if there is a problem
36
36
MRef = monitor (port , Socket ),
37
- { ok , _ } = grpcbox_pool :accept_socket (Pool , Socket , AcceptorPoolSize ),
37
+ grpcbox_pool :accept_socket (Pool , Socket , AcceptorPoolSize ),
38
38
{ok , {Socket , MRef }};
39
- {error , eaddrinuse } ->
39
+ {error , eaddrinuse } = Error ->
40
40
% % our desired port is already in use
41
- % % its likely this grpcbox_socket server is restarting
41
+ % % it's likely this grpcbox_socket server has been killed (for reasons unknown) and is restarting
42
42
% % previously it would have bound to the port before passing control to our acceptor pool
43
+ % % the socket remains open
43
44
% % in the restart scenario, the socket process would attempt to bind again
44
45
% % to the port and then stop, the sup would keep restarting it
45
46
% % and we would end up breaching the restart strategy of the parent sup
46
47
% % eventually taking down the entire tree
47
48
% % result of which is we have no active listener and grpcbox is effectively down
48
- % % so now if we hit eaddrinuse, we check if our acceptor pool is already the
49
- % % controlling process, if so we reuse the port from its state and
50
- % % allow grpcbox_socket to start cleanly
49
+ % % so now if we hit eaddrinuse, we check if our acceptor pool is using it
50
+ % % if so we close the port here and stop this process
51
+ % % NOTE: issuing stop in init won't trigger terminate, so we can't rely on
52
+ % % the socket being closed there
53
+ % % This allows the sup to restart things cleanly
54
+ % % We could try to reuse the existing port rather than closing it
55
+ % % but side effects were encountered there, so deliberately avoiding
51
56
52
57
% % NOTE: acceptor_pool has a grace period for connections before it terminates
53
58
% % grpcbox_pool sets this to a default of 5 secs
54
59
% % this needs to be considered when deciding on related supervisor restart strategies
55
60
% % AND keep in mind the acceptor pool will continue accepting new connections
56
61
% % during this grace period
57
62
58
- % % Other possible fixes here include changing the grpcbox_services_sup from its
59
- % % rest_for_one to a one_for_all strategy. This ensures the pool and thus the
60
- % % current controlling process of the socket is terminated
61
- % % and allows things to restart cleanly if grpcbox_socket dies
62
- % % the disadvantage there however is we will drop all existing grpc connections
63
-
64
- % % Another possible fix is to play with the restart strategy intensity and periods
65
- % % and ensure the top level sup doesnt get breached but...
66
- % % a requirement will be to ensure the grpcbox_service_sup forces a restart
67
- % % of grpcbox_pool and therefore the acceptor_pool process
68
- % % as only by doing that will be free up the socket and allow grpcbox_socket to rebind
69
- % % thus we end up terminating any existing grpc connections
70
-
71
- % % Yet another possible fix is to move the cleanup of closing the socket
72
- % % out of grpcbox_socket's terminate and into acceptor_pool's terminate
73
- % % that however puts two way co-ordination between two distinct libs
74
- % % which is far from ideal and in addition will also result in existing grpc connection
75
- % % being dropped
76
-
77
- % % my view is, if at all possible, its better to restart the grpcbox_socket process without
78
- % % impacting existing connections, the fix below allows for that, albeit a lil messy
79
- % % there is most likely a better solution to all of this, TODO: revisit
80
-
81
63
% % get the current sockets in use by the acceptor pool
82
- % % if one is bound to our target port then reuse
64
+ % % if one is bound to our target port then close it
83
65
% % need to allow for possibility of multiple services, each with its own socket
84
66
% % so we need to identify our interested socket via port number
85
67
PoolSockets = grpcbox_pool :pool_sockets (Pool ),
@@ -89,15 +71,15 @@ init([Pool, ListenOpts, PoolOpts]) ->
89
71
{ok , Socket };
90
72
(_ , Acc ) ->
91
73
Acc
92
- end , { error , eaddrinuse } , PoolSockets ),
74
+ end , socket_not_found , PoolSockets ),
93
75
case MaybeHaveExistingSocket of
94
76
{ok , Socket } ->
95
- MRef = monitor ( port , Socket ),
96
- { ok , { Socket , MRef }};
97
- { error , Reason } ->
98
- { stop , Reason }
99
- end ;
100
- {error , Reason }->
77
+ gen_tcp : close ( Socket );
78
+ socket_not_found ->
79
+ noop
80
+ end ,
81
+ Error ;
82
+ {error , Reason } ->
101
83
{stop , Reason }
102
84
end .
103
85
@@ -115,10 +97,11 @@ handle_info(_, State) ->
115
97
code_change (_ , State , _ ) ->
116
98
{ok , State }.
117
99
118
- terminate (_ , {Socket , MRef }) ->
100
+ terminate (_Reason , {Socket , MRef }) ->
119
101
% % Socket may already be down but need to ensure it is closed to avoid
120
102
% % eaddrinuse error on restart
103
+ % % this takes care of that, unless of course this process is killed...
121
104
case demonitor (MRef , [flush , info ]) of
122
105
true -> gen_tcp :close (Socket );
123
106
false -> ok
124
- end .
107
+ end .
0 commit comments