Skip to content

Commit ae95dcb

Browse files
authored
Merge pull request #20 from codedownio/connect-timeout
Add timeout to connect attempts to fix #19
2 parents 721bcff + 4d6f7be commit ae95dcb

File tree

4 files changed

+45
-9
lines changed

4 files changed

+45
-9
lines changed

src/Network/Wait.hs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,39 @@ module Network.Wait (
3232

3333
-------------------------------------------------------------------------------
3434

35+
import Control.Exception (throwIO)
3536
import Control.Monad.Catch
3637
import Control.Monad.IO.Class
3738
import Control.Retry
3839
-- Only needed for base < 4.11, redundant otherwise
3940
import Data.Semigroup
41+
import System.IO.Error
42+
import System.Timeout
4043

4144
import Network.Socket
4245

4346
-------------------------------------------------------------------------------
4447

48+
-- | Each individual connect attempt needs a timeout to prevent it from hanging
49+
-- indefinitely. This policy allows us to make that timeout length adaptive,
50+
-- based on the 'RetryStatus' of the outer retry policy.
51+
--
52+
-- Thus, the first attempt to connect will have a short timeout (currently 100ms),
53+
-- and then successive attempts will get longer timeouts via "FullJitter" backoff.
54+
-- The goals of this are twofold:
55+
--
56+
-- 1) If a connect call hangs during the first few attempts, it is timed out quickly
57+
-- and re-attempted, so on a healthy network you aren't penalized too much by the hang.
58+
-- The outer retry policy can control the time between attempts, so the user can set
59+
-- it high enough to make this be the case.
60+
--
61+
-- 2) If the network is slow, we will eventually reach the maximum timeout of 3 seconds,
62+
-- which should be long enough. Note that the popular wait-for script uses 1 second
63+
-- timeouts, so this is extra conservative:
64+
-- https://github.com/eficode/wait-for/blob/7586b3622f010808bb2027c19aaf367221b4ad54/wait-for#L72
65+
connectRetryPolicy :: MonadIO m => RetryPolicyM m
66+
connectRetryPolicy = capDelay (3000000) (fullJitterBackoff 100000)
67+
4568
-- | `waitTcp` @retryPolicy hostName serviceName@ is a variant of `waitTcpWith`
4669
-- which does not install any additional handlers.
4770
--
@@ -141,13 +164,21 @@ waitSocketWith
141164
=> [RetryStatus -> Handler m Bool] -> RetryPolicyM m -> AddrInfo
142165
-> m Socket
143166
waitSocketWith hs policy addr =
144-
recoveringWith hs policy $
167+
recoveringWith hs policy $ \retryStatus ->
145168
-- all of the networking code runs in IO
146169
liftIO $
147170
-- we want to make sure that we close the socket after every attempt;
148171
-- `bracket` will re-throw any error afterwards
149-
bracket initSocket close $
150-
\sock -> connect sock (addrAddress addr) >> pure sock
172+
bracket initSocket close $ \sock -> do
173+
maybeConnectTimeoutUs <- (getRetryPolicyM connectRetryPolicy) retryStatus
174+
connectTimeoutUs <- case maybeConnectTimeoutUs of
175+
Nothing -> throwIO $ userError "Timeout in connect attempt"
176+
Just us -> pure us
177+
178+
maybeResult <- timeout connectTimeoutUs (connect sock (addrAddress addr))
179+
case maybeResult of
180+
Nothing -> throwIO $ userError "Timeout in connect attempt"
181+
Just () -> pure sock
151182
where
152183
initSocket =
153184
socket (addrFamily addr) (addrSocketType addr) (addrProtocol addr)
@@ -163,15 +194,13 @@ waitSocketWith hs policy addr =
163194
-- the standard output or a logger.
164195
recoveringWith
165196
:: (MonadIO m, MonadMask m)
166-
=> [RetryStatus -> Handler m Bool] -> RetryPolicyM m -> m a -> m a
197+
=> [RetryStatus -> Handler m Bool] -> RetryPolicyM m -> (RetryStatus -> m a) -> m a
167198
recoveringWith hs policy action =
168199
-- apply the retry policy to the following code, with the combinations of
169200
-- the `skipAsyncExceptions`, given, and default handlers. The order of
170201
-- the handlers matters as they are checked in order.
171202
recovering policy (skipAsyncExceptions <> hs <> [defHandler]) $
172-
-- we want to make sure that we close the socket after every attempt;
173-
-- `bracket` will re-throw any error afterwards
174-
const action
203+
action
175204
where
176205
-- our default handler, which works with any exception derived from
177206
-- `SomeException`, and signals that we should retry if allowed by

src/Network/Wait/PostgreSQL.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ waitPostgreSqlWith
9393
=> [RetryStatus -> Handler m Bool] -> RetryPolicyM m -> info
9494
-> m Connection
9595
waitPostgreSqlWith hs policy info =
96-
recoveringWith hs policy $
96+
recoveringWith hs policy $ \_ ->
9797
liftIO $
9898
bracket (connectDb info) close $ \con -> do
9999
rs <- query_ @[Int] con "SELECT 1;"

src/Network/Wait/Redis.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,6 @@ waitRedisWith
7979
:: (MonadIO m, MonadMask m)
8080
=> [RetryStatus -> Handler m Bool] -> RetryPolicyM m -> ConnectInfo
8181
-> m Connection
82-
waitRedisWith hs policy = recoveringWith hs policy . liftIO . checkedConnect
82+
waitRedisWith hs policy = recoveringWith hs policy . const . liftIO . checkedConnect
8383

8484
-------------------------------------------------------------------------------

test/Spec.hs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ tests = testGroup "network-wait"
5454
res <- try @IO @SomeException $
5555
waitTcp testRetryPolicy "localhost" "5999"
5656

57+
case res of
58+
Left _ -> pure ()
59+
Right _ -> assertFailure "`waitTcp` did not fail"
60+
, localOption (mkTimeout $ 10*1000*1000) $ testCase "Can't connect to non-routable private IP" $ do
61+
res <- try @IO @SomeException $
62+
waitTcp testRetryPolicy "10.255.255.1" "80"
63+
5764
case res of
5865
Left _ -> pure ()
5966
Right _ -> assertFailure "`waitTcp` did not fail"

0 commit comments

Comments
 (0)