Skip to content

Commit

Permalink
Add a backoff mechanism for reconnection with random interval
Browse files Browse the repository at this point in the history
Mechanism is as follow:
If mqtt socket is diconnected, a new attempt is done immediatelly
and then after a rendom delay within [1, 2], then [2, 4], [4, 8]...[32, 64]

And then attemps is always after a delay within [32, 64] interval
until mqtt_reconnect_delay set or forever if no reconnect delay set
  • Loading branch information
GwendalRaoul committed Aug 2, 2024
1 parent aff767a commit 3c82685
Showing 1 changed file with 28 additions and 10 deletions.
38 changes: 28 additions & 10 deletions python_transport/wirepas_gateway/protocol/mqtt_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from threading import Thread, Lock
from time import sleep, monotonic
from datetime import datetime
from random import randrange

from paho.mqtt import client as mqtt
from paho.mqtt.client import connack_string
Expand Down Expand Up @@ -196,27 +197,44 @@ def _get_socket(self):
if self.connected:
logging.error("MQTT Inner loop, unexpected disconnection")

start_disconnection = monotonic()
# Socket is not opened anymore, try to reconnect for timeout if set
loop_forever = self.timeout == 0
delay = 0
if self.timeout == 0:
loop_forever = True
else:
loop_forever = False
loop_until = monotonic() + self.timeout

logging.info("Starting reconnect loop with timeout %d" % self.timeout)
# Loop forever or until timeout is over
while loop_forever or (delay <= self.timeout):
next_attempt_window_s = 1
while loop_forever or (monotonic() <= loop_until):
now = monotonic()
try:
logging.debug("MQTT reconnect attempt delay=%d" % delay)
if loop_forever:
remaining_time = "-"
else:
remaining_time = int(loop_until - now)

logging.info("MQTT reconnect attempt (since: %s, remaining time: %s)"
% (int(now - start_disconnection), remaining_time))

ret = self._client.reconnect()
if ret == mqtt.MQTT_ERR_SUCCESS:
break
except Exception:
# Retry to connect in 1 sec up to timeout if set
sleep(1)
delay += 1
logging.debug("Retrying to connect in 1 sec")
# Retry to connect in current attempt windows range
delay_s = randrange(next_attempt_window_s, next_attempt_window_s * 2)
if next_attempt_window_s < 32:
next_attempt_window_s = next_attempt_window_s * 2
logging.debug("Retrying to connect in %d seconds", delay_s)
# Last sleep may end after max timeout, but not a big issue
sleep(delay_s)

if not loop_forever:
# In case of timeout set, check if it exits because of timeout
if delay > self.timeout:
logging.error("Unable to reconnect after %s seconds", delay)
if monotonic() > loop_until:
logging.error("Unable to reconnect after %s seconds", self.timeout)
return None

# Socket must be available once reconnect is successful
Expand Down

0 comments on commit 3c82685

Please sign in to comment.