From 9c56fafbd1905952fcb1db4e5b96194af8c9095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 16:24:30 +0100 Subject: [PATCH] Fix worker threads stalling race condition on RESUME command During a RESUME operation, if a 'MySQL_Thread' is bootstrapping listeners (in `MySQL_Thread::run_BootstrapListener`) and detect that `MySQL_Threads_Handler::bootstrapping_listeners` is 'false', the thread prematurely shuts down its own bootstrapping flag from 'mypolls' (`ProxySQL_Poll::bootstrapping_listeners`). Since this thread wont ever bootstrap its corresponding listening sockets, the other working threads will be stalled waiting on it, eventually triggering the watchdog and crashing the instance. Since the bootstrapping operation is sequential, it's expected that all the threads but the one starting their listening sockets are in an active wait. A sensible delay has been introduced to reduce the overhead of such wait. --- include/ProxySQL_Poll.h | 1 - lib/MySQL_Thread.cpp | 16 +++++----------- lib/ProxySQL_Poll.cpp | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/ProxySQL_Poll.h b/include/ProxySQL_Poll.h index 5cfa3d1a48..8fc08063ca 100644 --- a/include/ProxySQL_Poll.h +++ b/include/ProxySQL_Poll.h @@ -34,7 +34,6 @@ class ProxySQL_Poll { MySQL_Data_Stream **myds; unsigned long long *last_recv; unsigned long long *last_sent; - std::atomic bootstrapping_listeners; volatile int pending_listener_add; volatile int pending_listener_del; unsigned int poll_timeout; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index e903612138..4da5d9750e 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3303,15 +3303,11 @@ void MySQL_Thread::run_BootstrapListener() { if (n) { poll_listener_add(n); assert(__sync_bool_compare_and_swap(&mypolls.pending_listener_add,n,0)); - } else { - if (GloMTH->bootstrapping_listeners == false) { - // we stop looping - mypolls.bootstrapping_listeners = false; - } } -#ifdef DEBUG - usleep(5+rand()%10); -#endif + // The delay for the active-wait is a fraction of 'poll_timeout'. Since other + // threads may be waiting on poll for further operations, checks are meaningless + // until that timeout expires (other workers make progress). + usleep(std::min(std::max(mysql_thread___poll_timeout/20, 10000), 40000) + (rand() % 2000)); } } @@ -3432,9 +3428,7 @@ void MySQL_Thread::run() { #endif // IDLE_THREADS pthread_mutex_unlock(&thread_mutex); - if (unlikely(mypolls.bootstrapping_listeners == true)) { - run_BootstrapListener(); - } + run_BootstrapListener(); // flush mysql log file GloMyLogger->flush(); diff --git a/lib/ProxySQL_Poll.cpp b/lib/ProxySQL_Poll.cpp index 8876e5f373..f6fa32b246 100644 --- a/lib/ProxySQL_Poll.cpp +++ b/lib/ProxySQL_Poll.cpp @@ -64,7 +64,6 @@ ProxySQL_Poll::ProxySQL_Poll() { len=0; pending_listener_add=0; pending_listener_del=0; - bootstrapping_listeners = true; size=MIN_POLL_LEN; fds=(struct pollfd *)malloc(size*sizeof(struct pollfd)); myds=(MySQL_Data_Stream **)malloc(size*sizeof(MySQL_Data_Stream *));