Skip to content

Commit

Permalink
Do system_exit on recurring pre-vote failure due to busy connection (#563)
Browse files Browse the repository at this point in the history

* If a connection is stuck due to a network black hole or similar
issues, the sender may receive neither a response to the previous
request nor an explicit error, preventing any progress through that
connection.

* Such situations are unlikely to resolve on their own, and
sometimes restarting the process is the only solution. If pre-vote
requests repeatedly cannot be sent because connections remain busy,
and the number of such failures exceeds a certain threshold,
`system_exit` will be invoked with `N22_unrecoverable_isolation`.
  • Loading branch information
greensky00 authored Jan 20, 2025
1 parent 436df8c commit 386162e
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
29 changes: 25 additions & 4 deletions include/libnuraft/raft_server.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ public:
, reconnect_limit_(50)
, leave_limit_(5)
, vote_limit_(5)
, busy_connection_limit_(20)
{}

limits(const limits& src) {
Expand Down Expand Up @@ -176,6 +177,19 @@ public:
* Active only when `auto_adjust_quorum_for_small_cluster_` is enabled.
*/
std::atomic<int32> vote_limit_;

/**
* If a connection is stuck due to a network black hole or similar issues,
* the sender may receive neither a response to the previous request nor
* an explicit error, preventing any progress through that connection.
* Such situations are unlikely to resolve on their own, and sometimes
* restarting the process is the only solution. If the number of pre-vote
* failures caused by busy connections exceeds this threshold,
* `system_exit` will be invoked with `N22_unrecoverable_isolation`.
*
* If zero, this feature is disabled.
*/
std::atomic<int32> busy_connection_limit_;
};

raft_server(context* ctx, const init_options& opt = init_options());
Expand Down Expand Up @@ -841,28 +855,35 @@ protected:
struct pre_vote_status_t {
pre_vote_status_t()
: quorum_reject_count_(0)
, failure_count_(0)
, no_response_failure_count_(0)
, busy_connection_failure_count_(0)
{ reset(0); }
void reset(ulong _term) {
term_ = _term;
done_ = false;
live_ = dead_ = abandoned_ = 0;
live_ = dead_ = abandoned_ = connection_busy_ = 0;
}
ulong term_;
std::atomic<bool> done_;
std::atomic<int32> live_;
std::atomic<int32> dead_;
std::atomic<int32> abandoned_;
std::atomic<int32> connection_busy_;

/**
* Number of pre-vote rejections by quorum.
*/
std::atomic<int32> quorum_reject_count_;

/**
* Number of pre-vote failures due to not-responding peers.
* Number of pre-vote failures due to non-responding peers.
*/
std::atomic<int32> no_response_failure_count_;

/**
* Number of pre-vote failures due to busy connections.
*/
std::atomic<int32> failure_count_;
std::atomic<int32> busy_connection_failure_count_;
};

/**
Expand Down
33 changes: 27 additions & 6 deletions src/handle_vote.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ See the License for the specific language governing permissions and
limitations under the License.
**************************************************************************/

#include "error_code.hxx"
#include "raft_server.hxx"

#include "cluster_config.hxx"
Expand Down Expand Up @@ -86,7 +87,7 @@ void raft_server::request_prevote() {
if (pre_vote_.live_ + pre_vote_.dead_ > 0) {
if (pre_vote_.live_ + pre_vote_.dead_ < quorum_size + 1) {
// Pre-vote failed due to non-responding voters.
pre_vote_.failure_count_++;
pre_vote_.no_response_failure_count_++;
p_wn("total %d nodes (including this node) responded for pre-vote "
"(term %" PRIu64 ", live %d, dead %d), at least %d nodes should "
"respond. failure count %d",
Expand All @@ -95,15 +96,15 @@ void raft_server::request_prevote() {
pre_vote_.live_.load(),
pre_vote_.dead_.load(),
quorum_size + 1,
pre_vote_.failure_count_.load());
pre_vote_.no_response_failure_count_.load());
} else {
pre_vote_.failure_count_ = 0;
pre_vote_.no_response_failure_count_ = 0;
}
}
int num_voting_members = get_num_voting_members();
if ( params->auto_adjust_quorum_for_small_cluster_ &&
num_voting_members == 2 &&
pre_vote_.failure_count_ > raft_server::raft_limits_.vote_limit_ ) {
pre_vote_.no_response_failure_count_ > raft_server::raft_limits_.vote_limit_ ) {
// 2-node cluster's pre-vote failed due to offline node.
p_wn("2-node cluster's pre-vote is failing long time, "
"adjust quorum to 1");
Expand Down Expand Up @@ -166,8 +167,28 @@ void raft_server::request_prevote() {
if (pp->make_busy()) {
pp->send_req(pp, req, resp_handler_);
} else {
p_wn("failed to send prevote request: peer %d (%s) is busy",
pp->get_id(), pp->get_endpoint().c_str());
pre_vote_.connection_busy_++;
p_wn("failed to send prevote request: peer %d (%s) is busy, count %d",
pp->get_id(), pp->get_endpoint().c_str(),
pre_vote_.connection_busy_.load());
}
}

int32 election_quorum_size = get_quorum_for_election() + 1;
if (pre_vote_.connection_busy_ >= election_quorum_size) {
// Couldn't send pre-vote request to majority of peers,
// no hope to get quorum.
pre_vote_.busy_connection_failure_count_++;
p_wn("failed to send prevote request to majority of peers, "
"no hope to get quorum, count: %d",
pre_vote_.busy_connection_failure_count_.load());
int32_t busy_conn_limit = raft_server::raft_limits_.busy_connection_limit_;
if (busy_conn_limit &&
pre_vote_.busy_connection_failure_count_ > busy_conn_limit) {
// LCOV_EXCL_START
p_ft("too many pre-vote failures due to busy connection!");
ctx_->state_mgr_->system_exit(N22_unrecoverable_isolation);
// LCOV_EXCL_STOP
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/raft_server.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ void raft_server::become_leader() {
next_leader_candidate_ = -1;
initialized_ = true;
pre_vote_.quorum_reject_count_ = 0;
pre_vote_.failure_count_ = 0;
pre_vote_.no_response_failure_count_ = 0;
data_fresh_ = true;

request_append_entries();
Expand Down Expand Up @@ -1418,7 +1418,7 @@ void raft_server::become_follower() {
initialized_ = true;
uncommitted_config_.reset();
pre_vote_.quorum_reject_count_ = 0;
pre_vote_.failure_count_ = 0;
pre_vote_.no_response_failure_count_ = 0;

ptr<raft_params> params = ctx_->get_params();
if ( params->auto_adjust_quorum_for_small_cluster_ &&
Expand Down

0 comments on commit 386162e

Please sign in to comment.