Commit 9fc976d
router: calls affect temporary prioritized replica
Previously, the prioritized replica was changed only if it had been disconnected for FAILOVER_DOWN_TIMEOUT seconds. However, a connection showing up as 'connected' doesn't mean that it actually works: the connection must be pingable in order to be operational. This commit makes failover temporarily lower a replica's priority if FAILOVER_DOWN_SEQUENTIAL_FAIL sequential requests to it fail. All vshard internal requests (including the failover ping) and all user calls affect the counter of sequentially failed requests. Note that a request is considered failed when the net.box connection is not operational (conn.call cannot be made, e.g. the connection is not yet established or the timeout is reached); user functions that throw errors do not affect the prioritized replica.

After this commit failover behaves as follows:

1. Failover pings all prioritized replicas. If a ping doesn't succeed, the connection is recreated. This is needed when a user function returns a value so big that no other request can be made until that value is fully transferred. A failed ping increases the counter of sequentially failed requests.

2. If the connection has been down for >= FAILOVER_DOWN_TIMEOUT, or if the number of sequentially failed requests is >= FAILOVER_DOWN_SEQUENTIAL_FAIL, a replica with lower priority is taken as the main one.

3. If failover hasn't tried the more prioritized replica (according to weights) for more than FAILOVER_UP_TIMEOUT, it tries to promote it back to the prioritized one. Note that it is not promoted if its ping didn't succeed during the ping round in (1).

Closes #483

NO_DOC=bugfix
1 parent 8b7524c commit 9fc976d
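To make the decision rule in the commit message concrete, here is a minimal, self-contained Lua sketch of when a replica loses its temporary priority. It only illustrates the two conditions named in item (2); the field down_ts and the helper replica_is_down are hypothetical illustrations for this page, not the actual router internals (only net_sequential_fail and the FAILOVER_* constants appear in the real code below).

    -- Hypothetical sketch of the rule described above; not the actual vshard code.
    -- A replica loses its temporary priority if either its connection has been
    -- dead for FAILOVER_DOWN_TIMEOUT seconds, or the last
    -- FAILOVER_DOWN_SEQUENTIAL_FAIL requests to it (pings or user calls) failed
    -- on the net.box level.
    local consts = {
        FAILOVER_DOWN_TIMEOUT = 1,
        FAILOVER_DOWN_SEQUENTIAL_FAIL = 3,
    }

    local function replica_is_down(replica, now)
        -- down_ts: assumed timestamp of when the connection died, nil if alive.
        if replica.down_ts ~= nil and
           now - replica.down_ts >= consts.FAILOVER_DOWN_TIMEOUT then
            return true
        end
        return replica.net_sequential_fail >= consts.FAILOVER_DOWN_SEQUENTIAL_FAIL
    end

    -- Example: a replica that still looks 'connected' but timed out 3 times in a row.
    local r = {down_ts = nil, net_sequential_fail = 3}
    assert(replica_is_down(r, 0) == true)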

File tree

11 files changed: +369 −49 lines changed

test/instances/router.lua

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ _G.ilt = require('luatest')
 _G.imsgpack = require('msgpack')
 _G.ivtest = require('test.luatest_helpers.vtest')
 _G.ivconst = require('vshard.consts')
+_G.iverror = require('vshard.error')
 _G.iwait_timeout = _G.ivtest.wait_timeout
 
 -- Do not load entire vshard into the global namespace to catch errors when code

test/router-luatest/router_test.lua

Lines changed: 243 additions & 0 deletions
@@ -1,6 +1,7 @@
 local t = require('luatest')
 local vtest = require('test.luatest_helpers.vtest')
 local vutil = require('vshard.util')
+local vconsts = require('vshard.consts')
 
 local g = t.group('router')
 local cfg_template = {
@@ -861,3 +862,245 @@ g.test_request_timeout = function(g)
         _G.sleep_num = nil
     end)
 end
+
+local function prepare_affect_priority_rs(g)
+    local new_cfg_template = table.deepcopy(cfg_template)
+    new_cfg_template.sharding[1].replicas.replica_1_a.zone = 3
+    new_cfg_template.sharding[1].replicas.replica_1_b.zone = 2
+    new_cfg_template.zone = 1
+    new_cfg_template.weights = {
+        [1] = {
+            [1] = 0,
+            [2] = 1,
+            [3] = 2,
+        },
+    }
+    -- So that ping timeout is always > replica.net_timeout.
+    -- net_timeout starts with CALL_TIMEOUT_MIN and is multiplied by 2 if number
+    -- of failed requests is >= 2.
+    new_cfg_template.failover_ping_timeout = vconsts.CALL_TIMEOUT_MIN * 4
+    local new_cluster_cfg = vtest.config_new(new_cfg_template)
+    vtest.router_cfg(g.router, new_cluster_cfg)
+end
+
+local function affect_priority_clear_net_timeout(g)
+    g.router:exec(function()
+        -- Reset net_timeout, so that it doesn't affect the test. This is
+        -- needed as we use the absolute minimum failover_ping_timeout for
+        -- FAILOVER_DOWN_SEQUENTIAL_FAIL = 3. 10 successful calls are needed
+        -- to restore it to CALL_TIMEOUT_MIN without reset.
+        local router = ivshard.router.internal.static_router
+        for _, rs in pairs(router.replicasets) do
+            for _, r in pairs(rs.replicas) do
+                r.net_timeout = ivconst.CALL_TIMEOUT_MIN
+            end
+        end
+    end)
+end
+
+--
+-- gh-483: failover ping temporarily lowers a replica's priority when it cannot
+-- be reached several times in a row:
+--
+-- 1. replica_1_b is the prioritized one. replica_1_a is the second one.
+-- 2. router establishes connection to all instances, failover sets prioritized
+--    replica_1_b.
+-- 3. Node breaks and stops responding.
+-- 4. Failover retries ping FAILOVER_DOWN_SEQUENTIAL_FAIL times and changes
+--    prioritized replica to the lower one. Note, that connection is recreated
+--    on every failed ping.
+-- 5. Every FAILOVER_UP_TIMEOUT failover checks, if any replica with higher
+--    priority can be reached and changes the prioritized replica if it's so.
+--
+g.test_failover_ping_affects_priority = function()
+    prepare_affect_priority_rs(g)
+
+    -- Find prioritized replica and disable failover for now.
+    g.router:exec(function(rs_uuid, replica_uuid)
+        local router = ivshard.router.internal.static_router
+        local rs = router.replicasets[rs_uuid]
+        local opts = {timeout = iwait_timeout}
+        rs:wait_connected_all(opts)
+
+        t.helpers.retrying(opts, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(rs.replica.uuid, replica_uuid,
+                            'Prioritized replica have not been set yet')
+        end)
+
+        local errinj = ivshard.router.internal.errinj
+        errinj.ERRINJ_FAILOVER_DELAY = true
+        t.helpers.retrying(opts, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(errinj.ERRINJ_FAILOVER_DELAY, 'in',
+                            'Failover have not been stopped yet')
+        end)
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_b:instance_uuid()})
+
+    -- Break 'info' request on replica so that it fails with TimedOut error.
+    g.replica_1_b:exec(function()
+        rawset(_G, 'old_call', ivshard.storage._call)
+        ivshard.storage._call = function(service_name, ...)
+            if service_name == 'info' then
+                ifiber.sleep(ivconst.CALL_TIMEOUT_MIN * 5)
+            end
+            return _G.old_call(service_name, ...)
+        end
+    end)
+
+    affect_priority_clear_net_timeout(g)
+    g.router:exec(function(rs_uuid, master_uuid)
+        local router = ivshard.router.internal.static_router
+        local rs = router.replicasets[rs_uuid]
+
+        -- And we change the prioritized replica.
+        ivshard.router.internal.errinj.ERRINJ_FAILOVER_DELAY = false
+        t.helpers.retrying({timeout = iwait_timeout}, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(rs.replica.uuid, master_uuid)
+        end)
+
+        -- Check that the prioritized replica is not changed, as it's still broken.
+        rawset(_G, 'old_up_timeout', ivconst.FAILOVER_UP_TIMEOUT)
+        ivconst.FAILOVER_UP_TIMEOUT = 0.01
+        ivtest.service_wait_for_new_ok(router.failover_service,
+            {on_yield = router.failover_fiber:wakeup()})
+        t.assert_equals(rs.replica.uuid, master_uuid)
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_a:instance_uuid()})
+
+    -- Restore 'info' request.
+    g.replica_1_b:exec(function()
+        ivshard.storage._call = _G.old_call
+        _G.old_call = nil
+    end)
+
+    -- As replica_1_b has higher priority, it should be restored automatically.
+    g.router:exec(function(rs_uuid, replica_uuid)
+        local router = ivshard.router.internal.static_router
+        local rs = router.replicasets[rs_uuid]
+        t.assert_equals(rs.priority_list[1].uuid, replica_uuid)
+        t.helpers.retrying({timeout = iwait_timeout}, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(rs.replica.uuid, replica_uuid,
+                            'Prioritized replica is not up yet')
+        end)
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_b:instance_uuid()})
+
+    vtest.router_cfg(g.router, global_cfg)
+    g.router:exec(function()
+        ivconst.FAILOVER_UP_TIMEOUT = _G.old_up_timeout
+        _G.old_up_timeout = nil
+    end)
+end
+
+--
+-- gh-483: user calls also affect priority. If several sequential requests
+-- fail, then the same logic as in the previous test happens.
+--
+g.test_failed_calls_affect_priority = function()
+    prepare_affect_priority_rs(g)
+    local timeout = vconsts.CALL_TIMEOUT_MIN * 4
+
+    -- Find prioritized replica and disable failover for now.
+    g.router:exec(function(rs_uuid, replica_uuid)
+        local router = ivshard.router.internal.static_router
+        local rs = router.replicasets[rs_uuid]
+        local opts = {timeout = iwait_timeout}
+        rs:wait_connected_all(opts)
+
+        t.helpers.retrying(opts, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(rs.replica.uuid, replica_uuid,
+                            'Prioritized replica have not been set yet')
+        end)
+
+        local errinj = ivshard.router.internal.errinj
+        errinj.ERRINJ_FAILOVER_DELAY = true
+        t.helpers.retrying(opts, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(errinj.ERRINJ_FAILOVER_DELAY, 'in',
+                            'Failover have not been stopped yet')
+        end)
+
+        -- Discovery is disabled, as it may affect `net_sequential_fail`
+        -- and lead to flakiness of the test.
+        errinj.ERRINJ_LONG_DISCOVERY = true
+        t.helpers.retrying(opts, function()
+            router.discovery_fiber:wakeup()
+            t.assert_equals(errinj.ERRINJ_LONG_DISCOVERY, 'waiting',
+                            'Discovery have not been stopped yet')
+        end)
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_b:instance_uuid()})
+
+    -- Break 'info' request on replica so that it fails with TimedOut error.
+    -- No other request can be broken, as only failover changes priority and
+    -- as soon as it wakes up it succeeds with `_call` and sets
+    -- `net_sequential_fail` to 0.
+    g.replica_1_b:exec(function()
+        rawset(_G, 'old_call', ivshard.storage._call)
+        ivshard.storage._call = function(service_name, ...)
+            if service_name == 'info' then
+                ifiber.sleep(ivconst.CALL_TIMEOUT_MIN * 5)
+            end
+            return _G.old_call(service_name, ...)
+        end
+    end)
+
+    affect_priority_clear_net_timeout(g)
+    local bid = vtest.storage_first_bucket(g.replica_1_a)
+    g.router:exec(function(bid, timeout, rs_uuid, replica_uuid)
+        local router = ivshard.router.internal.static_router
+        local replica = router.replicasets[rs_uuid].replica
+        t.assert_equals(replica.uuid, replica_uuid)
+
+        local fails = replica.net_sequential_fail
+        for _ = 1, ivconst.FAILOVER_DOWN_SEQUENTIAL_FAIL do
+            local ok, err = ivshard.router.callro(bid, 'vshard.storage._call',
+                                                  {'info'}, {timeout = timeout})
+            t.assert_not(ok)
+            t.assert(iverror.is_timeout(err))
+        end
+
+        t.assert_equals(replica.net_sequential_fail,
+                        fails + ivconst.FAILOVER_DOWN_SEQUENTIAL_FAIL)
+
+        -- Priority is changed only by failover. So, the prioritized replica
+        -- is still the failing one.
+        t.assert_equals(router.replicasets[rs_uuid].replica.uuid, replica_uuid)
+    end, {bid, timeout, g.replica_1_b:replicaset_uuid(),
+          g.replica_1_b:instance_uuid()})
+
+    -- Enable failover, which changes priority of the replica.
+    g.router:exec(function(rs_uuid, master_uuid)
+        local router = ivshard.router.internal.static_router
+        ivshard.router.internal.errinj.ERRINJ_FAILOVER_DELAY = false
+        t.helpers.retrying({timeout = iwait_timeout}, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(router.replicasets[rs_uuid].replica.uuid,
+                            master_uuid, 'Master is not prioritized yet')
+        end)
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_a:instance_uuid()})
+
+    -- Restore 'info' request.
+    g.replica_1_b:exec(function()
+        ivshard.storage._call = _G.old_call
+        _G.old_call = nil
+    end)
+
+    -- As replica_1_b has higher priority, it should be restored automatically.
+    g.router:exec(function(rs_uuid, replica_uuid)
+        local old_up_timeout = ivconst.FAILOVER_UP_TIMEOUT
+        ivconst.FAILOVER_UP_TIMEOUT = 1
+        local router = ivshard.router.internal.static_router
+        local rs = router.replicasets[rs_uuid]
+        t.assert_equals(rs.priority_list[1].uuid, replica_uuid)
+        t.helpers.retrying({timeout = iwait_timeout}, function()
+            router.failover_fiber:wakeup()
+            t.assert_equals(rs.replica.uuid, replica_uuid,
+                            'Prioritized replica is not up yet')
+        end)
+        ivconst.FAILOVER_UP_TIMEOUT = old_up_timeout
+    end, {g.replica_1_b:replicaset_uuid(), g.replica_1_b:instance_uuid()})
+
+    vtest.router_cfg(g.router, global_cfg)
+end

test/router/exponential_timeout.result

Lines changed: 4 additions & 3 deletions
@@ -28,9 +28,10 @@ util.map_evals(test_run, {REPLICASET_1, REPLICASET_2}, 'bootstrap_storage(\'memt
 _ = test_run:cmd("create server router_1 with script='router/router_1.lua'")
 ---
 ...
--- Discovery algorithm changes sometimes and should not affect the
+-- Discovery algorithm and failover changes sometimes and should not affect the
 -- exponential timeout test.
-_ = test_run:cmd("start server router_1 with args='discovery_disable'")
+_ = test_run:cmd("start server router_1 with " .. \
+    "args='discovery_disable failover_disable'")
 ---
 ...
 _ = test_run:switch('router_1')
@@ -103,7 +104,7 @@ util.collect_timeouts(rs1)
 - - fail: 0
     ok: 0
     timeout: 0.5
-  - fail: 1
+  - fail: 2
     ok: 0
     timeout: 1
 ...

test/router/exponential_timeout.test.lua

Lines changed: 3 additions & 2 deletions
@@ -10,9 +10,10 @@ util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
 util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
 util.map_evals(test_run, {REPLICASET_1, REPLICASET_2}, 'bootstrap_storage(\'memtx\')')
 _ = test_run:cmd("create server router_1 with script='router/router_1.lua'")
--- Discovery algorithm changes sometimes and should not affect the
+-- Discovery algorithm and failover changes sometimes and should not affect the
 -- exponential timeout test.
-_ = test_run:cmd("start server router_1 with args='discovery_disable'")
+_ = test_run:cmd("start server router_1 with " .. \
+    "args='discovery_disable failover_disable'")
 _ = test_run:switch('router_1')
 util = require('util')
 

test/router/retry_reads.result

Lines changed: 5 additions & 4 deletions
@@ -28,9 +28,10 @@ util.map_evals(test_run, {REPLICASET_1, REPLICASET_2}, 'bootstrap_storage(\'memt
 _ = test_run:cmd("create server router_1 with script='router/router_1.lua'")
 ---
 ...
--- Discovery algorithm changes sometimes and should not affect the
--- read retry decisions.
-_ = test_run:cmd("start server router_1 with args='discovery_disable'")
+-- Discovery algorithm and failover changes sometimes and should not affect the
+-- exponential timeout test.
+_ = test_run:cmd("start server router_1 with " .. \
+    "args='discovery_disable failover_disable'")
 ---
 ...
 _ = test_run:switch('router_1')
@@ -69,7 +70,7 @@ util.collect_timeouts(rs1)
 - - fail: 0
     ok: 0
     timeout: 0.5
-  - fail: 1
+  - fail: 2
     ok: 0
     timeout: 1
 ...

test/router/retry_reads.test.lua

Lines changed: 4 additions & 3 deletions
@@ -10,9 +10,10 @@ util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
 util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
 util.map_evals(test_run, {REPLICASET_1, REPLICASET_2}, 'bootstrap_storage(\'memtx\')')
 _ = test_run:cmd("create server router_1 with script='router/router_1.lua'")
--- Discovery algorithm changes sometimes and should not affect the
--- read retry decisions.
-_ = test_run:cmd("start server router_1 with args='discovery_disable'")
+-- Discovery algorithm and failover changes sometimes and should not affect the
+-- exponential timeout test.
+_ = test_run:cmd("start server router_1 with " .. \
+    "args='discovery_disable failover_disable'")
 _ = test_run:switch('router_1')
 util = require('util')
 

test/router/router.result

Lines changed: 2 additions & 1 deletion
@@ -1527,7 +1527,8 @@ table.sort(error_messages)
 ...
 error_messages
 ---
-- - Use replica:check_is_connected(...) instead of replica.check_is_connected(...)
+- - Use replica:call(...) instead of replica.call(...)
+  - Use replica:check_is_connected(...) instead of replica.check_is_connected(...)
   - Use replica:detach_conn(...) instead of replica.detach_conn(...)
   - Use replica:is_connected(...) instead of replica.is_connected(...)
   - Use replica:safe_uri(...) instead of replica.safe_uri(...)

test/router/router_1.lua

Lines changed: 0 additions & 1 deletion
This file was deleted.

test/router/router_1.lua

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+#!/usr/bin/env tarantool
+
+require('strict').on()
+fiber = require('fiber')
+
+-- Check if we are running under test-run
+if os.getenv('ADMIN') then
+    test_run = require('test_run').new()
+    require('console').listen(os.getenv('ADMIN'))
+end
+
+replicasets = {'cbf06940-0790-498b-948d-042b62cf3d29',
+               'ac522f65-aa94-4134-9f64-51ee384f1a54'}
+
+-- Call a configuration provider
+cfg = dofile('localcfg.lua')
+if arg[1] == 'discovery_disable' then
+    cfg.discovery_mode = 'off'
+end
+
+-- Start the database with sharding
+vshard = require('vshard')
+
+if arg[2] == 'failover_disable' then
+    vshard.router.internal.errinj.ERRINJ_FAILOVER_DELAY = true
+end
+
+vshard.router.cfg(cfg)
+
+if arg[2] == 'failover_disable' then
+    while vshard.router.internal.errinj.ERRINJ_FAILOVER_DELAY ~= 'in' do
+        router.failover_fiber:wakeup()
+        fiber.sleep(0.01)
+    end
+end

vshard/consts.lua

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@ return {
     CALL_TIMEOUT_MAX = 64;
     FAILOVER_UP_TIMEOUT = 5;
     FAILOVER_DOWN_TIMEOUT = 1;
+    FAILOVER_DOWN_SEQUENTIAL_FAIL = 3;
     DEFAULT_FAILOVER_PING_TIMEOUT = 5;
     DEFAULT_SYNC_TIMEOUT = 1;
     RECONNECT_TIMEOUT = 0.5;
