Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add issue about using sync spaces with wrong failover #2131

Merged
merged 4 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ Added

- New Failover API function ``set_options`` to change failover internal params.

- Issue about sync spaces usage with a wrong failover setup.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Changed
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
3 changes: 3 additions & 0 deletions cartridge/confapplier.lua
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ local cluster_cookie = require('cartridge.cluster-cookie')
local ClusterwideConfig = require('cartridge.clusterwide-config')
local logging_whitelist = require('cartridge.logging_whitelist')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')

yaml.cfg({
encode_load_metatables = false,
Expand Down Expand Up @@ -558,12 +559,14 @@ local function boot_instance(clusterwide_config)
-- It recovers snapshot
-- Or bootstraps replication
invalid_format.start_check()
sync_spaces.start_check()
local snap1 = hotreload.snap_fibers()

box.cfg(box_opts)
local snap2 = hotreload.snap_fibers()
hotreload.whitelist_fibers(hotreload.diff(snap1, snap2))
invalid_format.end_check()
sync_spaces.end_check()
require('membership.options').SUSPICIOUSNESS = true

local username = cluster_cookie.username()
Expand Down
18 changes: 18 additions & 0 deletions cartridge/failover.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,14 @@ local function is_suppressed()
return vars.failover_suppressed
end

--- Check if failover synchro mode enabled.
-- @function is_synchro_mode_enabled
-- @local
-- @treturn boolean true / false
local function is_synchro_mode_enabled()
return vars.enable_synchro_mode
end

--- Check if current configuration implies consistent switchover.
-- @function consistency_needed
-- @local
Expand All @@ -1033,6 +1041,14 @@ local function consistency_needed()
return vars.consistency_needed
end

--- Get failover mode.
-- @function mode
-- @local
-- @treturn string
local function mode()
return vars.mode
end

--- Get current stateful failover coordinator
-- @function get_coordinator
-- @treturn[1] table coordinator
Expand Down Expand Up @@ -1182,6 +1198,8 @@ return {
is_rw = is_rw,
is_paused = is_paused,
is_suppressed = is_suppressed,
is_synchro_mode_enabled = is_synchro_mode_enabled,
mode = mode,

force_inconsistency = force_inconsistency,
wait_consistency = wait_consistency,
Expand Down
21 changes: 19 additions & 2 deletions cartridge/issues.lua
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ local failover = require('cartridge.failover')
local confapplier = require('cartridge.confapplier')
local lua_api_proxy = require('cartridge.lua-api.proxy')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')

local ValidateConfigError = errors.new_class('ValidateConfigError')

Expand Down Expand Up @@ -305,6 +306,23 @@ local function list_on_instance(opts)
})
end

local sync_spaces_list = sync_spaces.spaces_list_str()
if sync_spaces_list ~= ''
and (failover.is_leader()
and (failover.mode() == 'eventual'
or (failover.mode() == 'stateful' and not failover.is_synchro_mode_enabled()))) then
table.insert(ret, {
level = 'warning',
topic = 'failover',
instance_uuid = instance_uuid,
replicaset_uuid = replicaset_uuid,
message = 'Having sync spaces may cause failover errors. ' ..
'Consider to change failover type to stateful and enable synchro_mode or use ' ..
'raft failover mode. Sync spaces: ' .. sync_spaces_list
})
end


-- It should be a vclockkeeper, but it's not
if failover.consistency_needed()
and failover.get_active_leaders()[replicaset_uuid] == instance_uuid
Expand Down Expand Up @@ -557,8 +575,7 @@ local function list_on_cluster()

-- Check stateful failover issues

local failover_cfg = topology.get_failover_params(topology_cfg)
if failover_cfg.mode == 'stateful' then
if failover.mode() == 'stateful' then
local coordinator, err = failover.get_coordinator()

if err ~= nil then
Expand Down
75 changes: 75 additions & 0 deletions cartridge/sync-spaces.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
local log = require('log')
local vars = require('cartridge.vars').new('cartridge.sync-spaces')

vars:new('sync_spaces', {})

local function check_sync(old, new)
if new == nil then
vars.sync_spaces[old[3]] = nil
return
end
local name = new[3]

if new[6].is_sync then
vars.sync_spaces[name] = true
else
vars.sync_spaces[name] = nil
end
end

local function before_replace(old, new)
check_sync(old, new)
return new
end

local function on_schema_init()
box.space._space:before_replace(before_replace)
end

--- List sync spaces.
--
-- @function spaces_list_str
-- @treturn string String of spaces with invalid params, delimeted by comma
local function spaces_list_str()
local res = ''
for name, _ in pairs(vars.sync_spaces) do
res = res .. name .. ', '
end
return res:sub(1, -3)
end

--- Start check if spaces have invalid format in Tarantool 2.x.x.
-- In Tarantool 1.10 you can perform check by youself with function `run_check`.
--
-- @function start_check
local function start_check()
if box.ctl.on_schema_init ~= nil then
box.ctl.on_schema_init(on_schema_init)
end
end


--- Remove set triggers and write message to log.
--
-- @function end_check
local function end_check()
if box.ctl.on_schema_init ~= nil then
box.space._space:before_replace(nil, before_replace)
box.ctl.on_schema_init(nil, on_schema_init)
end

if next(vars.sync_spaces) then
log.warn(
"Having sync spaces may cause failover errors. " ..
"Consider to change failover type to stateful and enable synchro_mode or use " ..
"raft failover mode. Sync spaces: " ..
spaces_list_str()
)
end
end

return {
start_check = start_check,
end_check = end_check,
spaces_list_str = spaces_list_str,
}
33 changes: 33 additions & 0 deletions test/integration/failover_eventual_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,39 @@ g.after_test('test_sigstop', function()
cluster:wait_until_healthy()
end)


function g.test_sync_spaces_is_prohibited()
t.skip_if(not helpers.tarantool_version_ge('2.6.1'))
local master = cluster:server('storage-1')
master:exec(function()
box.schema.space.create('test', {if_not_exists = true, is_sync=true})
end)

master:restart()

helpers.retrying({}, function()
t.assert_items_equals(helpers.list_cluster_issues(master), {
{
level = 'warning',
topic = 'failover',
message = 'Having sync spaces may cause failover errors. ' ..
'Consider to change failover type to stateful and enable synchro_mode or use ' ..
'raft failover mode. Sync spaces: test',
instance_uuid = master.instance_uuid,
replicaset_uuid = master.replicaset_uuid,
},
})
end)
end

g.after_test('test_sync_spaces_is_prohibited', function()
cluster:server('storage-1'):exec(function()
box.space.test:drop()
end)
cluster:server('storage-1'):restart()
end)


local function failover_pause()
cluster.main_server:graphql({
query = [[
Expand Down