Skip to content

Commit

Permalink
Make cluster replicas return ASK and TRYAGAIN (valkey-io#495)
Browse files Browse the repository at this point in the history
After READONLY, make a cluster replica behave as its primary regarding
returning ASK redirects and TRYAGAIN.

Without this patch, a client reading from a replica cannot tell if a key
doesn't exist or if it has already been migrated to another shard as
part of an ongoing slot migration. Therefore, without an ASK redirect in
this situation, offloading reads to cluster replicas wasn't reliable.

Note: The target of a redirect is always a primary. If a client wants to
continue reading from a replica after following a redirect, it needs to
figure out the replicas of that new primary using CLUSTER SHARDS or
similar.

This is related to valkey-io#21 and has been made possible by the introduction of
Replication of Slot Migration States in valkey-io#445.

----

Release notes:

During cluster slot migration, replicas are able to return -ASK
redirects and -TRYAGAIN.

---------

Signed-off-by: Viktor Söderqvist <[email protected]>
  • Loading branch information
zuiderkwast authored May 24, 2024
1 parent 3dd2f5a commit d72ba06
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 7 deletions.
14 changes: 9 additions & 5 deletions src/cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,10 +1048,12 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
* can safely serve the request, otherwise we return a TRYAGAIN
* error). To do so we set the importing/migrating state and
* increment a counter for every missing key. */
if (n == myself && getMigratingSlotDest(slot) != NULL) {
migrating_slot = 1;
} else if (getImportingSlotSource(slot) != NULL) {
importing_slot = 1;
if (clusterNodeIsMaster(myself) || c->flags & CLIENT_READONLY) {
if (n == clusterNodeGetMaster(myself) && getMigratingSlotDest(slot) != NULL) {
migrating_slot = 1;
} else if (getImportingSlotSource(slot) != NULL) {
importing_slot = 1;
}
}
} else {
/* If it is not the first key/channel, make sure it is exactly
Expand Down Expand Up @@ -1120,7 +1122,9 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
/* MIGRATE always works in the context of the local node if the slot
* is open (migrating or importing state). We need to be able to freely
* move keys among instances in this case. */
if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand) return myself;
if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand && clusterNodeIsMaster(myself)) {
return myself;
}

/* If we don't have all the keys and we are migrating the slot, send
* an ASK redirection or TRYAGAIN. */
Expand Down
48 changes: 46 additions & 2 deletions tests/unit/cluster/slot-migration.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica
set R3_id [R 3 CLUSTER MYID]
set R4_id [R 4 CLUSTER MYID]
set R5_id [R 5 CLUSTER MYID]
R 0 SET "{aga}2" banana

test "Slot migration states are replicated" {
# Validate initial states
Expand Down Expand Up @@ -139,8 +140,51 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica
assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
catch {[R 3 get aga]} e
assert_equal {MOVED} [lindex [split $e] 0]
assert_equal {609} [lindex [split $e] 1]
set port0 [srv 0 port]
assert_equal "MOVED 609 127.0.0.1:$port0" $e
}

test "Replica of migrating node returns ASK redirect after READONLY" {
# Validate initial states
assert_equal [get_open_slots 0] "\[609->-$R1_id\]"
assert_equal [get_open_slots 1] "\[609-<-$R0_id\]"
assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
# Read missing key in readonly replica in migrating state.
assert_equal OK [R 3 READONLY]
set port1 [srv -1 port]
catch {[R 3 get aga]} e
assert_equal "ASK 609 127.0.0.1:$port1" $e
assert_equal OK [R 3 READWRITE]
}

test "Replica of migrating node returns TRYAGAIN after READONLY" {
# Validate initial states
assert_equal [get_open_slots 0] "\[609->-$R1_id\]"
assert_equal [get_open_slots 1] "\[609-<-$R0_id\]"
assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
# Read some existing and some missing keys in readonly replica in
# migrating state results in TRYAGAIN, just like its primary would do.
assert_equal OK [R 3 READONLY]
catch {[R 3 mget "{aga}1" "{aga}2"]} e
assert_match "TRYAGAIN *" $e
assert_equal OK [R 3 READWRITE]
}

test "Replica of importing node returns TRYAGAIN after READONLY and ASKING" {
# Validate initial states
assert_equal [get_open_slots 0] "\[609->-$R1_id\]"
assert_equal [get_open_slots 1] "\[609-<-$R0_id\]"
assert_equal [get_open_slots 3] "\[609->-$R1_id\]"
assert_equal [get_open_slots 4] "\[609-<-$R0_id\]"
# A client follows an ASK redirect to a primary, but wants to read from a replica.
# The replica returns TRYAGAIN just like a primary would do for two missing keys.
assert_equal OK [R 4 READONLY]
assert_equal OK [R 4 ASKING]
catch {R 4 MGET "{aga}1" "{aga}2"} e
assert_match "TRYAGAIN *" $e
assert_equal OK [R 4 READWRITE]
}

test "New replica inherits migrating slot" {
Expand Down

0 comments on commit d72ba06

Please sign in to comment.