Skip to content

Commit

Permalink
feat: add tests for processing of health checks and also fix a bug
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <[email protected]>
  • Loading branch information
GuptaManan100 authored and arthurschreiber committed Nov 8, 2024
1 parent 517f519 commit d77fae7
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 9 deletions.
20 changes: 11 additions & 9 deletions go/vt/discovery/keyspace_events.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,17 +381,19 @@ func (kss *keyspaceState) onHealthCheck(th *TabletHealth) {
sstate.serving = true
case !th.Serving:
sstate.serving = false
// Once we have seen a non-serving primary healthcheck, there is no need for us to explicitly wait
// for a reparent to happen. We use waitForReparent to ensure that we don't prematurely stop
// buffering when we receive a serving healthcheck from the primary that is being demoted.
// However, if we receive a non-serving check, then we know that we won't receive any more serving
// healthchecks anymore until reparent finishes. Specifically, this helps us when PRS fails, but
// stops gracefully because the new candidate couldn't get caught up in time. In this case, we promote
// the previous primary back. Without turning off waitForReparent here, we wouldn't be able to stop
// buffering for that case.
sstate.waitForReparent = false
}
}
if !th.Serving {
// Once we have seen a non-serving primary healthcheck, there is no need for us to explicitly wait
// for a reparent to happen. We use waitForReparent to ensure that we don't prematurely stop
// buffering when we receive a serving healthcheck from the primary that is being demoted.
// However, if we receive a non-serving check, then we know that we won't receive any more serving
// healthchecks anymore until reparent finishes. Specifically, this helps us when PRS fails, but
// stops gracefully because the new candidate couldn't get caught up in time. In this case, we promote
// the previous primary back. Without turning off waitForReparent here, we wouldn't be able to stop
// buffering for that case.
sstate.waitForReparent = false
}

// if the primary for this shard has been externally reparented, we're undergoing a failover,
// which is considered an availability event. update this shard to point it to the new tablet
Expand Down
228 changes: 228 additions & 0 deletions go/vt/discovery/keyspace_events_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,234 @@ func TestWaitForConsistentKeyspaces(t *testing.T) {
}
}

func TestOnHealthCheck(t *testing.T) {
testcases := []struct {
name string
ss *shardState
th *TabletHealth
wantServing bool
wantWaitForReparent bool
wantExternallyReparented int64
wantUID uint32
}{
{
name: "Non primary tablet health ignored",
ss: &shardState{
serving: false,
waitForReparent: false,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_REPLICA,
},
Serving: true,
},
wantServing: false,
wantWaitForReparent: false,
wantExternallyReparented: 10,
wantUID: 1,
}, {
name: "Serving primary seen in non-serving shard",
ss: &shardState{
serving: false,
waitForReparent: false,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: true,
PrimaryTermStartTime: 20,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 2,
},
},
},
wantServing: true,
wantWaitForReparent: false,
wantExternallyReparented: 20,
wantUID: 2,
}, {
name: "New serving primary seen while waiting for reparent",
ss: &shardState{
serving: false,
waitForReparent: true,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: true,
PrimaryTermStartTime: 20,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 2,
},
},
},
wantServing: true,
wantWaitForReparent: false,
wantExternallyReparented: 20,
wantUID: 2,
}, {
name: "Old serving primary seen while waiting for reparent",
ss: &shardState{
serving: false,
waitForReparent: true,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: true,
PrimaryTermStartTime: 10,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
},
wantServing: false,
wantWaitForReparent: true,
wantExternallyReparented: 10,
wantUID: 1,
}, {
name: "Old non-serving primary seen while waiting for reparent",
ss: &shardState{
serving: false,
waitForReparent: true,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: false,
PrimaryTermStartTime: 10,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
},
wantServing: false,
wantWaitForReparent: false,
wantExternallyReparented: 10,
wantUID: 1,
}, {
name: "New serving primary while already serving",
ss: &shardState{
serving: true,
waitForReparent: false,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: true,
PrimaryTermStartTime: 20,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 2,
},
},
},
wantServing: true,
wantWaitForReparent: false,
wantExternallyReparented: 20,
wantUID: 2,
}, {
name: "Primary goes non serving",
ss: &shardState{
serving: true,
waitForReparent: false,
externallyReparented: 10,
currentPrimary: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
th: &TabletHealth{
Target: &querypb.Target{
TabletType: topodatapb.TabletType_PRIMARY,
},
Serving: false,
PrimaryTermStartTime: 10,
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: testCell,
Uid: 1,
},
},
},
wantServing: false,
wantWaitForReparent: false,
wantExternallyReparented: 10,
wantUID: 1,
},
}

ksName := "ks"
shard := "-80"
kss := &keyspaceState{
mu: sync.Mutex{},
keyspace: ksName,
shards: make(map[string]*shardState),
}
// Adding this so that we don't run any topo calls from ensureConsistentLocked.
kss.moveTablesState = &MoveTablesState{
Typ: MoveTablesRegular,
State: MoveTablesSwitching,
}
for _, tt := range testcases {
t.Run(tt.name, func(t *testing.T) {
kss.shards[shard] = tt.ss
tt.th.Target.Keyspace = ksName
tt.th.Target.Shard = shard
kss.onHealthCheck(tt.th)
require.Equal(t, tt.wantServing, tt.ss.serving)
require.Equal(t, tt.wantWaitForReparent, tt.ss.waitForReparent)
require.Equal(t, tt.wantExternallyReparented, tt.ss.externallyReparented)
require.Equal(t, tt.wantUID, tt.ss.currentPrimary.Uid)
})
}
}

type fakeTopoServer struct {
}

Expand Down

0 comments on commit d77fae7

Please sign in to comment.