Skip to content

Commit

Permalink
1. add ready phase to etcd raft spec
Browse files Browse the repository at this point in the history
2. update trace validation to include ready phase

Signed-off-by: Joshua Zhang <[email protected]>
  • Loading branch information
joshuazh-x committed Oct 15, 2024
1 parent d6c0213 commit d0a1a11
Show file tree
Hide file tree
Showing 16 changed files with 1,120 additions and 5,492 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ verify-mod-tidy:
verify-genproto:
PASSES="genproto" ./scripts/test.sh

.PHONY: trace-validation
trace-validation:
PASSES="trace-validation" ./scripts/test.sh

.PHONY: test
test:
PASSES="unit" ./scripts/test.sh $(GO_TEST_FLAGS)
2 changes: 2 additions & 0 deletions bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,7 @@ func (rn *RawNode) Bootstrap(peers []Peer) error {
for _, peer := range peers {
rn.raft.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode}.AsV2())
}

traceBootstrap(rn.raft)
return nil
}
4 changes: 2 additions & 2 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1963,6 +1963,8 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
panic(err)
}

traceConfChangeEvent(cfg, r)

return r.switchToConfig(cfg, trk)
}

Expand All @@ -1973,8 +1975,6 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
//
// The inputs usually result from restoring a ConfState or applying a ConfChange.
func (r *raft) switchToConfig(cfg tracker.Config, trk tracker.ProgressMap) pb.ConfState {
traceConfChangeEvent(cfg, r)

r.trk.Config = cfg
r.trk.Progress = trk

Expand Down
2 changes: 2 additions & 0 deletions rawnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ func (rn *RawNode) Advance(_ Ready) {
rn.stepsOnAdvance[i] = pb.Message{}
}
rn.stepsOnAdvance = rn.stepsOnAdvance[:0]

traceAdvance(rn.raft)
}

// Status returns the current status of the given group. This allocates, see
Expand Down
14 changes: 14 additions & 0 deletions state_trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type stateMachineEventType int

const (
rsmInitState stateMachineEventType = iota
rsmBootstrap
rsmBecomeCandidate
rsmBecomeFollower
rsmBecomeLeader
Expand All @@ -38,6 +39,7 @@ const (
rsmChangeConf
rsmApplyConfChange
rsmReady
rsmAdvance
rsmSendAppendEntriesRequest
rsmReceiveAppendEntriesRequest
rsmSendAppendEntriesResponse
Expand All @@ -53,6 +55,7 @@ const (
func (e stateMachineEventType) String() string {
return []string{
"InitState",
"Bootstrap",
"BecomeCandidate",
"BecomeFollower",
"BecomeLeader",
Expand All @@ -61,6 +64,7 @@ func (e stateMachineEventType) String() string {
"ChangeConf",
"ApplyConfChange",
"Ready",
"Advance",
"SendAppendEntriesRequest",
"ReceiveAppendEntriesRequest",
"SendAppendEntriesResponse",
Expand All @@ -86,6 +90,7 @@ type TracingEvent struct {
State TracingState `json:"state"`
Role string `json:"role"`
LogSize uint64 `json:"log"`
Applied uint64 `json:"applied"`
Conf [2][]string `json:"conf"`
Message *TracingMessage `json:"msg,omitempty"`
ConfChange *TracingConfChange `json:"cc,omitempty"`
Expand Down Expand Up @@ -173,6 +178,7 @@ func traceEvent(evt stateMachineEventType, r *raft, m *raftpb.Message, prop map[
NodeID: strconv.FormatUint(r.id, 10),
State: makeTracingState(r),
LogSize: r.raftLog.lastIndex(),
Applied: r.raftLog.applied,
Conf: [2][]string{formatConf(r.trk.Voters[0].Slice()), formatConf(r.trk.Voters[1].Slice())},
Role: r.state.String(),
Message: makeTracingMessage(m),
Expand Down Expand Up @@ -206,10 +212,18 @@ func traceInitState(r *raft) {
traceNodeEvent(rsmInitState, r)
}

func traceBootstrap(r *raft) {
traceNodeEvent(rsmBootstrap, r)
}

func traceReady(r *raft) {
traceNodeEvent(rsmReady, r)
}

func traceAdvance(r *raft) {
traceNodeEvent(rsmAdvance, r)
}

func traceCommit(r *raft) {
traceNodeEvent(rsmCommit, r)
}
Expand Down
4 changes: 4 additions & 0 deletions state_trace_nop.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ type TracingEvent struct{}

func traceInitState(*raft) {}

func traceBootstrap(*raft) {}

func traceReady(*raft) {}

func traceAdvance(*raft) {}

func traceCommit(*raft) {}

func traceReplicate(*raft, ...raftpb.Entry) {}
Expand Down
33 changes: 15 additions & 18 deletions tla/MCetcdraft.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,31 @@
\* See the License for the specific language governing permissions and
\* limitations under the License.
\*
SPECIFICATION mc_etcdSpec
SPECIFICATION MCSpec

CONSTANTS
s1 = 1
s2 = 2
s3 = 3
s4 = 4
s5 = 5
s1 = s1
s2 = s2
s3 = s3
s4 = s4
s5 = s5

InitServer = {s1, s2, s3}
Server = {s1, s2, s3, s4}

ReconfigurationLimit = 2
MaxTermLimit = 10
RequestLimit = 5

Timeout <- MCTimeout
Send <- MCSend
ClientRequest <- MCClientRequest
AddNewServer <- MCAddNewServer
DeleteServer <- MCDeleteServer
AddLearner <- MCAddLearner

MaxTermLimit = 3
RequestLimit = 3
MaxRestart = 2
MaxStepDownToFollower = 1

InitServerVars <- etcdInitServerVars
InitLogVars <- etcdInitLogVars
InitConfigVars <- etcdInitConfigVars

IsEnabled <- MCIsEnabled
PostAction <- MCPostAction

Nil = 0

ValueEntry = "ValueEntry"
Expand All @@ -65,5 +63,4 @@ INVARIANTS
LogMatchingInv
QuorumLogInv
MoreUpToDateCorrectInv
LeaderCompletenessInv
CommittedIsDurableInv
LeaderCompletenessInv
144 changes: 80 additions & 64 deletions tla/MCetcdraft.tla
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,39 @@
\* limitations under the License.
\*

EXTENDS etcdraft
EXTENDS etcdraft_control

CONSTANTS s1,s2,s3,s4,s5

CONSTANT ReconfigurationLimit
ASSUME ReconfigurationLimit \in Nat

CONSTANT MaxTermLimit
ASSUME MaxTermLimit \in Nat

CONSTANT MaxRestart
ASSUME MaxRestart \in Nat

\* Limit on client requests
CONSTANT RequestLimit
ASSUME RequestLimit \in Nat

etcd == INSTANCE etcdraft
CONSTANT MaxStepDownToFollower
ASSUME MaxStepDownToFollower \in Nat

etcd_control == INSTANCE etcdraft_control

VARIABLE restartCount
VARIABLE stepDownCount
constaintVars == <<restartCount, stepDownCount>>

mcVars == << vars, constaintVars >>

ChangeOneVar(_vars, index, op(_)) ==
IF op(index) THEN
UNCHANGED RemoveAt(_vars, index)
ELSE
UNCHANGED _vars

\* Application uses Node (instead of RawNode) will have multiple ConfigEntry entries appended to log in bootstrapping.
BootstrapLog ==
Expand All @@ -42,79 +62,75 @@ etcdInitServerVars == /\ currentTerm = [i \in Server |-> IF i \in InitServer TH
/\ votedFor = [i \in Server |-> Nil]
etcdInitLogVars == /\ log = [i \in Server |-> IF i \in InitServer THEN BootstrapLog ELSE <<>>]
/\ commitIndex = [i \in Server |-> IF i \in InitServer THEN Cardinality(InitServer) ELSE 0]
/\ applied = [i \in Server |-> 0]
etcdInitConfigVars == /\ config = [i \in Server |-> [ jointConfig |-> IF i \in InitServer THEN <<InitServer, {}>> ELSE <<{}, {}>>, learners |-> {}]]
/\ reconfigCount = 0 \* the bootstrap configuraitons are not counted
/\ appliedConfChange = [i \in Server |-> Len(BootstrapLog)]

\* This file controls the constants as seen below.
\* In addition to basic settings of how many nodes are to be model checked,
\* the model allows to place additional limitations on the state space of the program.

\* Limit the # of reconfigurations to ReconfigurationLimit
MCAddNewServer(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!AddNewServer(i, j)
MCDeleteServer(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!DeleteServer(i, j)
MCAddLearner(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!AddLearner(i, j)

\* Limit the terms that can be reached. Needs to be set to at least 3 to
\* evaluate all relevant states. If set to only 2, the candidate_quorum
\* constraint below is too restrictive.
MCTimeout(i) ==
\* Limit the term of each server to reduce state space
/\ currentTerm[i] < MaxTermLimit
\* Limit max number of simultaneous candidates
\* We made several restrictions to the state space of Raft. However since we
\* made these restrictions, Deadlocks can occur at places that Raft would in
\* real-world deployments handle graciously.
\* One example of this is if a Quorum of nodes becomes Candidate but can not
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
\* not an issue since breadth-first search will make sure that a similar
\* situation is simulated at term==1 which results in a term increase to 2.
/\ Cardinality({ s \in GetConfig(i) : state[s] = Candidate}) < 1
/\ etcd!Timeout(i)

\* Limit number of requests (new entries) that can be made
MCClientRequest(i, v) ==
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
/\ FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[i]) < RequestLimit
/\ etcd!ClientRequest(i, v)

\* Limit how many identical append entries messages each node can send to another
\* Limit number of duplicate messages sent to the same server
MCSend(msg) ==
\* One AppendEntriesRequest per node-pair at a time:
\* a) No AppendEntries request from i to j.
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
/\ n.mdest = msg.mdest
/\ n.msource = msg.msource
/\ n.mterm = msg.mterm
/\ n.mtype = AppendEntriesRequest
/\ msg.mtype = AppendEntriesRequest
\* b) No (corresponding) AppendEntries response from j to i.
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
/\ n.mdest = msg.msource
/\ n.msource = msg.mdest
/\ n.mterm = msg.mterm
/\ n.mtype = AppendEntriesResponse
/\ msg.mtype = AppendEntriesRequest
/\ etcd!Send(msg)

mc_etcdSpec ==
/\ Init
/\ [][NextDynamic]_vars

\* Check the # of reconfigurations and return true if it is lower than ReconfigurationLimit
UnderReconfigLimit ==
LET leaders == {i \in Server : state[i] = Leader}
IN
\E i \in leaders :
/\ \A j \in leaders \ {i} : Len(log[i]) >= Len(log[j])
/\ ReconfigurationLimit > FoldSeq(LAMBDA x, y: IF x.type = ConfigEntry THEN 1 ELSE 0, -Cardinality(InitServer), log[i])

MCIsEnabled(action, args) ==
CASE action \in { "AddNewServer", "DeleteServer", "AddLearner" } -> UnderReconfigLimit
[] action = "Restart" -> restartCount < MaxRestart
[] action = "Timeout" ->
\* Limit the term of each server to reduce state space
/\ currentTerm[args[1]] < MaxTermLimit
\* Limit max number of simultaneous candidates
\* We made several restrictions to the state space of Raft. However since we
\* made these restrictions, Deadlocks can occur at places that Raft would in
\* real-world deployments handle graciously.
\* One example of this is if a Quorum of nodes becomes Candidate but can not
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
\* not an issue since breadth-first search will make sure that a similar
\* situation is simulated at term==1 which results in a term increase to 2.
/\ Cardinality({ s \in GetConfig(args[1]) : state[s] = Candidate}) < 1
[] action = "ClientRequest" ->
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[args[1]]) < RequestLimit
[] action = "Ready" ->
\* If there is no new message and no state change, we can skip Advance to reduce state spece
\* to be explored
LET rd == ReadyData(args[1])
IN
\/ rd.msgs /= EmptyBag
\/ DurableStateFromReady(rd) /= durableState[args[1]]
[] action = "StepDownToFollower" -> stepDownCount < MaxStepDownToFollower
[] OTHER -> etcd_control!IsEnabled(action, args)

MCPostAction(action, args) ==
CASE action = "Restart" ->
/\ restartCount' = restartCount + 1
/\ UNCHANGED <<stepDownCount>>
[] action = "StepDownToFollower" ->
/\ stepDownCount' = stepDownCount + 1
/\ UNCHANGED <<restartCount>>
[] OTHER -> UNCHANGED <<constaintVars>>
\* Symmetry set over possible servers. May dangerous and is only enabled
\* via the Symmetry option in cfg file.
Symmetry == Permutations(Server)

\* Include all variables in the view, which is similar to defining no view.
View == << vars >>
View == mcVars

----
InitConstraintVars ==
/\ restartCount = 0
/\ stepDownCount = 0

MCInit ==
/\ Init
/\ InitConstraintVars

MCSpec == MCInit /\ [][Controlled_Next]_mcVars

----
===================================
5 changes: 5 additions & 0 deletions tla/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ The TLA+ spec defines the desired behaviors of the model. To validate the correc
./validate-model.sh -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
```

You can also add `-m` option to run model checking in simulation mode, which will randomly walk in the state machine and is helpful for finding issues quickly.

```console
./validate-model.sh -m -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
```

## Validate Collected Traces
With above example trace logger, validate.sh can be used to validate traces parallelly.
Expand Down
Loading

0 comments on commit d0a1a11

Please sign in to comment.