From 8622249bc3dc258336191f8ae92f746c2c245b8c Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe Date: Thu, 10 Oct 2024 06:52:00 -0700 Subject: [PATCH 1/4] Collect (more detailed) extended statistics from N independent simulation runs. Signed-off-by: Markus Alexander Kuppe --- tla/consensus/SIMCoverageccfraft.R | 18 ------ tla/consensus/SIMCoverageccfraft.cfg | 49 ---------------- tla/consensus/SIMCoverageccfraft.tla | 87 ++++++++++------------------ tla/consensus/SIMccfraft.cfg | 4 ++ tla/consensus/SIMccfraft.tla | 30 ++++++++-- 5 files changed, 61 insertions(+), 127 deletions(-) delete mode 100644 tla/consensus/SIMCoverageccfraft.R delete mode 100644 tla/consensus/SIMCoverageccfraft.cfg diff --git a/tla/consensus/SIMCoverageccfraft.R b/tla/consensus/SIMCoverageccfraft.R deleted file mode 100644 index f7822ce015e9..000000000000 --- a/tla/consensus/SIMCoverageccfraft.R +++ /dev/null @@ -1,18 +0,0 @@ -require(ggplot2) -require(dplyr) - -df <- unique(read.csv(header = TRUE, sep = "#", file = "./SIMCoverageccfraft_S5.csv")) - -# Add a column to df that combines the three columns Spec, P, and C. -df$SpecP <- paste(df$Spec, df$P, df$Q, df$R, sep = "_") - -# Eyeball if all configurations are roughly equally represented. -df %>% group_by(SpecP) %>% summarize(count = n()) -# Count the occurrences of each character sequence in column state -# grouped by column SpecP. -df %>% - group_by(SpecP, state) %>% - summarize(count = n()) %>% - ggplot(aes(x=SpecP, fill=state, y=count)) + - geom_bar(stat="identity") + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) diff --git a/tla/consensus/SIMCoverageccfraft.cfg b/tla/consensus/SIMCoverageccfraft.cfg deleted file mode 100644 index 60ad478208d7..000000000000 --- a/tla/consensus/SIMCoverageccfraft.cfg +++ /dev/null @@ -1,49 +0,0 @@ -SPECIFICATION SIMCoverageSpec - -CONSTANTS - Servers <- Servers_mc - - Nil = Nil - - Follower = L_Follower - Candidate = L_Candidate - Leader = L_Leader - None = L_None - - Active = R_Active - RetirementOrdered = R_RetirementOrdered - RetirementSigned = R_RetirementSigned - RetirementCompleted = R_RetirementCompleted - RetiredCommitted = R_RetiredCommitted - - RequestVoteRequest = M_RequestVoteRequest - RequestVoteResponse = M_RequestVoteResponse - AppendEntriesRequest = M_AppendEntriesRequest - AppendEntriesResponse = M_AppendEntriesResponse - ProposeVoteRequest = M_ProposeVoteRequest - - OrderedNoDup = N_OrderedNoDup - Ordered = N_Ordered - ReorderedNoDup = N_ReorderedNoDup - Reordered = N_Reordered - Guarantee = N_OrderedNoDup - - TypeEntry = T_Entry - TypeSignature = T_Signature - TypeReconfiguration = T_Reconfiguration - TypeRetired = T_Retired - - NodeOne = n1 - NodeTwo = n2 - NodeThree = n3 - NodeFour = n4 - NodeFive = n5 - - Timeout <- SIMTimeout - ChangeConfigurationInt <-SIMChangeConfigurationInt - CheckQuorum <- SIMCheckQuorum - - InitReconfigurationVars <- SIMInitReconfigurationVars - -CONSTRAINT - StatisticsStateConstraint diff --git a/tla/consensus/SIMCoverageccfraft.tla b/tla/consensus/SIMCoverageccfraft.tla index 69f2d0c4c277..abb25be20280 100644 --- a/tla/consensus/SIMCoverageccfraft.tla +++ b/tla/consensus/SIMCoverageccfraft.tla @@ -1,65 +1,40 @@ ----------- MODULE SIMCoverageccfraft ---------- -EXTENDS ccfraft, TLC, Integers, CSV, TLCExt -CONSTANTS - NodeOne, NodeTwo, NodeThree, NodeFour, NodeFive + $ wget https://nightly.tlapl.us/dist/tla2tools.jar + $ wget https://github.com/tlaplus/CommunityModules/releases/latest/download/CommunityModules-deps.jar + ## Run wiht as many workers as you like to parallelize the nested simulation runs (auto uses all your cores). + $ java -jar tla2tools.jar -config SIMCoverageccfraft.tla SIMCoverageccfraft.tla -workers auto -Servers_mc == {NodeOne, NodeTwo, NodeThree, NodeFour, NodeFive} +----------------------------- MODULE SIMCoverageccfraft ----------------------------- +EXTENDS TLC, Naturals, Sequences, IOUtils -Baseline == - {<<"Next", 0..0, 0..0, 0..0>>} +CmdLine == + <<"sh", "-c", + "java " \o + "-XX:+UseParallelGC " \o + "-Dtlc2.tool.impl.Tool.cdot=true " \o + "-Dtlc2.tool.Simulator.extendedStatistics=true " \o + "-jar tla2tools.jar " \o + "-depth 1000 " \o + "-simulate SIMccfraft.tla >> SIMCoverageccfraft.txt 2>&1">> -Confs == - Baseline \cup - ({"SIMNext"} \X {1..1, 1..10, 1..100} \X {201..201, 201..210, 201..300} \X {401..401, 401..410, 401..500}) +----------------------------------------------------------------------------- -VARIABLE conf +VARIABLE c, d -CCF == INSTANCE ccfraft +Init == + /\ c \in [ R: {10, 1000}, C: {10, 1000}, Q: {10, 1000}, T: {10, 1000} ] + /\ d = FALSE -SIMInitReconfigurationVars == - \/ CCF!InitLogConfigServerVars(Servers, JoinedLog) - \/ CCF!InitReconfigurationVars +Next == + /\ ~d + /\ d' = TRUE + /\ PrintT(<<"conf", c>>) + /\ IOEnvExec(c, CmdLine).exitValue = 0 + /\ UNCHANGED c -SIMCheckQuorum(i) == - /\ conf[1] # "Next" => RandomElement(1..1000) \in conf[3] - /\ CCF!CheckQuorum(i) - -SIMChangeConfigurationInt(i, newConfiguration) == - /\ conf[1] # "Next" => RandomElement(1..1000) \in conf[4] - /\ CCF!ChangeConfigurationInt(i, newConfiguration) - -SIMTimeout(i) == - /\ \/ RandomElement(1..1000) \in conf[2] - \/ conf[1] # "Next" - \* Always allow Timeout if no messages are in the network - \* and no node is a candidate or leader. Otherwise, the system - \* will deadlock if 1 # RandomElement(...). - \/ /\ \A s \in Servers: leadershipState[s] \notin {Leader, Candidate} - /\ Network!Messages = {} - /\ CCF!Timeout(i) - -SIMCoverageSpec == - /\ Init - /\ conf \in Confs - /\ [][UNCHANGED conf /\ Next]_<> - ------------------------------------------------------------------------------- - -CSVFile == "SIMCoverageccfraft_S" \o ToString(Cardinality(Servers)) \o ".csv" - -CSVColumnHeaders == - "Spec#P#Q#R#currentTerm#state#commitIndex#log#node" - -ASSUME - CSVRecords(CSVFile) = 0 => - CSVWrite(CSVColumnHeaders, <<>>, CSVFile) - -StatisticsStateConstraint == - (TLCGet("level") > TLCGet("config").depth) => - TLCDefer(\A s \in Servers : CSVWrite("%1$s#%2$s#%3$s#%4$s#%5$s#%6$s#%7$s#%8$s#%9$s", - << conf[1], Cardinality(conf[2]), Cardinality(conf[3]), Cardinality(conf[4]), - currentTerm[s], leadershipState[s], commitIndex[s], Len(log[s]), s - >>, - CSVFile)) ============================================================================= +---- CONFIG SIMCoverageccfraft ---- +INIT Init +NEXT Next +CHECK_DEADLOCK FALSE +==== \ No newline at end of file diff --git a/tla/consensus/SIMccfraft.cfg b/tla/consensus/SIMccfraft.cfg index 0e2dac7a5aac..9c69884beace 100644 --- a/tla/consensus/SIMccfraft.cfg +++ b/tla/consensus/SIMccfraft.cfg @@ -42,6 +42,7 @@ CONSTANTS Timeout <- SIMTimeout ChangeConfigurationInt <-SIMChangeConfigurationInt CheckQuorum <- SIMCheckQuorum + ClientRequest <- SIMClientRequest Fairness <- SIMFairness @@ -53,6 +54,9 @@ CONSTANTS CONSTRAINT StopAfter +_PERIODIC + SerializeTLCStats + CHECK_DEADLOCK FALSE diff --git a/tla/consensus/SIMccfraft.tla b/tla/consensus/SIMccfraft.tla index 269db4c61009..18a1073acb40 100644 --- a/tla/consensus/SIMccfraft.tla +++ b/tla/consensus/SIMccfraft.tla @@ -1,5 +1,5 @@ ---------- MODULE SIMccfraft ---------- -EXTENDS ccfraft, TLC, Integers, IOUtils, MCAliases +EXTENDS ccfraft, TLC, Integers, IOUtils CONSTANTS NodeOne, NodeTwo, NodeThree, NodeFour, NodeFive @@ -16,16 +16,32 @@ SIMInitReconfigurationVars == \* Start with any subset of servers in the active configuration. \/ CCF!InitReconfigurationVars +R == + 1..IF "R" \in DOMAIN IOEnv THEN atoi(IOEnv.R) ELSE 100 + +SIMClientRequest(i) == + /\ 1 = RandomElement(R) + /\ CCF!ClientRequest(i) + +Q == + 1..IF "Q" \in DOMAIN IOEnv THEN atoi(IOEnv.Q) ELSE 100 + SIMCheckQuorum(i) == - /\ 1 = RandomElement(1..10) + /\ 1 = RandomElement(Q) /\ CCF!CheckQuorum(i) +C == + 1..IF "C" \in DOMAIN IOEnv THEN atoi(IOEnv.C) ELSE 100 + SIMChangeConfigurationInt(i, newConfiguration) == - /\ 1 = RandomElement(1..100) + /\ 1 = RandomElement(C) /\ CCF!ChangeConfigurationInt(i, newConfiguration) +T == + 1..IF "T" \in DOMAIN IOEnv THEN atoi(IOEnv.T) ELSE 100 + SIMTimeout(i) == - /\ \/ 1 = RandomElement(1..100) + /\ \/ 1 = RandomElement(T) \* Always allow Timeout if no messages are in the network \* and no node is a candidate or leader. Otherwise, the system \* will deadlock if 1 # RandomElement(...). @@ -65,6 +81,12 @@ StopAfter == (* The smoke test has a time budget of 20 minutes. *) IN TLCSet("exit", TLCGet("duration") > timeout) +SerializeFilename == + "SIMccfraft_" \o "R-" \o ToString(R) \o "_T-" \o ToString(T) \o "_Q-" \o ToString(Q) \o "_C-" \o ToString(C) \o "_ts-" \o ToString(JavaTime) \o ".ndjson" + +SerializeTLCStats == + Serialize(<>, SerializeFilename, [format |-> "NDJSON", charset |-> "UTF-8", openOptions |-> <<"WRITE", "CREATE", "APPEND">>]) + DebugInvUpToDepth == \* The following invariant causes TLC to terminate with a counterexample of length \* -depth after generating the first trace. From 59922ca899d488dbe20341734bb60d96d377e664 Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe Date: Thu, 10 Oct 2024 07:04:58 -0700 Subject: [PATCH 2/4] Change StopAfter from a state constraint to a periodic hook. Resolution will now be limited to the frequency of the periodic interval because TLC will no longer evaluate StopAfter for every state. Signed-off-by: Markus Alexander Kuppe --- tla/consensus/SIMccfraft.cfg | 5 +---- tla/consensus/SIMccfraft.tla | 11 +++++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tla/consensus/SIMccfraft.cfg b/tla/consensus/SIMccfraft.cfg index 9c69884beace..26d2b1f51e06 100644 --- a/tla/consensus/SIMccfraft.cfg +++ b/tla/consensus/SIMccfraft.cfg @@ -51,11 +51,8 @@ CONSTANTS Extend <- [abs]ABSExtend CopyMaxAndExtend <- [abs]ABSCopyMaxAndExtend -CONSTRAINT - StopAfter - _PERIODIC - SerializeTLCStats + Periodically CHECK_DEADLOCK FALSE diff --git a/tla/consensus/SIMccfraft.tla b/tla/consensus/SIMccfraft.tla index 18a1073acb40..95ea8805cfb5 100644 --- a/tla/consensus/SIMccfraft.tla +++ b/tla/consensus/SIMccfraft.tla @@ -74,11 +74,12 @@ SIMFairness == \E newConfiguration \in SUBSET(Servers) \ {{}}: WF_vars(CCF!ChangeConfigurationInt(s, newConfiguration)) -\* The state constraint StopAfter stops TLC after the alloted +---- + +\* StopAfter stops TLC after the alloted \* time budget is up, unless TLC encounters an error first. StopAfter == LET timeout == IF ("SIM_TIMEOUT" \in DOMAIN IOEnv) /\ IOEnv.SIM_TIMEOUT # "" THEN atoi(IOEnv.SIM_TIMEOUT) ELSE 1200 - (* The smoke test has a time budget of 20 minutes. *) IN TLCSet("exit", TLCGet("duration") > timeout) SerializeFilename == @@ -87,6 +88,12 @@ SerializeFilename == SerializeTLCStats == Serialize(<>, SerializeFilename, [format |-> "NDJSON", charset |-> "UTF-8", openOptions |-> <<"WRITE", "CREATE", "APPEND">>]) +Periodically == + /\ StopAfter + /\ SerializeTLCStats + +---- + DebugInvUpToDepth == \* The following invariant causes TLC to terminate with a counterexample of length \* -depth after generating the first trace. From 4847693fb550dc330a4d457be5314ac40816bcd6 Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe Date: Thu, 10 Oct 2024 07:12:00 -0700 Subject: [PATCH 3/4] Adjust the probabilities based on empirical data to achieve less uneven coverage of the action space. Signed-off-by: Markus Alexander Kuppe --- tla/consensus/SIMccfraft.tla | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tla/consensus/SIMccfraft.tla b/tla/consensus/SIMccfraft.tla index 95ea8805cfb5..d9c234e89cf6 100644 --- a/tla/consensus/SIMccfraft.tla +++ b/tla/consensus/SIMccfraft.tla @@ -16,29 +16,29 @@ SIMInitReconfigurationVars == \* Start with any subset of servers in the active configuration. \/ CCF!InitReconfigurationVars -R == - 1..IF "R" \in DOMAIN IOEnv THEN atoi(IOEnv.R) ELSE 100 +LOCAL R == + 1..IF "R" \in DOMAIN IOEnv THEN atoi(IOEnv.R) ELSE 10 SIMClientRequest(i) == /\ 1 = RandomElement(R) /\ CCF!ClientRequest(i) -Q == - 1..IF "Q" \in DOMAIN IOEnv THEN atoi(IOEnv.Q) ELSE 100 +LOCAL Q == + 1..IF "Q" \in DOMAIN IOEnv THEN atoi(IOEnv.Q) ELSE 10 SIMCheckQuorum(i) == /\ 1 = RandomElement(Q) /\ CCF!CheckQuorum(i) -C == - 1..IF "C" \in DOMAIN IOEnv THEN atoi(IOEnv.C) ELSE 100 +LOCAL C == + 1..IF "C" \in DOMAIN IOEnv THEN atoi(IOEnv.C) ELSE 10 SIMChangeConfigurationInt(i, newConfiguration) == /\ 1 = RandomElement(C) /\ CCF!ChangeConfigurationInt(i, newConfiguration) -T == - 1..IF "T" \in DOMAIN IOEnv THEN atoi(IOEnv.T) ELSE 100 +LOCAL T == + 1..IF "T" \in DOMAIN IOEnv THEN atoi(IOEnv.T) ELSE 1000 SIMTimeout(i) == /\ \/ 1 = RandomElement(T) From 6ea22ff208ac2ca182a92ca775b9c9343c7a1fa6 Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe Date: Tue, 15 Oct 2024 08:36:39 -0700 Subject: [PATCH 4/4] Update tla/consensus/SIMCoverageccfraft.tla Co-authored-by: Amaury Chamayou --- tla/consensus/SIMCoverageccfraft.tla | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tla/consensus/SIMCoverageccfraft.tla b/tla/consensus/SIMCoverageccfraft.tla index abb25be20280..83f3d02bd82b 100644 --- a/tla/consensus/SIMCoverageccfraft.tla +++ b/tla/consensus/SIMCoverageccfraft.tla @@ -1,7 +1,7 @@ $ wget https://nightly.tlapl.us/dist/tla2tools.jar $ wget https://github.com/tlaplus/CommunityModules/releases/latest/download/CommunityModules-deps.jar - ## Run wiht as many workers as you like to parallelize the nested simulation runs (auto uses all your cores). + ## Run with as many workers as you like to parallelize the nested simulation runs (auto uses all your cores). $ java -jar tla2tools.jar -config SIMCoverageccfraft.tla SIMCoverageccfraft.tla -workers auto ----------------------------- MODULE SIMCoverageccfraft -----------------------------