From 2c4a32b9b2efeb0299ff279c0281c1d1a42a28c3 Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:15:15 -0800 Subject: [PATCH 1/6] add artifact writer --- pkg/artifacts/artifacts.go | 74 ++++++++++++ pkg/artifacts/types.go | 1 + pkg/health/checker.go | 26 ++-- pkg/health/ethereum/consensus.go | 2 +- pkg/health/ethereum/execution_rpc.go | 94 ++++++++++++++- pkg/health/ethereum/network_checker.go | 158 ++++++++++--------------- pkg/health/types/types.go | 24 +++- pkg/runtime.go | 87 ++------------ 8 files changed, 277 insertions(+), 189 deletions(-) create mode 100644 pkg/artifacts/artifacts.go create mode 100644 pkg/artifacts/types.go diff --git a/pkg/artifacts/artifacts.go b/pkg/artifacts/artifacts.go new file mode 100644 index 0000000..6f619e0 --- /dev/null +++ b/pkg/artifacts/artifacts.go @@ -0,0 +1,74 @@ +package artifacts + +import ( + chaosMesh "attacknet/cmd/pkg/chaos-mesh" + "attacknet/cmd/pkg/health" + healthTypes "attacknet/cmd/pkg/health/types" + "attacknet/cmd/pkg/types" + "errors" + "fmt" + "github.com/kurtosis-tech/stacktrace" + log "github.com/sirupsen/logrus" + "gopkg.in/yaml.v3" + "os" + path2 "path" + "time" +) + +type TestArtifact struct { + TestDescription string `yaml:"test_description"` + ContainersTargeted []string `yaml:"fault_injection_targets"` + TestPassed bool `yaml:"test_passed"` + HealthResult *healthTypes.HealthCheckResult `yaml:"health_check_results"` +} + +func BuildTestArtifact( + healthResults *healthTypes.HealthCheckResult, + podsUnderTest []*chaosMesh.PodUnderTest, + test types.SuiteTest, +) *TestArtifact { + + var containersTargeted []string + for _, p := range podsUnderTest { + containersTargeted = append(containersTargeted, p.GetName()) + } + + testPassed := health.AllChecksPassed(healthResults) + + return &TestArtifact{ + test.TestName, + containersTargeted, + testPassed, + healthResults, + } +} + +func SerializeTestArtifacts(artifacts []*TestArtifact) error { + artifactFilename := fmt.Sprintf("results-%d.yaml", time.Now().UnixMilli()) + + cwd, err := os.Getwd() + if err != nil { + return err + } + path := path2.Join(cwd, "artifacts") + + if _, err := os.Stat(path); errors.Is(err, os.ErrNotExist) { + err := os.Mkdir(path, os.ModePerm) + if err != nil { + log.Println(err) + } + } + + artifactPath := path2.Join(path, artifactFilename) + bs, err := yaml.Marshal(artifacts) + if err != nil { + return stacktrace.Propagate(err, "could not marshal test artifacts") + } + + err = os.WriteFile(artifactPath, bs, 0600) + if err != nil { + return stacktrace.Propagate(err, "could not write artifacts to %s", artifactPath) + } + log.Infof("Wrote test artifact to %s", artifactPath) + return nil +} diff --git a/pkg/artifacts/types.go b/pkg/artifacts/types.go new file mode 100644 index 0000000..33378d0 --- /dev/null +++ b/pkg/artifacts/types.go @@ -0,0 +1 @@ +package artifacts diff --git a/pkg/health/checker.go b/pkg/health/checker.go index 6161ba9..74cdc53 100644 --- a/pkg/health/checker.go +++ b/pkg/health/checker.go @@ -33,16 +33,18 @@ func BuildHealthChecker(cfg *confTypes.ConfigParsed, kubeClient *kubernetes.Kube return &CheckOrchestrator{checkerImpl: checkerImpl, gracePeriod: healthCheckConfig.GracePeriod}, nil } -func (hc *CheckOrchestrator) RunChecks(ctx context.Context) ([]*types.CheckResult, error) { +func (hc *CheckOrchestrator) RunChecks(ctx context.Context) (*types.HealthCheckResult, error) { start := time.Now() latestAllowable := start.Add(hc.gracePeriod) log.Infof("Allowing up to %.0f 
seconds for health checks to pass on all nodes", hc.gracePeriod.Seconds()) + lastHealthCheckResult := &types.HealthCheckResult{} for { - results, err := hc.checkerImpl.RunAllChecks(ctx) + results, err := hc.checkerImpl.RunAllChecks(ctx, lastHealthCheckResult) if err != nil { return nil, err } + lastHealthCheckResult = results if AllChecksPassed(results) { timeToPass := time.Since(start).Seconds() pctGraceUsed := timeToPass / hc.gracePeriod.Seconds() * 100 @@ -52,7 +54,7 @@ func (hc *CheckOrchestrator) RunChecks(ctx context.Context) ([]*types.CheckResul if time.Now().After(latestAllowable) { log.Warn("Grace period elapsed and a health check is still failing") - return results, stacktrace.NewError("tests failed") + return results, nil } else { log.Warn("Health checks failed but still in grace period") time.Sleep(1 * time.Second) @@ -60,11 +62,19 @@ func (hc *CheckOrchestrator) RunChecks(ctx context.Context) ([]*types.CheckResul } } -func AllChecksPassed(checks []*types.CheckResult) bool { - for _, r := range checks { - if len(r.PodsFailing) != 0 { - return false - } +func AllChecksPassed(checks *types.HealthCheckResult) bool { + if len(checks.LatestElBlockResult.FailingClientsReportedBlock) > 0 { + return false + } + if len(checks.LatestElBlockResult.FailingClientsReportedHash) > 0 { + return false } + if len(checks.FinalizedElBlockResult.FailingClientsReportedBlock) > 0 { + return false + } + if len(checks.FinalizedElBlockResult.FailingClientsReportedHash) > 0 { + return false + } + return true } diff --git a/pkg/health/ethereum/consensus.go b/pkg/health/ethereum/consensus.go index 33ccc8b..7e03067 100644 --- a/pkg/health/ethereum/consensus.go +++ b/pkg/health/ethereum/consensus.go @@ -16,7 +16,7 @@ type ClientForkChoice struct { BlockHash string } -func getExecNetworkConsensus(ctx context.Context, nodeClients []*ExecRpcClient, blockType string) ([]*ClientForkChoice, error) { +func getExecNetworkConsensus(ctx context.Context, nodeClients []*ExecClientRPC, blockType string) ([]*ClientForkChoice, error) { clientForkVotes := make([]*ClientForkChoice, len(nodeClients)) for i, client := range nodeClients { choice, err := client.GetLatestBlockBy(ctx, blockType) diff --git a/pkg/health/ethereum/execution_rpc.go b/pkg/health/ethereum/execution_rpc.go index 3ffd628..42f3a6c 100644 --- a/pkg/health/ethereum/execution_rpc.go +++ b/pkg/health/ethereum/execution_rpc.go @@ -1,33 +1,117 @@ package ethereum import ( + "attacknet/cmd/pkg/health/types" "attacknet/cmd/pkg/kubernetes" "context" "fmt" geth "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethclient" "github.com/kurtosis-tech/stacktrace" + log "github.com/sirupsen/logrus" + "time" ) -type ExecRpcClient struct { +type ExecClientRPC struct { session *kubernetes.PortForwardsSession client *ethclient.Client } -func CreateExecRpcClient(session *kubernetes.PortForwardsSession) (*ExecRpcClient, error) { +func (e *EthNetworkChecker) getExecBlockConsensus(ctx context.Context, clients []*ExecClientRPC, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) { + forkChoice, err := getExecNetworkConsensus(ctx, clients, blockType) + if err != nil { + return nil, err + } + // determine whether the nodes are in consensus + consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash := determineForkConsensus(forkChoice) + if len(wrongBlockNum) > 0 { + if maxAttempts > 0 { + log.Debugf("Nodes not at consensus for %s block. Waiting and re-trying in case we're on block propagation boundary. 
Attempts left: %d", blockType, maxAttempts-1) + time.Sleep(1 * time.Second) + return e.getExecBlockConsensus(ctx, clients, blockType, maxAttempts-1) + } else { + reportConsensusDataToLogger(blockType, consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash) + } + } + + blockNumWrong := make(map[string]uint64) + for _, node := range wrongBlockNum { + blockNumWrong[node.Pod.GetName()] = node.BlockNumber + } + + blockHashWrong := make(map[string]string) + + for _, node := range wrongBlockHash { + blockHashWrong[node.Pod.GetName()] = node.BlockHash + } + reportConsensusDataToLogger(blockType, consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash) + return &types.BlockConsensusTestResult{ + ConsensusBlock: (consensusBlockNum)[0].BlockNumber, + ConsensusHash: consensusBlockHash[0].BlockHash, + FailingClientsReportedBlock: blockNumWrong, + FailingClientsReportedHash: blockHashWrong, + }, nil +} + +func (e *EthNetworkChecker) dialToExecutionClients(ctx context.Context) ([]*ExecClientRPC, error) { + labelKey := "kurtosistech.com.custom/ethereum-package.client-type" + labelValue := "execution" + var podsToHealthCheck []kubernetes.KubePod + // add pods under test that match the label criteria + for _, pod := range e.podsUnderTest { + if pod.MatchesLabel(labelKey, labelValue) && !pod.ExpectDeath { + podsToHealthCheck = append(podsToHealthCheck, pod) + } + } + // add pods that were not targeted by a fault + bystanders, err := e.kubeClient.PodsMatchingLabel(ctx, labelKey, labelValue) + if err != nil { + return nil, err + } + for _, pod := range bystanders { + _, match := e.podsUnderTestLookup[pod.GetName()] + // don't add pods we've already added + if !match { + podsToHealthCheck = append(podsToHealthCheck, pod) + } + } + + log.Debugf("Starting port forward sessions to %d pods", len(podsToHealthCheck)) + portForwardSessions, err := e.kubeClient.StartMultiPortForwardToLabeledPods( + podsToHealthCheck, + labelKey, + labelValue, + 8545) + if err != nil { + return nil, err + } + + // dial out to clients + rpcClients := make([]*ExecClientRPC, len(portForwardSessions)) + for i, s := range portForwardSessions { + client, err := dialExecRpcClient(s) + if err != nil { + return nil, err + } + rpcClients[i] = client + } + return rpcClients, nil +} + +func dialExecRpcClient(session *kubernetes.PortForwardsSession) (*ExecClientRPC, error) { c, err := ethclient.Dial(fmt.Sprintf("http://localhost:%d", session.LocalPort)) if err != nil { return nil, stacktrace.Propagate(err, "err while dialing RPC for %s", session.Pod.GetName()) } - return &ExecRpcClient{session: session, client: c}, nil + return &ExecClientRPC{session: session, client: c}, nil } -func (c *ExecRpcClient) Close() { +func (c *ExecClientRPC) Close() { c.client.Close() c.session.Close() } -func (c *ExecRpcClient) GetLatestBlockBy(ctx context.Context, blockType string) (*ClientForkChoice, error) { +func (c *ExecClientRPC) GetLatestBlockBy(ctx context.Context, blockType string) (*ClientForkChoice, error) { // todo: handle pods that died and we didn't expect it var head *geth.Header var choice *ClientForkChoice diff --git a/pkg/health/ethereum/network_checker.go b/pkg/health/ethereum/network_checker.go index 7597963..2ab3687 100644 --- a/pkg/health/ethereum/network_checker.go +++ b/pkg/health/ethereum/network_checker.go @@ -4,16 +4,16 @@ import ( chaos_mesh "attacknet/cmd/pkg/chaos-mesh" "attacknet/cmd/pkg/kubernetes" "context" - "fmt" log "github.com/sirupsen/logrus" "time" ) import "attacknet/cmd/pkg/health/types" type 
EthNetworkChecker struct { - kubeClient *kubernetes.KubeClient - podsUnderTest []*chaos_mesh.PodUnderTest - podsUnderTestLookup map[string]*chaos_mesh.PodUnderTest + kubeClient *kubernetes.KubeClient + podsUnderTest []*chaos_mesh.PodUnderTest + podsUnderTestLookup map[string]*chaos_mesh.PodUnderTest + healthCheckStartTime time.Time } func CreateEthNetworkChecker(kubeClient *kubernetes.KubeClient, podsUnderTest []*chaos_mesh.PodUnderTest) *EthNetworkChecker { @@ -25,124 +25,92 @@ func CreateEthNetworkChecker(kubeClient *kubernetes.KubeClient, podsUnderTest [] } return &EthNetworkChecker{ - podsUnderTest: podsUnderTest, - podsUnderTestLookup: podsUnderTestMap, - kubeClient: kubeClient, + podsUnderTest: podsUnderTest, + podsUnderTestLookup: podsUnderTestMap, + kubeClient: kubeClient, + healthCheckStartTime: time.Now(), } } -func (e *EthNetworkChecker) RunAllChecks(ctx context.Context) ([]*types.CheckResult, error) { - labelKey := "kurtosistech.com.custom/ethereum-package.client-type" - labelValue := "execution" - - var podsToHealthCheck []kubernetes.KubePod - // add pods under test that match the label criteria - for _, pod := range e.podsUnderTest { - if pod.MatchesLabel(labelKey, labelValue) && !pod.ExpectDeath { - podsToHealthCheck = append(podsToHealthCheck, pod) - } - } - // add pods that were not targeted by a fault - bystanders, err := e.kubeClient.PodsMatchingLabel(ctx, labelKey, labelValue) - if err != nil { - return nil, err - } - for _, pod := range bystanders { - _, match := e.podsUnderTestLookup[pod.GetName()] - // don't add pods we've already added - if !match { - podsToHealthCheck = append(podsToHealthCheck, pod) - } - } - - log.Infof("Starting port forward sessions to %d pods", len(podsToHealthCheck)) - portForwardSessions, err := e.kubeClient.StartMultiPortForwardToLabeledPods( - podsToHealthCheck, - labelKey, - labelValue, - 8545) +func (e *EthNetworkChecker) RunAllChecks(ctx context.Context, prevHealthCheckResult *types.HealthCheckResult) (*types.HealthCheckResult, error) { + execRpcClients, err := e.dialToExecutionClients(ctx) if err != nil { return nil, err } - // dial out to clients - rpcClients := make([]*ExecRpcClient, len(portForwardSessions)) - for i, s := range portForwardSessions { - client, err := CreateExecRpcClient(s) - if err != nil { - return nil, err - } - rpcClients[i] = client - } - log.Debug("Ready to query for health checks") - latestResult, err := e.getBlockConsensus(ctx, rpcClients, "latest", 3) + latestResult, err := e.getExecBlockConsensus(ctx, execRpcClients, "latest", 5) if err != nil { return nil, err } - finalResult, err := e.getBlockConsensus(ctx, rpcClients, "finalized", 3) + latestArtifact := e.convertResultToArtifact(prevHealthCheckResult.LatestElBlockResult, latestResult) + + finalResult, err := e.getExecBlockConsensus(ctx, execRpcClients, "finalized", 3) if err != nil { return nil, err } + finalArtifact := e.convertResultToArtifact(prevHealthCheckResult.FinalizedElBlockResult, finalResult) - log.Infof("Finalization -> latest lag: %d", latestResult.ConsensusBlockNum-finalResult.ConsensusBlockNum) + log.Debugf("Finalization -> latest lag: %d", latestResult.ConsensusBlock-finalResult.ConsensusBlock) - // construct results - results := make([]*types.CheckResult, 4) - results[0] = latestResult.BlockNumResult - results[1] = latestResult.BlockHashResult - results[2] = finalResult.BlockNumResult - results[3] = finalResult.BlockHashResult + results := &types.HealthCheckResult{ + LatestElBlockResult: latestArtifact, + FinalizedElBlockResult: 
finalArtifact, + } return results, nil } -type getBlockConsensusResult struct { - BlockNumResult *types.CheckResult - BlockHashResult *types.CheckResult - ConsensusBlockNum uint64 - ConsensusBlockHash string -} +func (e *EthNetworkChecker) convertResultToArtifact( + prevArtifact *types.BlockConsensusArtifact, + result *types.BlockConsensusTestResult) *types.BlockConsensusArtifact { -func (e *EthNetworkChecker) getBlockConsensus(ctx context.Context, clients []*ExecRpcClient, blockType string, maxAttempts int) (*getBlockConsensusResult, error) { - forkChoice, err := getExecNetworkConsensus(ctx, clients, blockType) - if err != nil { - return nil, err + timeSinceChecksStarted := time.Since(e.healthCheckStartTime) + recoveredClients := make(map[string]int) + + if prevArtifact != nil { + // we only mark clients as recovered if at some point they were failing health checks. + for client := range prevArtifact.FailingClientsReportedHash { + if _, stillFailing := result.FailingClientsReportedHash[client]; !stillFailing { + recoveredClients[client] = int(timeSinceChecksStarted.Seconds()) + } + } + + for client := range prevArtifact.FailingClientsReportedBlock { + if _, stillFailing := result.FailingClientsReportedBlock[client]; !stillFailing { + recoveredClients[client] = int(timeSinceChecksStarted.Seconds()) + } + } + + // merge previously recovered clients with the new + for k, v := range prevArtifact.NodeRecoveryTimeSeconds { + recoveredClients[k] = v + } } - // determine whether the nodes are in consensus - consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash := determineForkConsensus(forkChoice) - if len(wrongBlockNum) > 0 { - if maxAttempts > 0 { - log.Debugf("Nodes not at consensus for %s block. Waiting and re-trying in case we're on block propagation boundary. 
Attempts left: %d", blockType, maxAttempts-1) - time.Sleep(2 * time.Second) - return e.getBlockConsensus(ctx, clients, blockType, maxAttempts-1) - } else { - reportConsensusDataToLogger(blockType, consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash) + + didUnfaultedNodesNeedToRecover := false + for client := range recoveredClients { + if _, wasUnderTest := e.podsUnderTestLookup[client]; !wasUnderTest { + didUnfaultedNodesNeedToRecover = true } } - blockNumResult := &types.CheckResult{} - blockNumResult.TestName = fmt.Sprintf("All nodes agree on %s block number", blockType) - for _, node := range consensusBlockNum { - blockNumResult.PodsPassing = append(blockNumResult.PodsPassing, node.Pod.GetName()) + didUnfaultedNodesFail := false + for client := range result.FailingClientsReportedBlock { + if _, wasUnderTest := e.podsUnderTestLookup[client]; !wasUnderTest { + didUnfaultedNodesFail = true + } } - for _, node := range wrongBlockNum { - blockNumResult.PodsFailing = append(blockNumResult.PodsFailing, node.Pod.GetName()) + for client := range result.FailingClientsReportedHash { + if _, wasUnderTest := e.podsUnderTestLookup[client]; !wasUnderTest { + didUnfaultedNodesFail = true + } } - blockHashResult := &types.CheckResult{} - blockHashResult.TestName = fmt.Sprintf("All nodes agree on %s block hash", blockType) - for _, node := range consensusBlockHash { - blockHashResult.PodsPassing = append(blockHashResult.PodsPassing, node.Pod.GetName()) - } - for _, node := range wrongBlockHash { - blockHashResult.PodsFailing = append(blockHashResult.PodsFailing, node.Pod.GetName()) + return &types.BlockConsensusArtifact{ + BlockConsensusTestResult: result, + DidUnfaultedNodesFail: didUnfaultedNodesFail, + DidUnfaultedNodesNeedToRecover: didUnfaultedNodesNeedToRecover, + NodeRecoveryTimeSeconds: recoveredClients, } - reportConsensusDataToLogger(blockType, consensusBlockNum, wrongBlockNum, consensusBlockHash, wrongBlockHash) - return &getBlockConsensusResult{ - blockNumResult, - blockHashResult, - consensusBlockNum[0].BlockNumber, - consensusBlockHash[0].BlockHash, - }, nil } diff --git a/pkg/health/types/types.go b/pkg/health/types/types.go index 2a8a5b0..79a3e8d 100644 --- a/pkg/health/types/types.go +++ b/pkg/health/types/types.go @@ -3,12 +3,24 @@ package types import "context" type GenericNetworkChecker interface { - RunAllChecks(context.Context) ([]*CheckResult, error) + RunAllChecks(context.Context, *HealthCheckResult) (*HealthCheckResult, error) } -type CheckResult struct { - // think of a better struct later - TestName string - PodsPassing []string - PodsFailing []string +type BlockConsensusTestResult struct { + ConsensusBlock uint64 `yaml:"consensus_block"` + ConsensusHash string `yaml:"consensus_hash"` + FailingClientsReportedBlock map[string]uint64 `yaml:"failing_clients_reported_block"` + FailingClientsReportedHash map[string]string `yaml:"failing_clients_reported_hash"` +} + +type BlockConsensusArtifact struct { + *BlockConsensusTestResult `yaml:",inline"` + DidUnfaultedNodesFail bool `yaml:"did_unfaulted_nodes_fail"` + DidUnfaultedNodesNeedToRecover bool `yaml:"did_unfaulted_nodes_need_to_recover"` + NodeRecoveryTimeSeconds map[string]int `yaml:"node_recovery_time_seconds"` +} + +type HealthCheckResult struct { + LatestElBlockResult *BlockConsensusArtifact `yaml:"latest_el_block_health_result"` + FinalizedElBlockResult *BlockConsensusArtifact `yaml:"finalized_el_block_health_result"` } diff --git a/pkg/runtime.go b/pkg/runtime.go index 6b75125..fda9720 100644 --- 
a/pkg/runtime.go +++ b/pkg/runtime.go @@ -1,6 +1,7 @@ package pkg import ( + "attacknet/cmd/pkg/artifacts" chaos_mesh "attacknet/cmd/pkg/chaos-mesh" "attacknet/cmd/pkg/health" "attacknet/cmd/pkg/kubernetes" @@ -40,6 +41,8 @@ func StartTestSuite(ctx context.Context, cfg *types.ConfigParsed) error { log.Infof("Running %d tests", len(cfg.TestConfig.Tests)) + var testArtifacts []*artifacts.TestArtifact + for i, test := range cfg.TestConfig.Tests { log.Infof("Running test #%d, '%s'", i, test.TestName) executor := test_executor.CreateTestExecutor(chaosClient, test) @@ -66,84 +69,20 @@ func StartTestSuite(ctx context.Context, cfg *types.ConfigParsed) error { if err != nil { return err } - // todo: log here - _ = results + testArtifact := artifacts.BuildTestArtifact(results, podsUnderTest, test) + testArtifacts = append(testArtifacts, testArtifact) + if !testArtifact.TestPassed { + log.Warn("Some health checks failed. Stopping test suite.") + break + } } } + err = artifacts.SerializeTestArtifacts(testArtifacts) + if err != nil { + return err + } enclave.Destroy(ctx) return nil - /* - faultSession, err := chaosClient.StartFault(ctx, cfg.Tests[0].FaultSpec) - if err != nil { - grafanaTunnel.Cleanup(true) - return err - } - - // start core logic loop here. - err = waitForInjectionCompleted(ctx, faultSession) - if err != nil { - grafanaTunnel.Cleanup(true) - return err - } - var timeToSleep time.Duration - if faultSession.TestDuration != nil { - durationSeconds := int(faultSession.TestDuration.Seconds()) - log.Infof("Fault injected successfully. Fault will run for %d seconds before recovering.", durationSeconds) - timeToSleep = *faultSession.TestDuration - } else { - log.Infof("Fault injected successfully. This fault has no specific duration.") - } - time.Sleep(timeToSleep) - - // we can build the health checker once the fault is injected - log.Info("creating health checker") - hc, err := health.BuildHealthChecker(cfg, kubeClient, faultSession.PodsUnderTest) - if err != nil { - return err - } - _ = hc - - err = waitForFaultRecovery(ctx, faultSession) - if err != nil { - grafanaTunnel.Cleanup(true) - return err - } - - _, err = hc.RunChecksUntilTimeout(ctx) - - return err*/ -} - -// todo: move to fault session? -/* - - -func waitForFaultRecovery(ctx context.Context, session *chaos_mesh.FaultSession) error { - for { - status, err := session.GetStatus(ctx) - if err != nil { - return err - } - - switch status { - case chaos_mesh.InProgress: - log.Infof("The fault is still finishing up. Sleeping for 10s") - time.Sleep(10 * time.Second) - case chaos_mesh.Stopping: - log.Infof("The fault is being stopped. Sleeping for 10s") - time.Sleep(10 * time.Second) - case chaos_mesh.Error: - log.Errorf("there was an error returned by chaos-mesh") - return errors.New("there was an unspecified error returned by chaos-mesh. 
inspect the fault resource") - case chaos_mesh.Completed: - log.Infof("The fault terminated successfully!") - return nil - default: - return stacktrace.NewError("unknown chaos session state %s", status) - } - // todo: add timeout break if no changes in k8s resource after fault duration elapses - } } -*/ From a47b28908e5bcdd779d773e95f97ad19fa2dace8 Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:31:49 -0800 Subject: [PATCH 2/6] improve logging --- pkg/runtime.go | 2 +- pkg/test_executor/executor.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/runtime.go b/pkg/runtime.go index fda9720..7f7dd47 100644 --- a/pkg/runtime.go +++ b/pkg/runtime.go @@ -44,7 +44,7 @@ func StartTestSuite(ctx context.Context, cfg *types.ConfigParsed) error { var testArtifacts []*artifacts.TestArtifact for i, test := range cfg.TestConfig.Tests { - log.Infof("Running test #%d, '%s'", i, test.TestName) + log.Infof("Running test (%d/%d): '%s'", i, len(cfg.TestConfig.Tests), test.TestName) executor := test_executor.CreateTestExecutor(chaosClient, test) err = executor.RunTestPlan(ctx) diff --git a/pkg/test_executor/executor.go b/pkg/test_executor/executor.go index fe03bbf..b160889 100644 --- a/pkg/test_executor/executor.go +++ b/pkg/test_executor/executor.go @@ -27,12 +27,12 @@ func (te *TestExecutor) RunTestPlan(ctx context.Context) error { if te.planCompleted { return stacktrace.NewError("test executor %s has already been run", te.testName) } - for _, genericStep := range te.planSteps { + for i, genericStep := range te.planSteps { marshalledSpec, err := yaml.Marshal(genericStep.Spec) if err != nil { return stacktrace.Propagate(err, "could not marshal plan step %s", genericStep.Spec) } - log.Infof("Running test step '%s'", genericStep.StepDescription) + log.Infof("Running test step (%d/%d): '%s'", i, len(te.planSteps), genericStep.StepDescription) switch genericStep.StepType { case types.InjectFault: var s PlanStepSingleFault From 0c70d7a73a04fb21b4db4cbd2c9bd15ffcf9ac2e Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:47:52 -0800 Subject: [PATCH 3/6] off by one bug --- pkg/runtime.go | 2 +- pkg/test_executor/executor.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/runtime.go b/pkg/runtime.go index 7f7dd47..2423e07 100644 --- a/pkg/runtime.go +++ b/pkg/runtime.go @@ -44,7 +44,7 @@ func StartTestSuite(ctx context.Context, cfg *types.ConfigParsed) error { var testArtifacts []*artifacts.TestArtifact for i, test := range cfg.TestConfig.Tests { - log.Infof("Running test (%d/%d): '%s'", i, len(cfg.TestConfig.Tests), test.TestName) + log.Infof("Running test (%d/%d): '%s'", i+1, len(cfg.TestConfig.Tests), test.TestName) executor := test_executor.CreateTestExecutor(chaosClient, test) err = executor.RunTestPlan(ctx) diff --git a/pkg/test_executor/executor.go b/pkg/test_executor/executor.go index b160889..5354086 100644 --- a/pkg/test_executor/executor.go +++ b/pkg/test_executor/executor.go @@ -32,7 +32,7 @@ func (te *TestExecutor) RunTestPlan(ctx context.Context) error { if err != nil { return stacktrace.Propagate(err, "could not marshal plan step %s", genericStep.Spec) } - log.Infof("Running test step (%d/%d): '%s'", i, len(te.planSteps), genericStep.StepDescription) + log.Infof("Running test step (%d/%d): '%s'", i+1, len(te.planSteps), genericStep.StepDescription) switch genericStep.StepType { case types.InjectFault: var s 
PlanStepSingleFault From e9f01c212411c09c745299313c490651a1738c6f Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:53:16 -0800 Subject: [PATCH 4/6] maker grace period and initial wait time configurable for plans --- pkg/plan/plan.go | 74 +--------------------------- pkg/plan/suite/faults.go | 2 +- pkg/plan/suite/suite_builder.go | 44 ++++++++++++++--- pkg/plan/suite/test_builder.go | 11 ++--- pkg/plan/suite/types.go | 3 ++ planner-configs/clock-skew-reth.yaml | 7 ++- 6 files changed, 53 insertions(+), 88 deletions(-) diff --git a/pkg/plan/plan.go b/pkg/plan/plan.go index 3cbb979..38d414f 100644 --- a/pkg/plan/plan.go +++ b/pkg/plan/plan.go @@ -30,7 +30,7 @@ func BuildPlan(planName string, config *PlannerConfig) error { attacknetConfig = types.AttacknetConfig{ GrafanaPodName: "grafana", GrafanaPodPort: "3000", - WaitBeforeInjectionSeconds: 0, + WaitBeforeInjectionSeconds: uint32(config.FaultConfig.WaitBeforeFirstTest.Seconds()), ReuseDevnetBetweenRuns: true, AllowPostFaultInspection: false, } @@ -38,7 +38,7 @@ func BuildPlan(planName string, config *PlannerConfig) error { attacknetConfig = types.AttacknetConfig{ GrafanaPodName: "grafana", GrafanaPodPort: "3000", - WaitBeforeInjectionSeconds: 0, + WaitBeforeInjectionSeconds: uint32(config.FaultConfig.WaitBeforeFirstTest.Seconds()), ReuseDevnetBetweenRuns: true, ExistingDevnetNamespace: config.KubernetesNamespace, AllowPostFaultInspection: false, @@ -67,73 +67,3 @@ func BuildPlan(planName string, config *PlannerConfig) error { return writePlans(netConfigPath, suiteConfigPath, networkConfig, suiteConfig) } - -/* - run time delay on various el/cl combos - -> each target exists in the same suite/network - - run time delay on group of el-cl nodes that use the same CL or EL - -> network minority - -> 33+ but less than 66% - - re-org on group of el-cl nodes that use the same CL or EL - - there's two steps, identifying targets, and creating the manifest for the target/test types - - targeting criteria types: - - percentages of the validator set (32, 33, 34, 50, 65)% - - subcategories: by node vs. by client - - target by client - - a specific node containing an instance of the client - - all nodes containing an instance of the client - - a specific instance of the client - - all instances of the client - - subcategories: target node or target client by criterion - - - clock skew - - extra varies: - - clock skew nodes by EL - - clock skew nodes by CL - - criterion: percentage(client, node), target by client(client, node) - - - restarts - - these restarts require resync - - criterion: percentages(client, node), target by client(client, node) - - network bandwidth - - extra varies: - - the amount of bandwidth - - whether the constraint is EL<-CL or node <-> network - - percentages - - client criterion (although not all client selections will be valid) - - network split - - percentages - - client criterion - - packet drop - - extra varies: loss pct, correlation - - latency - - extra varies: latency amount, correlation - - percentages (although includes 100%) - - clients (both type?) - - syncing faults - -> restart node, force to sync. inject fault while syncing. this impacts checkpoint sync probably too. 
- - packet corruption - - - each test builder needs a way to reject input corpus - eventually we'll want a way to block known bad inputs (ie: lodestar doesnt seem to re-establish peers correctly) - anotehr example: - - actual tasks: - - implement plan builder for each concept - - selector := buildParamsForNodeFault(node) -*/ -//return nil diff --git a/pkg/plan/suite/faults.go b/pkg/plan/suite/faults.go index e7bac06..e42f5d1 100644 --- a/pkg/plan/suite/faults.go +++ b/pkg/plan/suite/faults.go @@ -107,7 +107,7 @@ func buildPodRestartFault(description string, expressionSelectors []ChaosExpress Kind: "PodChaos", ApiVersion: "chaos-mesh.org/v1alpha1", Spec: PodChaosSpec{ - Duration: "10s", + Duration: "1s", Mode: "all", Selector: Selector{ ExpressionSelectors: expressionSelectors, diff --git a/pkg/plan/suite/suite_builder.go b/pkg/plan/suite/suite_builder.go index db33cb2..1b56d94 100644 --- a/pkg/plan/suite/suite_builder.go +++ b/pkg/plan/suite/suite_builder.go @@ -52,8 +52,19 @@ func ComposeTestSuite( runtimeEstimate += int(d.Seconds()) } } + var targetingDescription string + if targetDimension == TargetMatchingNode { + targetingDescription = fmt.Sprintf("Impacting the full node of targeted %s clients. Injecting into %s of the matching targets.", config.TargetClient, attackSize) + } else { + targetingDescription = fmt.Sprintf("Impacting the client of targeted %s clients. Injecting into %s of the matching targets.", config.TargetClient, attackSize) + } - test, err := composeTestsForFaultType(config.FaultType, faultConfig, targetSelectors) + test, err := composeTestForFaultType( + config.FaultType, + faultConfig, + targetSelectors, + targetingDescription, + ) if err != nil { return nil, err } @@ -67,10 +78,12 @@ func ComposeTestSuite( return tests, nil } -func composeTestsForFaultType( +func composeTestForFaultType( faultType FaultTypeEnum, config map[string]string, - targetSelectors []*ChaosTargetSelector) (*types.SuiteTest, error) { + targetSelectors []*ChaosTargetSelector, + targetingDescription string, +) (*types.SuiteTest, error) { switch faultType { case FaultClockSkew: @@ -82,11 +95,28 @@ func composeTestsForFaultType( if !ok { return nil, stacktrace.NewError("missing duration field for clock skew fault") } - description := fmt.Sprintf("Apply %s clock skew for %s against %d targets", skew, duration, len(targetSelectors)) - return composeNodeClockSkewTest(description, targetSelectors, skew, duration) + grace, ok := config["grace_period"] + if !ok { + return nil, stacktrace.NewError("missing grace_period field for clock skew fault") + } + graceDuration, err := time.ParseDuration(grace) + if err != nil { + return nil, stacktrace.NewError("unable to convert grace_period field to a time duration for clock skew fault") + } + + description := fmt.Sprintf("Apply %s clock skew for %s against %d targets. %s", skew, duration, len(targetSelectors), targetingDescription) + return composeNodeClockSkewTest(description, targetSelectors, skew, duration, graceDuration) case FaultContainerRestart: - description := fmt.Sprintf("Restarting %d targets", len(targetSelectors)) - return composeNodeRestartTest(description, targetSelectors) + grace, ok := config["grace_period"] + if !ok { + return nil, stacktrace.NewError("missing grace_period field for restsrt fault") + } + graceDuration, err := time.ParseDuration(grace) + if err != nil { + return nil, stacktrace.NewError("unable to convert grace_period field to a time duration for clock skew fault") + } + description := fmt.Sprintf("Restarting %d targets. 
%s", len(targetSelectors), targetingDescription) + return composeNodeRestartTest(description, targetSelectors, graceDuration) } return nil, nil diff --git a/pkg/plan/suite/test_builder.go b/pkg/plan/suite/test_builder.go index a1fab8d..6a41b55 100644 --- a/pkg/plan/suite/test_builder.go +++ b/pkg/plan/suite/test_builder.go @@ -5,10 +5,7 @@ import ( "time" ) -const clockSkewGracePeriod = time.Second * 1800 -const containerRestartGracePeriod = time.Second * 3600 - -func composeNodeClockSkewTest(description string, targets []*ChaosTargetSelector, skew, duration string) (*types.SuiteTest, error) { +func composeNodeClockSkewTest(description string, targets []*ChaosTargetSelector, skew, duration string, graceDuration time.Duration) (*types.SuiteTest, error) { var steps []types.PlanStep s, err := composeNodeClockSkewPlanSteps(targets, skew, duration) if err != nil { @@ -24,14 +21,14 @@ func composeNodeClockSkewTest(description string, targets []*ChaosTargetSelector PlanSteps: steps, HealthConfig: types.HealthCheckConfig{ EnableChecks: true, - GracePeriod: clockSkewGracePeriod, + GracePeriod: graceDuration, }, } return test, nil } -func composeNodeRestartTest(description string, targets []*ChaosTargetSelector) (*types.SuiteTest, error) { +func composeNodeRestartTest(description string, targets []*ChaosTargetSelector, graceDuration time.Duration) (*types.SuiteTest, error) { var steps []types.PlanStep s, err := composeNodeRestartSteps(targets) @@ -48,7 +45,7 @@ func composeNodeRestartTest(description string, targets []*ChaosTargetSelector) PlanSteps: steps, HealthConfig: types.HealthCheckConfig{ EnableChecks: true, - GracePeriod: containerRestartGracePeriod, + GracePeriod: graceDuration, }, } diff --git a/pkg/plan/suite/types.go b/pkg/plan/suite/types.go index 837b143..1dfe191 100644 --- a/pkg/plan/suite/types.go +++ b/pkg/plan/suite/types.go @@ -1,5 +1,7 @@ package suite +import "time" + type TargetingSpec string const ( @@ -49,6 +51,7 @@ var FaultTypes = map[FaultTypeEnum]bool{ type PlannerFaultConfiguration struct { FaultType FaultTypeEnum `yaml:"fault_type"` TargetClient string `yaml:"target_client"` + WaitBeforeFirstTest time.Duration `yaml:"wait_before_first_test"` FaultConfigDimensions []map[string]string `yaml:"fault_config_dimensions"` TargetingDimensions []TargetingSpec `yaml:"fault_targeting_dimensions"` AttackSizeDimensions []AttackSize `yaml:"fault_attack_size_dimensions"` diff --git a/planner-configs/clock-skew-reth.yaml b/planner-configs/clock-skew-reth.yaml index 27eeb3e..ce00c42 100644 --- a/planner-configs/clock-skew-reth.yaml +++ b/planner-configs/clock-skew-reth.yaml @@ -32,9 +32,14 @@ kubernetes_namespace: kt-ethereum fault_config: fault_type: ClockSkew target_client: reth + wait_before_first_test: 300s fault_config_dimensions: - skew: -2m - duration: 10m + duration: 1m + grace_period: 1800s + - skew: 2m + duration: 1m + grace_period: 1800s fault_targeting_dimensions: - MatchingNode - MatchingClient From 41259297e8c59b0d4f88eb8cadb5578bff0c2010 Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:55:58 -0800 Subject: [PATCH 5/6] increase health check retries --- pkg/health/ethereum/network_checker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/health/ethereum/network_checker.go b/pkg/health/ethereum/network_checker.go index 2ab3687..93fad68 100644 --- a/pkg/health/ethereum/network_checker.go +++ b/pkg/health/ethereum/network_checker.go @@ -39,7 +39,7 @@ func (e *EthNetworkChecker) 
RunAllChecks(ctx context.Context, prevHealthCheckRes } log.Debug("Ready to query for health checks") - latestResult, err := e.getExecBlockConsensus(ctx, execRpcClients, "latest", 5) + latestResult, err := e.getExecBlockConsensus(ctx, execRpcClients, "latest", 15) if err != nil { return nil, err } From 8a41c3c908d248cab13a0e276773bb38701199bc Mon Sep 17 00:00:00 2001 From: Benjamin Samuels <1222451+bsamuels453@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:56:16 -0800 Subject: [PATCH 6/6] update sample test suites --- network-configs/plan/test.yaml | 88 ------- network-configs/plan/testing.yaml | 20 +- test-suites/plan/reth-reorg.yaml | 18 -- test-suites/plan/test.yaml | 38 --- test-suites/plan/testing.yaml | 369 +++++++++++++++++++++++++++--- test-suites/suite.yaml | 4 +- 6 files changed, 352 insertions(+), 185 deletions(-) delete mode 100644 network-configs/plan/test.yaml delete mode 100644 test-suites/plan/reth-reorg.yaml delete mode 100644 test-suites/plan/test.yaml diff --git a/network-configs/plan/test.yaml b/network-configs/plan/test.yaml deleted file mode 100644 index 496a0a5..0000000 --- a/network-configs/plan/test.yaml +++ /dev/null @@ -1,88 +0,0 @@ -participants: - - el_client_type: geth - el_client_image: ethereum/client-go:latest - cl_client_type: lighthouse - cl_client_image: sigp/lighthouse:latest - el_min_cpu: 1000 - el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 - bn_min_cpu: 1000 - bn_max_cpu: 1000 - bn_min_mem: 2048 - bn_max_mem: 2048 - v_min_cpu: 1000 - v_max_cpu: 1000 - v_min_mem: 1024 - v_max_mem: 1024 - count: 1 - - el_client_type: reth - el_client_image: ghcr.io/paradigmxyz/reth:v0.1.0-alpha.13 - cl_client_type: lighthouse - cl_client_image: sigp/lighthouse:latest - el_min_cpu: 1000 - el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 - bn_min_cpu: 1000 - bn_max_cpu: 1000 - bn_min_mem: 2048 - bn_max_mem: 2048 - v_min_cpu: 1000 - v_max_cpu: 1000 - v_min_mem: 1024 - v_max_mem: 1024 - count: 1 - - el_client_type: reth - el_client_image: ghcr.io/paradigmxyz/reth:v0.1.0-alpha.13 - cl_client_type: prysm - cl_client_image: prysmaticlabs/prysm-beacon-chain:latest,prysmaticlabs/prysm-validator:latest - el_min_cpu: 1000 - el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 - bn_min_cpu: 1000 - bn_max_cpu: 1000 - bn_min_mem: 2048 - bn_max_mem: 2048 - v_min_cpu: 1000 - v_max_cpu: 1000 - v_min_mem: 1024 - v_max_mem: 1024 - count: 1 - - el_client_type: reth - el_client_image: ghcr.io/paradigmxyz/reth:v0.1.0-alpha.13 - cl_client_type: teku - cl_client_image: consensys/teku:23.12.0 - el_min_cpu: 1000 - el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 - bn_min_cpu: 1000 - bn_max_cpu: 1000 - bn_min_mem: 2048 - bn_max_mem: 2048 - count: 1 - - el_client_type: reth - el_client_image: ghcr.io/paradigmxyz/reth:v0.1.0-alpha.13 - cl_client_type: lodestar - cl_client_image: chainsafe/lodestar:v1.12.1 - el_min_cpu: 1000 - el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 - bn_min_cpu: 1000 - bn_max_cpu: 1000 - bn_min_mem: 2048 - bn_max_mem: 2048 - v_min_cpu: 1000 - v_max_cpu: 1000 - v_min_mem: 1024 - v_max_mem: 1024 - count: 1 -network_params: - num_validator_keys_per_node: "32" -additional_services: - - prometheus_grafana - - dora -parallel_keystore_generation: false diff --git a/network-configs/plan/testing.yaml b/network-configs/plan/testing.yaml index 53e6b9d..1eeaa1f 100644 --- a/network-configs/plan/testing.yaml +++ b/network-configs/plan/testing.yaml @@ -5,8 +5,8 @@ participants: cl_client_image: sigp/lighthouse:latest el_min_cpu: 1000 el_max_cpu: 1000 - 
el_min_mem: 2048 - el_max_mem: 2048 + el_min_mem: 1024 + el_max_mem: 1024 bn_min_cpu: 1000 bn_max_cpu: 1000 bn_min_mem: 2048 @@ -22,8 +22,8 @@ participants: cl_client_image: consensys/teku:23.12.0 el_min_cpu: 1000 el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 + el_min_mem: 1024 + el_max_mem: 1024 bn_min_cpu: 1000 bn_max_cpu: 1000 bn_min_mem: 2048 @@ -35,8 +35,8 @@ participants: cl_client_image: chainsafe/lodestar:v1.12.1 el_min_cpu: 1000 el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 + el_min_mem: 1024 + el_max_mem: 1024 bn_min_cpu: 1000 bn_max_cpu: 1000 bn_min_mem: 2048 @@ -52,8 +52,8 @@ participants: cl_client_image: sigp/lighthouse:latest el_min_cpu: 1000 el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 + el_min_mem: 1024 + el_max_mem: 1024 bn_min_cpu: 1000 bn_max_cpu: 1000 bn_min_mem: 2048 @@ -69,8 +69,8 @@ participants: cl_client_image: prysmaticlabs/prysm-beacon-chain:latest,prysmaticlabs/prysm-validator:latest el_min_cpu: 1000 el_max_cpu: 1000 - el_min_mem: 2048 - el_max_mem: 2048 + el_min_mem: 1024 + el_max_mem: 1024 bn_min_cpu: 1000 bn_max_cpu: 1000 bn_min_mem: 2048 diff --git a/test-suites/plan/reth-reorg.yaml b/test-suites/plan/reth-reorg.yaml deleted file mode 100644 index fede38c..0000000 --- a/test-suites/plan/reth-reorg.yaml +++ /dev/null @@ -1,18 +0,0 @@ -attacknetConfig: - grafanaPodName: grafana - grafanaPodPort: "3000" - allowPostFaultInspection: false - waitBeforeInjectionSeconds: 60 - reuseDevnetBetweenRuns: true - existingDevnetNamespace: kt-ethereum -harnessConfig: - networkType: ethereum - networkPackage: github.com/kurtosis-tech/ethereum-package - networkConfig: reth.yaml -testConfig: - tests: - - testName: clock skew - planSteps: [] - health: - enableChecks: true - gracePeriod: 2m0s diff --git a/test-suites/plan/test.yaml b/test-suites/plan/test.yaml deleted file mode 100644 index 52c5adf..0000000 --- a/test-suites/plan/test.yaml +++ /dev/null @@ -1,38 +0,0 @@ -attacknetConfig: - grafanaPodName: grafana - grafanaPodPort: "3000" - allowPostFaultInspection: false - waitBeforeInjectionSeconds: 0 - reuseDevnetBetweenRuns: true - existingDevnetNamespace: kt-ethereum -harnessConfig: - networkType: ethereum - networkPackage: github.com/kurtosis-tech/ethereum-package - networkConfig: plan/test.yaml -testConfig: - tests: - - testName: clock skew - health: - enableChecks: true - gracePeriod: 2m0s - planSteps: - - stepType: injectFault - description: 'Inject clock skew on target reth/teku Node (Node #3)' - chaosFaultSpec: - apiVersion: chaos-mesh.org/v1alpha1 - kind: TimeChaos - spec: - action: delay - duration: 1m - mode: all - selector: - expressionSelectors: - - key: kurtosistech.com/id - operator: In - values: - - el-3-reth-teku - - cl-3-teku-reth - timeOffset: -5m - - stepType: waitForFaultCompletion - description: wait for faults to terminate - diff --git a/test-suites/plan/testing.yaml b/test-suites/plan/testing.yaml index f007377..7be7ea8 100644 --- a/test-suites/plan/testing.yaml +++ b/test-suites/plan/testing.yaml @@ -2,7 +2,7 @@ attacknetConfig: grafanaPodName: grafana grafanaPodPort: "3000" allowPostFaultInspection: false - waitBeforeInjectionSeconds: 0 + waitBeforeInjectionSeconds: 300 reuseDevnetBetweenRuns: true existingDevnetNamespace: kt-ethereum harnessConfig: @@ -11,7 +11,7 @@ harnessConfig: networkConfig: plan/testing.yaml testConfig: tests: - - testName: Apply -2m clock skew for 10m against 1 targets + - testName: Apply -2m clock skew for 1m against 1 targets. Impacting the full node of targeted reth clients. 
Injecting into AttackOneMatching of the matching targets. planSteps: - stepType: injectFault description: 'Inject clock skew on target reth/teku Node (Node #2)' @@ -20,7 +20,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -34,8 +34,8 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s - - testName: Apply -2m clock skew for 10m against 3 targets + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 1 targets. Impacting the full node of targeted reth clients. Injecting into AttackOneMatching of the matching targets. planSteps: - stepType: injectFault description: 'Inject clock skew on target reth/teku Node (Node #2)' @@ -44,7 +44,31 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + - cl-2-teku-reth + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s + - testName: Apply -2m clock skew for 1m against 3 targets. Impacting the full node of targeted reth clients. Injecting into AttackSupermajorityMatching of the matching targets. + planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m mode: all selector: expressionSelectors: @@ -61,7 +85,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -79,7 +103,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -94,8 +118,68 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s - - testName: Apply -2m clock skew for 10m against 4 targets + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 3 targets. Impacting the full node of targeted reth clients. Injecting into AttackSupermajorityMatching of the matching targets. 
+ planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + - cl-2-teku-reth + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth/lodestar Node (Node #3)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-3-reth-lodestar + - cl-3-lodestar-reth + - cl-3-lodestar-reth-validator + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth/lighthouse Node (Node #4)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-4-reth-lighthouse + - cl-4-lighthouse-reth + - cl-4-lighthouse-reth-validator + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s + - testName: Apply -2m clock skew for 1m against 4 targets. Impacting the full node of targeted reth clients. Injecting into AttackAllMatching of the matching targets. planSteps: - stepType: injectFault description: 'Inject clock skew on target reth/teku Node (Node #2)' @@ -104,7 +188,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -121,7 +205,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -139,7 +223,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -157,7 +241,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -172,8 +256,86 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s - - testName: Apply -2m clock skew for 10m against 1 targets + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 4 targets. Impacting the full node of targeted reth clients. Injecting into AttackAllMatching of the matching targets. 
+ planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + - cl-2-teku-reth + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth/lodestar Node (Node #3)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-3-reth-lodestar + - cl-3-lodestar-reth + - cl-3-lodestar-reth-validator + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth/lighthouse Node (Node #4)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-4-reth-lighthouse + - cl-4-lighthouse-reth + - cl-4-lighthouse-reth-validator + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth/prysm Node (Node #5)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-5-reth-prysm + - cl-5-prysm-reth + - cl-5-prysm-reth-validator + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s + - testName: Apply -2m clock skew for 1m against 1 targets. Impacting the client of targeted reth clients. Injecting into AttackOneMatching of the matching targets. planSteps: - stepType: injectFault description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' @@ -182,7 +344,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -195,8 +357,31 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s - - testName: Apply -2m clock skew for 10m against 3 targets + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 1 targets. Impacting the client of targeted reth clients. Injecting into AttackOneMatching of the matching targets. + planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s + - testName: Apply -2m clock skew for 1m against 3 targets. Impacting the client of targeted reth clients. Injecting into AttackSupermajorityMatching of the matching targets. 
planSteps: - stepType: injectFault description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' @@ -205,7 +390,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -221,7 +406,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -237,7 +422,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -250,8 +435,8 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s - - testName: Apply -2m clock skew for 10m against 4 targets + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 3 targets. Impacting the client of targeted reth clients. Injecting into AttackSupermajorityMatching of the matching targets. planSteps: - stepType: injectFault description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' @@ -260,7 +445,62 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/lodestar Node (Node #3)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-3-reth-lodestar + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/lighthouse Node (Node #4)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-4-reth-lighthouse + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s + - testName: Apply -2m clock skew for 1m against 4 targets. Impacting the client of targeted reth clients. Injecting into AttackAllMatching of the matching targets. + planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m mode: all selector: expressionSelectors: @@ -276,7 +516,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -292,7 +532,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -308,7 +548,7 @@ testConfig: kind: TimeChaos spec: action: delay - duration: 10m + duration: 1m mode: all selector: expressionSelectors: @@ -321,4 +561,75 @@ testConfig: description: wait for faults to terminate health: enableChecks: true - gracePeriod: 2m0s + gracePeriod: 30m0s + - testName: Apply 2m clock skew for 1m against 4 targets. Impacting the client of targeted reth clients. Injecting into AttackAllMatching of the matching targets. 
+ planSteps: + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/teku Node (Node #2)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-2-reth-teku + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/lodestar Node (Node #3)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-3-reth-lodestar + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/lighthouse Node (Node #4)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-4-reth-lighthouse + timeOffset: 2m + - stepType: injectFault + description: 'Inject clock skew on target reth client of reth/prysm Node (Node #5)' + chaosFaultSpec: + apiVersion: chaos-mesh.org/v1alpha1 + kind: TimeChaos + spec: + action: delay + duration: 1m + mode: all + selector: + expressionSelectors: + - key: kurtosistech.com/id + operator: In + values: + - el-5-reth-prysm + timeOffset: 2m + - stepType: waitForFaultCompletion + description: wait for faults to terminate + health: + enableChecks: true + gracePeriod: 30m0s diff --git a/test-suites/suite.yaml b/test-suites/suite.yaml index 43e64a2..be249be 100644 --- a/test-suites/suite.yaml +++ b/test-suites/suite.yaml @@ -15,7 +15,7 @@ testConfig: - testName: clock-skew health: enableChecks: true - gracePeriod: 2m0s + gracePeriod: 30s planSteps: - stepType: injectFault description: 'Inject clock skew on target' @@ -30,6 +30,6 @@ testConfig: mode: all action: delay timeOffset: '-5m' - duration: 10m + duration: 1m - stepType: waitForFaultCompletion description: wait for faults to terminate