Skip to content

Commit a9e5f39

Browse files
uditgauravnicjohnson145andrewhu-hcl
authored
[ Cherry-pick for 2.4.0 ] (#473)
* Refactor/experiment contributing (#470) * docs: add instructions for building litmus-sdk binary Non Linux AMD64 users will need to build the binary for their target platform. Signed-off-by: Nic Johnson <[email protected]> * docs: update generated code & docs to aid experiment contribution It wasn't very clear what generated code needed to be kept, and what generated code needed to be replaced with experiment-specific code. Attempt to make that more clear by expanding README & adding grep-able tags inside generated code. Signed-off-by: Nic Johnson <[email protected]> * fix issue-3350 (#468) Signed-off-by: Andrew Hu <[email protected]> Co-authored-by: Udit Gaurav <[email protected]> * Remove the stress process on timeout without failure (#472) Signed-off-by: udit <[email protected]> * update image tag Signed-off-by: udit <[email protected]> Co-authored-by: Nic Johnson <[email protected]> Co-authored-by: Andrew Hu <[email protected]>
1 parent c4fb546 commit a9e5f39

File tree

6 files changed

+83
-29
lines changed

6 files changed

+83
-29
lines changed

build/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ RUN apk --update add \
2424
# Packaging stage
2525
# Image source: https://github.com/litmuschaos/test-tools/blob/master/custom/hardened-alpine/experiment/Dockerfile
2626
# The base image is non-root (have litmus user) with default litmus directory.
27-
FROM litmuschaos/experiment-alpine:2.3.0
27+
FROM litmuschaos/experiment-alpine:2.4.0
2828

2929
LABEL maintainer="LitmusChaos"
3030

chaoslib/litmus/node-drain/lib/node-drain.go

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"os/exec"
77
"os/signal"
88
"strconv"
9+
"strings"
910
"syscall"
1011
"time"
1112

@@ -19,6 +20,8 @@ import (
1920
"github.com/litmuschaos/litmus-go/pkg/utils/common"
2021
"github.com/litmuschaos/litmus-go/pkg/utils/retry"
2122
"github.com/pkg/errors"
23+
apierrors "k8s.io/apimachinery/pkg/api/errors"
24+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2225
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2326
)
2427

@@ -149,29 +152,50 @@ func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients cl
149152
// uncordonNode uncordon the application node
150153
func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error {
151154

152-
log.Infof("[Recover]: Uncordon the %v node", experimentsDetails.TargetNode)
155+
targetNodes := strings.Split(experimentsDetails.TargetNode, ",")
156+
for _, targetNode := range targetNodes {
153157

154-
command := exec.Command("kubectl", "uncordon", experimentsDetails.TargetNode)
155-
var out, stderr bytes.Buffer
156-
command.Stdout = &out
157-
command.Stderr = &stderr
158-
if err := command.Run(); err != nil {
159-
log.Infof("Error String: %v", stderr.String())
160-
return errors.Errorf("unable to uncordon the %v node, err: %v", experimentsDetails.TargetNode, err)
161-
}
158+
//Check node exist before uncordon the node
159+
_, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, metav1.GetOptions{})
160+
if err != nil {
161+
if apierrors.IsNotFound(err) {
162+
log.Infof("[Info]: The %v node is no longer exist, skip uncordon the node", targetNode)
163+
common.SetTargets(targetNode, "noLongerExist", "node", chaosDetails)
164+
continue
165+
} else {
166+
return errors.Errorf("unable to get the %v node, err: %v", targetNode, err)
167+
}
168+
}
162169

163-
common.SetTargets(experimentsDetails.TargetNode, "reverted", "node", chaosDetails)
170+
log.Infof("[Recover]: Uncordon the %v node", targetNode)
171+
command := exec.Command("kubectl", "uncordon", targetNode)
172+
var out, stderr bytes.Buffer
173+
command.Stdout = &out
174+
command.Stderr = &stderr
175+
if err := command.Run(); err != nil {
176+
log.Infof("Error String: %v", stderr.String())
177+
return errors.Errorf("unable to uncordon the %v node, err: %v", targetNode, err)
178+
}
179+
common.SetTargets(targetNode, "reverted", "node", chaosDetails)
180+
}
164181

165182
return retry.
166183
Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)).
167184
Wait(time.Duration(experimentsDetails.Delay) * time.Second).
168185
Try(func(attempt uint) error {
169-
nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.TargetNode, v1.GetOptions{})
170-
if err != nil {
171-
return err
172-
}
173-
if nodeSpec.Spec.Unschedulable {
174-
return errors.Errorf("%v node is in unschedulable state", experimentsDetails.TargetNode)
186+
targetNodes := strings.Split(experimentsDetails.TargetNode, ",")
187+
for _, targetNode := range targetNodes {
188+
nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{})
189+
if err != nil {
190+
if apierrors.IsNotFound(err) {
191+
continue
192+
} else {
193+
return err
194+
}
195+
}
196+
if nodeSpec.Spec.Unschedulable {
197+
return errors.Errorf("%v node is in unschedulable state", experimentsDetails.TargetNode)
198+
}
175199
}
176200
return nil
177201
})

chaoslib/litmus/stress-chaos/helper/stress-helper.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,15 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c
176176
select {
177177
case <-timeout:
178178
// the stress process gets timeout before completion
179-
log.Infof("[Timeout] Stress output: %v", buf.String())
180-
log.Info("[Cleanup]: Killing the stress process")
181-
terminateProcess(cmd.Process.Pid)
179+
log.Infof("[Chaos] The stress process is not yet completed after the chaos duration of %vs", experimentsDetails.ChaosDuration+30)
180+
log.Info("[Timeout]: Killing the stress process")
181+
if err = terminateProcess(cmd.Process.Pid); err != nil {
182+
return err
183+
}
182184
if err = result.AnnotateChaosResult(resultDetails.Name, chaosDetails.ChaosNamespace, "reverted", "pod", experimentsDetails.TargetPods); err != nil {
183185
return err
184186
}
185-
return errors.Errorf("the stress process is timeout after %vs", experimentsDetails.ChaosDuration+30)
187+
return nil
186188
case err := <-done:
187189
if err != nil {
188190
err, ok := err.(*exec.ExitError)

contribute/developer-guide/README.md

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ The *generate_experiment.go* script is a simple way to bootstrap your experiment
3434
$ cd litmus-go/contribute/developer-guide
3535
```
3636

37+
- Build litmus-sdk
38+
39+
```
40+
go build -o ./litmus-sdk ./bin/main.go
41+
```
42+
3743
- Populate the `attributes.yaml` with details of the chaos experiment (or chart). Use the [attributes.yaml.sample](/contribute/developer-guide/attributes.yaml.sample) as reference.
3844

3945
As an example, let us consider an experiment to kill one of the replicas of a nginx deployment. The attributes.yaml can be constructed like this:
@@ -158,17 +164,29 @@ The *generate_experiment.go* script is a simple way to bootstrap your experiment
158164
drwxr-xr-x 2 shubham shubham 4096 Jun 10 22:41 icons/
159165
```
160166

161-
- Proceed with construction of business logic inside the `sample-exec-chaos.go` file, by making
162-
the appropriate modifications listed below to achieve the desired effect:
163167

164-
- variables
165-
- entry & exit criteria checks for the experiment
166-
- helper utils in either [pkg](/pkg/) or new [base chaos libraries](/chaoslib)
168+
- Proceed with construction of business logic, by making the appropriate modifications listed below
169+
to achieve the desired effect:
170+
171+
- Pre-Chaos Checks: Additional experiment-specific checks to run before chaos. Checks should be
172+
added at the `@TODO: user PRE-CHAOS-CHECK` marker in the
173+
`experiments/<category>/<name>/experiment/<name>.go` file
174+
175+
- Inject Chaos: The heart of your experiment, which actually enacts the chaos. By default, the generated
176+
code will call out to the generated library. However, if your experiment simply makes use of
177+
existing libraries, modify the chaos injection at the `@TODO: user INVOKE-CHAOSLIB` marker in the
178+
`experiments/<category>/<name>/experiment/<name>.go` file
167179

180+
- Library Modifications: This is where the low level chaos execution code should live. Populate
181+
the `runChaos`, `experimentExecution`, and `injectChaos` functions as appropriate in the
182+
`chaoslib/litmus/<name>/lib/<name>.go` file.
168183

169-
- The chaoslib is created at `chaoslib/litmus/sample-exec-chaos/lib/sample-exec-chaos.go` path. It contains some pre-defined steps which runs the `ChaosInject` command (explicitly provided as an ENV var in the experiment CR). Which will induce chaos in the target application. It will wait for the given chaos duration and finally runs the `ChaosKill` command (also provided as an ENV var) for cleanup purposes. Update this chaoslib to achieve the desired effect based on the use-case or reuse the other existing chaoslib.
184+
- Post-Chaos Checks: Additional experiment-specific checks to run after chaos. Checks should be
185+
added at the `@TODO: user POST-CHAOS-CHECK` marker in the
186+
`experiments/<category>/<name>/experiment/<name>.go` file
170187

171-
- Create an experiment README explaining, briefly, the *what*, *why* & *how* of the experiment to aid users of this experiment.
188+
- Create an experiment README explaining, briefly, the *what*, *why* & *how* of the experiment to aid users of this experiment. This README
189+
should live at `experiments/<category>/<name>/README.md`
172190

173191
### Steps to Test Experiment
174192

contribute/developer-guide/templates/experiment.tmpl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/sirupsen/logrus"
1717
)
1818

19+
1920
// Experiment contains steps to inject chaos
2021
func Experiment(clients clients.ClientSets){
2122

@@ -69,6 +70,7 @@ func Experiment(clients clients.ClientSets){
6970
// Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result
7071
go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails)
7172

73+
// @TODO: user PRE-CHAOS-CHECK
7274
// ADD A PRE-CHAOS CHECK OF YOUR CHOICE HERE
7375
// POD STATUS CHECKS FOR THE APPLICATION UNDER TEST AND AUXILIARY APPLICATIONS ARE ADDED BY DEFAULT
7476

@@ -121,6 +123,7 @@ func Experiment(clients clients.ClientSets){
121123
// INVOKE THE CHAOSLIB OF YOUR CHOICE HERE, WHICH WILL CONTAIN
122124
// THE BUSINESS LOGIC OF THE ACTUAL CHAOS
123125
// IT CAN BE A NEW CHAOSLIB YOU HAVE CREATED SPECIALLY FOR THIS EXPERIMENT OR ANY EXISTING ONE
126+
// @TODO: user INVOKE-CHAOSLIB
124127

125128
// Including the litmus lib
126129
switch experimentsDetails.ChaosLib {
@@ -138,6 +141,7 @@ func Experiment(clients clients.ClientSets){
138141
return
139142
}
140143

144+
// @TODO: user POST-CHAOS-CHECK
141145
// ADD A POST-CHAOS CHECK OF YOUR CHOICE HERE
142146
// POD STATUS CHECKS FOR THE APPLICATION UNDER TEST AND AUXILIARY APPLICATIONS ARE ADDED BY DEFAULT
143147

pkg/status/nodes.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/pkg/errors"
1111
logrus "github.com/sirupsen/logrus"
1212
apiv1 "k8s.io/api/core/v1"
13+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1314
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1415
)
1516

@@ -26,7 +27,12 @@ func CheckNodeStatus(nodes string, timeout, delay int, clients clients.ClientSet
2627
for index := range targetNodes {
2728
node, err := clients.KubeClient.CoreV1().Nodes().Get(targetNodes[index], metav1.GetOptions{})
2829
if err != nil {
29-
return err
30+
if apierrors.IsNotFound(err) {
31+
log.Infof("[Info]: The %v node is not exist", targetNodes[index])
32+
continue
33+
} else {
34+
return err
35+
}
3036
}
3137
nodeList.Items = append(nodeList.Items, *node)
3238
}

0 commit comments

Comments
 (0)