diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index ef01e1f2f..5c7bee086 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -4,6 +4,7 @@ import ( "context" "errors" "flag" + "fmt" "os" // Uncomment to load all auth plugins @@ -68,6 +69,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) func init() { @@ -106,6 +108,8 @@ func main() { //Getting kubeConfig and Generate ClientSets if err := clients.GenerateClientSetFromKubeConfig(); err != nil { log.Errorf("Unable to Get the kubeconfig, err: %v", err) + span.SetStatus(codes.Error, "Unable to Get the kubeconfig") + span.RecordError(err) return } @@ -211,6 +215,7 @@ func main() { k6Loadgen.Experiment(ctx, clients) default: log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *experimentName) + span.SetStatus(codes.Error, fmt.Sprintf("Unsupported -name %v", *experimentName)) return } } diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go index 0a54489fd..c855954df 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm-chaos.go @@ -17,6 +17,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // InjectChaosInSerialMode will inject the aws ssm chaos in serial mode that is one after other @@ -51,6 +52,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment ec2IDList := strings.Fields(ec2ID) commandId, err := ssm.SendSSMCommand(experimentsDetails, ec2IDList) if err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } //prepare commands for abort recovery @@ -59,6 +62,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //wait for the ssm command to get in running state log.Info("[Wait]: Waiting for the ssm command to get in InProgress state") if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to start ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to start ssm command") } common.SetTargets(ec2ID, "injected", "EC2", chaosDetails) @@ -66,6 +71,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -73,6 +80,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //wait for the ssm command to get succeeded in the given chaos duration log.Info("[Wait]: Waiting for the ssm command to get completed") if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return 
stacktrace.Propagate(err, "failed to send ssm command") } common.SetTargets(ec2ID, "reverted", "EC2", chaosDetails) @@ -117,6 +126,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Chaos]: Starting the ssm command") commandId, err := ssm.SendSSMCommand(experimentsDetails, instanceIDList) if err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } //prepare commands for abort recovery @@ -126,6 +137,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //wait for the ssm command to get in running state log.Info("[Wait]: Waiting for the ssm command to get in InProgress state") if err := ssm.WaitForCommandStatus("InProgress", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to start ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to start ssm command") } } @@ -133,6 +146,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -141,6 +156,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //wait for the ssm command to get succeeded in the given chaos duration log.Info("[Wait]: Waiting for the ssm command to get completed") if err := ssm.WaitForCommandStatus("Success", commandId, ec2ID, experimentsDetails.Region, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.Delay); err != nil { + span.SetStatus(codes.Error, "failed to send ssm command") + span.RecordError(err) return stacktrace.Propagate(err, "failed to send ssm command") } } diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go index e4bb5a50b..0be649c13 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-id.go @@ -19,6 +19,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -49,6 +50,8 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT //create and upload the ssm document on the given aws service monitoring docs if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "could not create and upload the ssm document") + span.RecordError(err) return stacktrace.Propagate(err, "could not create and upload the ssm document") } experimentsDetails.IsDocsUploaded = true @@ -60,25 +63,37 @@ func PrepareAWSSSMChaosByID(ctx context.Context, experimentsDetails *experimentT //get the instance id or list of instance ids instanceIDList := strings.Split(experimentsDetails.EC2InstanceID, ",") if experimentsDetails.EC2InstanceID == "" || len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: 
cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.SetStatus(codes.Error, "no instance id found for chaos injection") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.RecordError(err) + return err } switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Delete the ssm document on the given aws service monitoring docs err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to delete ssm doc") + span.RecordError(err) return stacktrace.Propagate(err, "failed to delete ssm doc") } diff --git a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go index c7e872c7b..d0baf474e 100644 --- a/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go +++ b/chaoslib/litmus/aws-ssm-chaos/lib/ssm/aws-ssm-chaos-by-tag.go @@ -19,6 +19,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // PrepareAWSSSMChaosByTag contains the prepration and injection steps for the experiment @@ -44,6 +45,8 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment //create and upload the ssm document on the given aws service monitoring docs if err = ssm.CreateAndUploadDocument(experimentsDetails.DocumentName, experimentsDetails.DocumentType, experimentsDetails.DocumentFormat, experimentsDetails.DocumentPath, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "could not create and upload the ssm document") + span.RecordError(err) return stacktrace.Propagate(err, "could not create and upload the ssm document") } experimentsDetails.IsDocsUploaded = true @@ -55,25 +58,37 @@ func PrepareAWSSSMChaosByTag(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]:Number of Instance targeted: %v", len(instanceIDList)) if len(instanceIDList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.SetStatus(codes.Error, "no instance id found for chaos injection") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no instance id found for chaos injection"} + span.RecordError(err) + return err } switch 
strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = lib.InjectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = lib.InjectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails, inject); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Delete the ssm document on the given aws service monitoring docs err = ssm.SSMDeleteDocument(experimentsDetails.DocumentName, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to delete ssm doc") + span.RecordError(err) return stacktrace.Propagate(err, "failed to delete ssm doc") } diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index 210377809..3464df48b 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -24,6 +24,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/retry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -55,11 +56,16 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper //get the disk name or list of disk names diskNameList := strings.Split(experimentsDetails.VirtualDiskNames, ",") if experimentsDetails.VirtualDiskNames == "" || len(diskNameList) == 0 { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"} + span.SetStatus(codes.Error, "no volume names found to detach") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no volume names found to detach"} + span.RecordError(err) + return err } instanceNamesWithDiskNames, err := diskStatus.GetInstanceNameForDisks(diskNameList, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup) if err != nil { + span.SetStatus(codes.Error, "error fetching attached instances for disks") + span.RecordError(err) return stacktrace.Propagate(err, "error fetching attached instances for disks") } @@ -69,6 +75,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper for instanceName := range instanceNamesWithDiskNames { attachedDisksWithInstance[instanceName], err = diskStatus.GetInstanceDiskList(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, experimentsDetails.ScaleSet, instanceName) if err != nil { + span.SetStatus(codes.Error, "error fetching virtual disks") + span.RecordError(err) return stacktrace.Propagate(err, "error fetching virtual disks") } } @@ -85,14 +93,21 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper switch strings.ToLower(experimentsDetails.Sequence) { case 
"serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNamesWithDiskNames, attachedDisksWithInstance, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -125,6 +140,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Chaos]: Detaching the virtual disks from the instances") for instanceName, diskNameList := range instanceNamesWithDiskNames { if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil { + span.SetStatus(codes.Error, "failed to detach disks") + span.RecordError(err) return stacktrace.Propagate(err, "failed to detach disks") } } @@ -133,6 +150,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { + span.SetStatus(codes.Error, "disk detachment check failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } } @@ -147,6 +166,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -159,6 +180,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Chaos]: Attaching the Virtual disks back to the instances") for instanceName, diskNameList := range attachedDisksWithInstance { if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameList); err != nil { + span.SetStatus(codes.Error, "virtual disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "virtual disk attachment failed") } @@ -167,6 +190,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, diskName := range diskNameList { log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { + span.SetStatus(codes.Error, "disk attachment check failed") + span.RecordError(err) return 
stacktrace.Propagate(err, "disk attachment check failed") } } @@ -209,12 +234,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Detaching the virtual disks log.Infof("[Chaos]: Detaching %v from the instance", diskName) if err = diskStatus.DetachDisks(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskNameToList); err != nil { + span.SetStatus(codes.Error, "failed to detach disks") + span.RecordError(err) return stacktrace.Propagate(err, "failed to detach disks") } // Waiting for disk to be detached log.Infof("[Wait]: Waiting for Disk '%v' to detach", diskName) if err := diskStatus.WaitForDiskToDetach(experimentsDetails, diskName); err != nil { + span.SetStatus(codes.Error, "disk detachment check failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment check failed") } @@ -224,6 +253,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -235,12 +266,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Attaching the virtual disks to the instance log.Infof("[Chaos]: Attaching %v back to the instance", diskName) if err = diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, attachedDisksWithInstance[instanceName]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } // Waiting for disk to be attached log.Infof("[Wait]: Waiting for Disk '%v' to attach", diskName) if err := diskStatus.WaitForDiskToAttach(experimentsDetails, diskName); err != nil { + span.SetStatus(codes.Error, "disk attachment check failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment check failed") } diff --git a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go index eefd1c54a..f019b0440 100644 --- a/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go +++ b/chaoslib/litmus/azure-instance-stop/lib/azure-instance-stop.go @@ -22,6 +22,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -61,14 +62,21 @@ func PrepareAzureStop(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceNameList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return 
stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } // Waiting for the ramp time after chaos injection @@ -110,10 +118,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to stop the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to stop the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop the Azure instance") } } @@ -121,6 +133,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to completely stop log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "instance poweroff status check failed") + span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } @@ -128,6 +142,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -140,10 +156,14 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Chaos]: Starting back the Azure instance") if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } @@ -151,6 +171,8 @@ func 
injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Wait for Azure instance to get in running state log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "instance power on status check failed") + span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } } @@ -190,10 +212,14 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Stopping the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to stop Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } else { if err := azureStatus.AzureInstanceStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to stop Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to stop Azure instance") } } @@ -203,6 +229,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the stopped state", vmName) if err := azureStatus.WaitForAzureComputeDown(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "instance poweroff status check failed") + span.RecordError(err) return stacktrace.Propagate(err, "instance poweroff status check failed") } } @@ -210,6 +238,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Run probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -223,10 +253,14 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Infof("[Chaos]: Starting back the Azure instance: %v", vmName) if experimentsDetails.ScaleSet == "enable" { if err := azureStatus.AzureScaleSetInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } else { if err := azureStatus.AzureInstanceStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "unable to start the Azure instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to start the Azure instance") } } @@ -236,6 +270,8 @@ func injectChaosInParallelMode(ctx
context.Context, experimentsDetails *experime for _, vmName := range instanceNameList { log.Infof("[Wait]: Waiting for Azure instance '%v' to get in the running state", vmName) if err := azureStatus.WaitForAzureComputeUp(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, vmName); err != nil { + span.SetStatus(codes.Error, "instance power on status check failed") + span.RecordError(err) return stacktrace.Propagate(err, "instance power on status check failed") } } diff --git a/chaoslib/litmus/container-kill/helper/container-kill.go b/chaoslib/litmus/container-kill/helper/container-kill.go index 81e6b1a67..37eee907d 100644 --- a/chaoslib/litmus/container-kill/helper/container-kill.go +++ b/chaoslib/litmus/container-kill/helper/container-kill.go @@ -4,8 +4,11 @@ import ( "bytes" "context" "fmt" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "os/exec" "strconv" "time" @@ -52,8 +55,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { if err := killContainer(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(resultErr) log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr) } + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) log.Fatalf("helper pod failed, err: %v", err) } } diff --git a/chaoslib/litmus/container-kill/lib/container-kill.go b/chaoslib/litmus/container-kill/lib/container-kill.go index 95d81bf96..13b2a52be 100644 --- a/chaoslib/litmus/container-kill/lib/container-kill.go +++ b/chaoslib/litmus/container-kill/lib/container-kill.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/container-kill/types" @@ -46,6 +47,8 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "Unable to get the target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -59,12 +62,16 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails *experimentTyp if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "Unable to get the experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "Unable to set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -73,14 +80,21 @@ func PrepareContainerKill(ctx context.Context, experimentsDetails 
*experimentTyp switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "Unable to run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "Unable to run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "Sequence not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -98,6 +112,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -113,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "failed to create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -122,6 +140,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "failed to check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -131,12 +151,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "failed to wait for completion of helper pod") + span.RecordError(err) + return err } //Deleting all the helper pod for container-kill chaos log.Info("[Cleanup]: Deleting all the helper pods") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "failed to delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -150,6 +175,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -164,6 +191,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "failed to create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -174,6 +203,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "failed to check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -183,12 +214,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "failed to wait for completion of helper pod") + span.RecordError(err) + return err } //Deleting all the helper pod for container-kill chaos log.Info("[Cleanup]: Deleting all the helper pods") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "failed to delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -264,7 +300,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "failed to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/disk-fill/helper/disk-fill.go b/chaoslib/litmus/disk-fill/helper/disk-fill.go index c851ba26f..8feca9400 100644 --- a/chaoslib/litmus/disk-fill/helper/disk-fill.go +++ b/chaoslib/litmus/disk-fill/helper/disk-fill.go @@ -3,10 +3,6 @@ package helper import ( "context" "fmt" - "github.com/litmuschaos/litmus-go/pkg/cerrors" - "github.com/litmuschaos/litmus-go/pkg/telemetry" - "github.com/palantir/stacktrace" - "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -15,6 +11,12 @@ import ( "syscall" "time" + "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" + "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/disk-fill/types" @@ -67,8 +69,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { if err := diskFill(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { + span.SetStatus(codes.Error, "Helper pod failed") + span.RecordError(resultErr) log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr) } + span.SetStatus(codes.Error, "Helper pod failed") + span.RecordError(err) log.Fatalf("helper pod failed, err: %v", err) } } diff --git a/chaoslib/litmus/disk-fill/lib/disk-fill.go b/chaoslib/litmus/disk-fill/lib/disk-fill.go index 0c63f84b2..571d2dd96 100644 --- a/chaoslib/litmus/disk-fill/lib/disk-fill.go +++ b/chaoslib/litmus/disk-fill/lib/disk-fill.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/disk-fill/types" @@ -37,7 +38,10 @@ func 
PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } //set up the tunables if provided in range setChaosTunables(experimentsDetails) @@ -51,6 +55,8 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -64,12 +70,16 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not get experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -78,14 +88,21 @@ func PrepareDiskFill(ctx context.Context, experimentsDetails *experimentTypes.Ex switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, execCommandDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -103,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { +
span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -117,6 +136,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -126,6 +147,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -135,12 +158,15 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -157,6 +183,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -171,6 +199,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -181,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -190,12 +222,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not wait for completion of helper pod") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for disk-fill chaos log.Info("[Cleanup]: Deleting all the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -270,7 +306,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go index f1fef9c9e..ef1932e61 100644 --- a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go +++ b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go @@ -9,6 +9,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -34,6 +35,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen //Select node for docker-service-kill experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -58,12 +61,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } // Creating the helper pod to perform docker-service-kill if err = createHelperPod(ctx, experimentsDetails, clients, chaosDetails, experimentsDetails.TargetNode); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -73,6 +80,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Checking the status of the helper pod") if err = status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, 
experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -80,6 +89,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -88,6 +99,8 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen log.Info("[Status]: Check for the node to be in NotReady state") if err = status.CheckNodeNotReadyState(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check for NOT READY state") + span.RecordError(err) return stacktrace.Propagate(err, "could not check for NOT READY state") } @@ -96,12 +109,16 @@ func PrepareDockerServiceKill(ctx context.Context, experimentsDetails *experimen podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod") } @@ -204,7 +221,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "unable to create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go index dbc504628..152d1463f 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-id/lib/ebs-loss-by-id.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" 
"github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -63,14 +64,21 @@ func PrepareEBSLossByID(ctx context.Context, experimentsDetails *experimentTypes switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, volumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go index 6e8589129..a8107e0ca 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss-by-tag/lib/ebs-loss-by-tag.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var ( @@ -61,14 +62,21 @@ func PrepareEBSLossByTag(ctx context.Context, experimentsDetails *experimentType switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = ebsloss.InjectChaosInSerialMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = ebsloss.InjectChaosInParallelMode(ctx, experimentsDetails, targetEBSVolumeIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { diff --git a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go index 8fa9bb0e4..5dc1d87d3 100644 --- a/chaoslib/litmus/ebs-loss/lib/ebs-loss.go +++ b/chaoslib/litmus/ebs-loss/lib/ebs-loss.go @@ -18,6 +18,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" 
"github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // InjectChaosInSerialMode will inject the ebs loss chaos in serial mode which means one after other @@ -41,12 +42,16 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Get volume attachment details ec2InstanceID, device, err := ebs.GetVolumeAttachmentDetails(volumeID, experimentsDetails.VolumeTag, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the attachment info") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the attachment info") } //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err = ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -55,6 +60,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for ebs volume detachment log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID) if err = ebs.WaitForVolumeDetachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs detachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs detachment failed") } @@ -62,6 +69,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -73,6 +82,8 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Getting the EBS volume attachment status ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceID, experimentsDetails.Region) if err != nil { + span.SetStatus(codes.Error, "failed to get the ebs status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the ebs status") } @@ -83,12 +94,16 @@ func InjectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Attaching the ebs volume from the instance log.Info("[Chaos]: Attaching the EBS volume back to the instance") if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceID, device, experimentsDetails.Region); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } //Wait for ebs volume attachment log.Infof("[Wait]: Wait for EBS volume attachment for %v volume", volumeID) if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceID, experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "ebs attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "ebs attachment failed") } } @@ -139,6 +154,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Detaching the ebs volume from the instance log.Info("[Chaos]: Detaching the EBS volume from the instance") if err := ebs.EBSVolumeDetach(volumeID, experimentsDetails.Region); err != nil { + 
+		span.SetStatus(codes.Error, "ebs detachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ebs detachment failed")
 	}
 	common.SetTargets(volumeID, "injected", "EBS", chaosDetails)
@@ -146,6 +163,8 @@ func InjectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	log.Info("[Info]: Checking if the detachment process initiated")
 	if err := ebs.CheckEBSDetachmentInitialisation(targetEBSVolumeIDList, ec2InstanceIDList, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "failed to initialise the detachment")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "failed to initialise the detachment")
 	}
@@ -153,6 +172,8 @@
 	//Wait for ebs volume detachment
 	log.Infof("[Wait]: Wait for EBS volume detachment for volume %v", volumeID)
 	if err := ebs.WaitForVolumeDetachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "ebs detachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ebs detachment failed")
 	}
 }
@@ -160,6 +181,8 @@
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -173,6 +196,8 @@
 	//Getting the EBS volume attachment status
 	ebsState, err := ebs.GetEBSStatus(volumeID, ec2InstanceIDList[i], experimentsDetails.Region)
 	if err != nil {
+		span.SetStatus(codes.Error, "failed to get the ebs status")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "failed to get the ebs status")
 	}
@@ -183,12 +208,16 @@
 	//Attaching the ebs volume from the instance
 	log.Info("[Chaos]: Attaching the EBS volume from the instance")
 	if err = ebs.EBSVolumeAttach(volumeID, ec2InstanceIDList[i], deviceList[i], experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "ebs attachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ebs attachment failed")
 	}
 	//Wait for ebs volume attachment
 	log.Infof("[Wait]: Wait for EBS volume attachment for volume %v", volumeID)
 	if err = ebs.WaitForVolumeAttachment(volumeID, ec2InstanceIDList[i], experimentsDetails.Region, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "ebs attachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ebs attachment failed")
 	}
 }
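Note: every hunk in the file above calls methods on a `span` variable declared earlier in the function, outside the diff context. In these experiment libraries the span typically comes from starting a child span at the top of the injection function; in outline, and with placeholder tracer/span names:

package sketch

import (
	"context"

	"go.opentelemetry.io/otel"
)

func injectChaos(ctx context.Context) error {
	// Started once per injection function; every SetStatus/RecordError in the
	// error paths then lands on this one span, which is ended on return.
	ctx, span := otel.Tracer("litmuschaos.io/litmus-go").Start(ctx, "InjectEBSLossChaosInSerialMode")
	defer span.End()

	_ = ctx // detach volumes, run probes, re-attach, recording failures on span
	return nil
}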
diff --git a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go
index 5a844099a..1483f6fd7 100644
--- a/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go
+++ b/chaoslib/litmus/ec2-terminate-by-id/lib/ec2-terminate-by-id.go
@@ -21,6 +21,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 var (
@@ -52,7 +53,10 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment
 	//get the instance id or list of instance ids
 	instanceIDList := strings.Split(experimentsDetails.Ec2InstanceID, ",")
 	if experimentsDetails.Ec2InstanceID == "" || len(instanceIDList) == 0 {
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found to terminate"}
+		span.SetStatus(codes.Error, "no EC2 instance ID found to terminate")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no EC2 instance ID found to terminate"}
+		span.RecordError(err)
+		return err
 	}
 	// watching for the abort signal and revert the chaos
@@ -61,14 +65,21 @@ func PrepareEC2TerminateByID(ctx context.Context, experimentsDetails *experiment
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 	//Waiting for the ramp time after chaos injection
@@ -109,6 +120,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 	//Stopping the EC2 instance
 	log.Info("[Chaos]: Stopping the desired EC2 instance")
 	if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
@@ -117,6 +130,8 @@
 	//Wait for ec2 instance to completely stop
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id)
 	if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
@@ -124,6 +139,8 @@
 	// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -136,12 +153,16 @@
 	if experimentsDetails.ManagedNodegroup != "enable" {
 		log.Info("[Chaos]: Starting back the EC2 instance")
 		if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 		//Wait for ec2 instance to get in running state
 		log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id)
 		if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 	}
@@ -182,6 +203,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime
 	//Stopping the EC2 instance
 	log.Info("[Chaos]: Stopping the desired EC2 instance")
 	if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
 	common.SetTargets(id, "injected", "EC2", chaosDetails)
@@ -191,6 +214,8 @@
 	//Wait for ec2 instance to completely stop
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id)
 	if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
 	common.SetTargets(id, "reverted", "EC2 Instance ID", chaosDetails)
@@ -199,6 +224,8 @@
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -213,6 +240,8 @@
 	for _, id := range instanceIDList {
 		log.Info("[Chaos]: Starting back the EC2 instance")
 		if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 	}
@@ -221,6 +250,8 @@
 	//Wait for ec2 instance to get in running state
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id)
 	if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to start")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to start")
 	}
 }
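Note: the pairing of the two calls on every failure path is deliberate. In the OpenTelemetry Go API, RecordError only attaches an "exception" event to the span; it does not mark the span as failed. SetStatus is what flips the status that trace backends alert on:

package sketch

import (
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func markFailure(span trace.Span, err error) {
	span.RecordError(err)                                      // event only; status stays Unset
	span.SetStatus(codes.Error, "ec2 instance failed to stop") // actually marks the span failed
}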
diff --git a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go
index 2c34b83b4..eb2ac319a 100644
--- a/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go
+++ b/chaoslib/litmus/ec2-terminate-by-tag/lib/ec2-terminate-by-tag.go
@@ -22,6 +22,7 @@ import (
 	"github.com/palantir/stacktrace"
 	"github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 )
 var inject, abort chan os.Signal
@@ -56,14 +57,21 @@ func PrepareEC2TerminateByTag(ctx context.Context, experimentsDetails *experimen
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err := injectChaosInSerialMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err := injectChaosInParallelMode(ctx, experimentsDetails, instanceIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 	//Waiting for the ramp time after chaos injection
@@ -104,6 +112,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 	//Stopping the EC2 instance
 	log.Info("[Chaos]: Stopping the desired EC2 instance")
 	if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
@@ -112,6 +122,8 @@
 	//Wait for ec2 instance to completely stop
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id)
 	if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
@@ -119,6 +131,8 @@
 	// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -131,12 +145,16 @@
 	if experimentsDetails.ManagedNodegroup != "enable" {
 		log.Info("[Chaos]: Starting back the EC2 instance")
 		if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 		//Wait for ec2 instance to get in running state
 		log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id)
 		if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 	}
@@ -176,6 +194,8 @@
 	//Stopping the EC2 instance
 	log.Info("[Chaos]: Stopping the desired EC2 instance")
 	if err := awslib.EC2Stop(id, experimentsDetails.Region); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
 	common.SetTargets(id, "injected", "EC2", chaosDetails)
@@ -185,6 +205,8 @@
 	//Wait for ec2 instance to completely stop
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in stopped state", id)
 	if err := awslib.WaitForEC2Down(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to stop")
 	}
 }
@@ -192,6 +214,8 @@
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "failed to run probes")
 		}
 	}
@@ -206,6 +230,8 @@
 	for _, id := range instanceIDList {
 		log.Info("[Chaos]: Starting back the EC2 instance")
 		if err := awslib.EC2Start(id, experimentsDetails.Region); err != nil {
+			span.SetStatus(codes.Error, "ec2 instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "ec2 instance failed to start")
 		}
 	}
@@ -214,6 +240,8 @@
 	//Wait for ec2 instance to get in running state
 	log.Infof("[Wait]: Wait for EC2 instance '%v' to get in running state", id)
 	if err := awslib.WaitForEC2Up(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.ManagedNodegroup, experimentsDetails.Region, id); err != nil {
+		span.SetStatus(codes.Error, "ec2 instance failed to start")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "ec2 instance failed to start")
 	}
 }
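Note: several of these functions can hit more than one failing branch on the same span, for example a failed stop followed by a failed revert. With the otel-go SDK that is safe: a status is only replaced by one of equal or higher precedence (Unset < Error < Ok), and descriptions are kept only for Error codes, so the span exports with the most recent Error description. (SDK behaviour at the time of writing; treat as an assumption.)

package sketch

import (
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func cascade(span trace.Span) {
	span.SetStatus(codes.Error, "ec2 instance failed to stop")
	span.SetStatus(codes.Error, "failed to run probes") // description exported after End()
}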
"could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskVolumeNamesList, experimentsDetails.TargetDiskInstanceNamesList, experimentsDetails.Zones, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "failed to run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } } @@ -111,6 +119,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Detaching the disk volume from the instance log.Info("[Chaos]: Detaching the disk volume from the instance") if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil { + span.SetStatus(codes.Error, "failed to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "disk detachment failed") } @@ -119,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Wait for disk volume detachment log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i]) if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil { + span.SetStatus(codes.Error, "failed to detach the disk volume from the vm instance") + span.RecordError(err) return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance") } @@ -126,6 +138,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic // the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -137,6 +151,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Getting the disk volume attachment status diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone) if err != nil { + span.SetStatus(codes.Error, "failed to get the disk volume status") + span.RecordError(err) return stacktrace.Propagate(err, "failed to get the disk volume status") } @@ -147,12 +163,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic //Attaching the disk volume to the instance log.Info("[Chaos]: Attaching the disk volume back to the instance") if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil { + span.SetStatus(codes.Error, "disk attachment failed") + span.RecordError(err) return stacktrace.Propagate(err, "disk attachment failed") } //Wait 
 	//Wait for disk volume attachment
 	log.Infof("[Wait]: Wait for disk volume attachment for %v volume", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "failed to attach the disk volume to the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance")
 	}
 }
@@ -188,6 +208,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	//Detaching the disk volume from the instance
 	log.Info("[Chaos]: Detaching the disk volume from the instance")
 	if err = gcp.DiskVolumeDetach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk detachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk detachment failed")
 	}
@@ -199,6 +221,8 @@
 	//Wait for disk volume detachment
 	log.Infof("[Wait]: Wait for disk volume detachment for volume %v", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to detach the disk volume from the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to detach the disk volume from the vm instance")
 	}
 }
@@ -206,6 +230,8 @@
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -219,6 +245,8 @@
 	//Getting the disk volume attachment status
 	diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone)
 	if err != nil {
+		span.SetStatus(codes.Error, "failed to get the disk status")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "failed to get the disk status")
 	}
@@ -229,12 +257,16 @@
 	//Attaching the disk volume to the instance
 	log.Info("[Chaos]: Attaching the disk volume to the instance")
 	if err = gcp.DiskVolumeAttach(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, zone, experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk attachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk attachment failed")
 	}
 	//Wait for disk volume attachment
 	log.Infof("[Wait]: Wait for disk volume attachment for volume %v", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, instanceNamesList[i], zone, experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to attach the disk volume to the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to attach the disk volume to the vm instance")
 	}
 }
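Note: the three-line pattern (SetStatus, RecordError, then Propagate/return) now appears in every error branch of these libraries, and in a few hunks above the status text and the propagated reason have already drifted apart. A small helper, which is not part of this patch, could collapse the boilerplate and keep the two strings in lockstep:

package sketch

import (
	"github.com/palantir/stacktrace"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// failSpan is hypothetical; call sites would shrink to:
//   return failSpan(span, err, "disk detachment failed")
func failSpan(span trace.Span, err error, msg string) error {
	span.SetStatus(codes.Error, msg)
	span.RecordError(err)
	return stacktrace.Propagate(err, msg)
}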
diff --git a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
index 6a99010d9..e19e8a59d 100644
--- a/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
+++ b/chaoslib/litmus/gcp-vm-disk-loss/lib/gcp-vm-disk-loss.go
@@ -22,6 +22,7 @@ import (
 	"github.com/palantir/stacktrace"
 	"github.com/pkg/errors"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 	"google.golang.org/api/compute/v1"
 )
@@ -59,6 +60,8 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service,
 	//get the device names for the given disks
 	if err := getDeviceNamesList(computeService, experimentsDetails, diskNamesList, diskZonesList); err != nil {
+		span.SetStatus(codes.Error, "failed to fetch the disk device names")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "failed to fetch the disk device names")
 	}
@@ -74,14 +77,21 @@ func PrepareDiskVolumeLoss(ctx context.Context, computeService *compute.Service,
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, diskNamesList, diskZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 }
@@ -114,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Detaching the disk volume from the instance
 	log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i])
 	if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk detachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk detachment failed")
 	}
@@ -122,6 +134,8 @@
 	//Wait for disk volume detachment
 	log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance")
 	}
@@ -129,6 +143,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -140,6 +156,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Getting the disk volume attachment status
 	diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i])
 	if err != nil {
+		span.SetStatus(codes.Error, "failed to get disk status")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, fmt.Sprintf("failed to get %s disk volume status", targetDiskVolumeNamesList[i]))
 	}
@@ -150,12 +168,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Attaching the disk volume to the instance
 	log.Infof("[Chaos]: Attaching %s disk volume back to the instance", targetDiskVolumeNamesList[i])
 	if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk attachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk attachment failed")
 	}
 	//Wait for disk volume attachment
 	log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance")
 	}
 }
@@ -188,6 +210,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	//Detaching the disk volume from the instance
 	log.Infof("[Chaos]: Detaching %s disk volume from the instance", targetDiskVolumeNamesList[i])
 	if err = gcp.DiskVolumeDetach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk detachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk detachment failed")
 	}
@@ -199,6 +223,8 @@
 	//Wait for disk volume detachment
 	log.Infof("[Wait]: Wait for %s disk volume detachment", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeDetachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to detach disk volume from the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to detach disk volume from the vm instance")
 	}
 }
@@ -206,6 +232,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -219,6 +247,8 @@
 	//Getting the disk volume attachment status
 	diskState, err := gcp.GetDiskVolumeState(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i])
 	if err != nil {
+		span.SetStatus(codes.Error, "failed to get disk status")
+		span.RecordError(err)
 		return errors.Errorf("failed to get the disk status, err: %v", err)
 	}
@@ -229,12 +259,16 @@
 	//Attaching the disk volume to the instance
 	log.Infof("[Chaos]: Attaching %s disk volume to the instance", targetDiskVolumeNamesList[i])
 	if err = gcp.DiskVolumeAttach(computeService, experimentsDetails.TargetDiskInstanceNamesList[i], experimentsDetails.GCPProjectID, diskZonesList[i], experimentsDetails.DeviceNamesList[i], targetDiskVolumeNamesList[i]); err != nil {
+		span.SetStatus(codes.Error, "disk attachment failed")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "disk attachment failed")
 	}
 	//Wait for disk volume attachment
 	log.Infof("[Wait]: Wait for %s disk volume attachment", targetDiskVolumeNamesList[i])
 	if err = gcp.WaitForVolumeAttachment(computeService, targetDiskVolumeNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.TargetDiskInstanceNamesList[i], diskZonesList[i], experimentsDetails.Delay, experimentsDetails.Timeout); err != nil {
+		span.SetStatus(codes.Error, "unable to attach disk volume to the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to attach disk volume to the vm instance")
 	}
 }
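Note: the instrumentation added in this patch is straightforward to cover with an in-memory exporter. One possible shape for a unit test, using the otel SDK's tracetest package (test name and error text are illustrative):

package sketch

import (
	"context"
	"errors"
	"testing"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/sdk/trace/tracetest"
)

func TestSpanRecordsFailure(t *testing.T) {
	sr := tracetest.NewSpanRecorder()
	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))

	_, span := tp.Tracer("test").Start(context.Background(), "injectChaosInSerialMode")
	err := errors.New("disk detachment failed")
	span.SetStatus(codes.Error, "disk detachment failed")
	span.RecordError(err)
	span.End()

	got := sr.Ended()[0]
	if got.Status().Code != codes.Error {
		t.Fatalf("want Error status, got %v", got.Status().Code)
	}
	if len(got.Events()) == 0 || got.Events()[0].Name != "exception" {
		t.Fatalf("want an exception event, got %+v", got.Events())
	}
}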
diff --git a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
index 644a02137..3672b9d96 100644
--- a/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
+++ b/chaoslib/litmus/gcp-vm-instance-stop-by-label/lib/gcp-vm-instance-stop-by-label.go
@@ -21,6 +21,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 	"google.golang.org/api/compute/v1"
 )
@@ -56,14 +57,21 @@ func PrepareVMStopByLabel(ctx context.Context, computeService *compute.Service,
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err := injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err := injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 	//Waiting for the ramp time after chaos injection
@@ -105,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Stopping the VM instance
 	log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "VM instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "VM instance failed to stop")
 	}
@@ -113,6 +123,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Wait for VM instance to completely stop
 	log.Infof("[Wait]: Wait for VM instance %s to stop", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "VM instance failed to fully shutdown")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
 	}
@@ -120,6 +132,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -134,6 +148,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// wait for VM instance to get in running state
 	log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "unable to start the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start %s vm instance")
 	}
@@ -142,12 +158,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// starting the VM instance
 	log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to start")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to start")
 	}
 	// wait for VM instance to get in running state
 	log.Infof("[Wait]: Wait for VM instance %s to get in RUNNING state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "unable to start the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start %s vm instance")
 	}
 }
@@ -191,6 +211,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// stopping the VM instance
 	log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to stop")
 	}
@@ -202,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// wait for VM instance to completely stop
 	log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
 	}
 }
@@ -209,6 +233,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -225,6 +251,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "unable to start the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start the vm instance")
 	}
@@ -238,6 +266,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	log.Info("[Chaos]: Starting back the VM instance")
 	if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to start")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to start")
 	}
 }
@@ -247,6 +277,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	log.Infof("[Wait]: Wait for VM instance '%v' to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil {
+		span.SetStatus(codes.Error, "unable to start the vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start the vm instance")
 	}
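Note: untouched context lines in the file above still read `return stacktrace.Propagate(err, "unable to start %s vm instance")`, leaving the %s verb without an argument; that is a pre-existing issue this patch does not change. stacktrace.Propagate takes printf-style arguments, so the eventual fix would pass the instance name through:

package sketch

import (
	"errors"

	"github.com/palantir/stacktrace"
)

func example(instanceName string) error {
	err := errors.New("compute: operation timed out") // stand-in for the GCP error
	return stacktrace.Propagate(err, "unable to start %s vm instance", instanceName)
}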
diff --git a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
index 281e1c211..9ba8a0900 100644
--- a/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
+++ b/chaoslib/litmus/gcp-vm-instance-stop/lib/gcp-vm-instance-stop.go
@@ -21,6 +21,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
 	"google.golang.org/api/compute/v1"
 )
@@ -61,14 +62,21 @@ func PrepareVMStop(ctx context.Context, computeService *compute.Service, experim
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, computeService, experimentsDetails, instanceNamesList, instanceZonesList, clients, resultDetails, eventsDetails, chaosDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 	// wait for the ramp time after chaos injection
@@ -110,6 +118,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Stopping the VM instance
 	log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to stop")
 	}
@@ -118,6 +128,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	//Wait for VM instance to completely stop
 	log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
 	}
@@ -125,6 +137,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// the OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration
 	if len(resultDetails.ProbeDetails) != 0 && i == 0 {
 		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -139,12 +153,16 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// starting the VM instance
 	log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to start")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to start")
 	}
 	// wait for VM instance to get in running state
 	log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "unable to start vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start vm instance")
 	}
@@ -153,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, computeService *compute.Servic
 	// wait for VM instance to get in running state
 	log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "unable to start vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start vm instance")
 	}
 }
@@ -197,6 +217,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// stopping the VM instance
 	log.Infof("[Chaos]: Stopping %s VM instance", instanceNamesList[i])
 	if err := gcplib.VMInstanceStop(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to stop")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to stop")
 	}
@@ -208,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// wait for VM instance to completely stop
 	log.Infof("[Wait]: Wait for VM instance %s to get in stopped state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceDown(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "vm instance failed to fully shutdown")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "vm instance failed to fully shutdown")
 	}
 }
@@ -215,6 +239,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	// run the probes during chaos
 	if len(resultDetails.ProbeDetails) != 0 {
 		if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "failed to run probes")
+			span.RecordError(err)
 			return err
 		}
 	}
@@ -230,6 +256,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	for i := range instanceNamesList {
 		log.Infof("[Chaos]: Starting back %s VM instance", instanceNamesList[i])
 		if err := gcplib.VMInstanceStart(computeService, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+			span.SetStatus(codes.Error, "vm instance failed to start")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "vm instance failed to start")
 		}
 	}
@@ -239,6 +267,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "unable to start vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start vm instance")
 	}
@@ -252,6 +282,8 @@ func injectChaosInParallelMode(ctx context.Context, computeService *compute.Serv
 	log.Infof("[Wait]: Wait for VM instance %s to get in running state", instanceNamesList[i])
 	if err := gcplib.WaitForVMInstanceUp(computeService, experimentsDetails.Timeout, experimentsDetails.Delay, instanceNamesList[i], experimentsDetails.GCPProjectID, instanceZonesList[i]); err != nil {
+		span.SetStatus(codes.Error, "unable to start vm instance")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "unable to start vm instance")
 	}
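Note: the next file records the failure on the span and then calls log.Fatalf, which exits the process; deferred span.End() calls and batched exporters never run after a fatal log, so the just-recorded error can be lost. If the helper's TracerProvider is reachable at that point, flushing before the fatal avoids that. A sketch under those assumptions (the actual provider wiring in litmus-go is outside these hunks):

package sketch

import (
	"context"
	"log"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/trace"
)

func failHard(tp *sdktrace.TracerProvider, span trace.Span, err error) {
	span.SetStatus(codes.Error, "helper pod failed")
	span.RecordError(err)
	span.End()
	_ = tp.Shutdown(context.Background()) // flush pending spans before exiting
	log.Fatalf("helper pod failed, err: %v", err)
}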
diff --git a/chaoslib/litmus/http-chaos/helper/http-helper.go b/chaoslib/litmus/http-chaos/helper/http-helper.go
index b544df448..8be34154b 100644
--- a/chaoslib/litmus/http-chaos/helper/http-helper.go
+++ b/chaoslib/litmus/http-chaos/helper/http-helper.go
@@ -3,10 +3,6 @@ package helper
 import (
 	"context"
 	"fmt"
-	"github.com/litmuschaos/litmus-go/pkg/cerrors"
-	"github.com/litmuschaos/litmus-go/pkg/telemetry"
-	"github.com/palantir/stacktrace"
-	"go.opentelemetry.io/otel"
 	"os"
 	"os/signal"
 	"strconv"
@@ -14,6 +10,12 @@ import (
 	"syscall"
 	"time"
+	"github.com/litmuschaos/litmus-go/pkg/cerrors"
+	"github.com/litmuschaos/litmus-go/pkg/telemetry"
+	"github.com/palantir/stacktrace"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
+
 	clients "github.com/litmuschaos/litmus-go/pkg/clients"
 	"github.com/litmuschaos/litmus-go/pkg/events"
 	experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/http-chaos/types"
@@ -67,8 +69,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) {
 	if err != nil {
 		// update failstep inside chaosresult
 		if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil {
+			span.SetStatus(codes.Error, "Helper pod failed")
+			span.RecordError(resultErr)
 			log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr)
 		}
+		span.SetStatus(codes.Error, "Helper pod failed")
+		span.RecordError(err)
 		log.Fatalf("helper pod failed, err: %v", err)
 	}
 }
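Note: unlike the files above, the next file does not start its own span; the hunk below adds `span := trace.SpanFromContext(ctx)` and reuses the caller's span. If the caller passed a context without a span, SpanFromContext returns a no-op implementation, so the SetStatus/RecordError calls degrade silently rather than panicking:

package sketch

import (
	"context"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func report(ctx context.Context, err error) {
	span := trace.SpanFromContext(ctx) // no-op span if ctx carries none
	span.SetStatus(codes.Error, "could not get target pods")
	span.RecordError(err)
}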
diff --git a/chaoslib/litmus/http-chaos/lib/http-chaos.go b/chaoslib/litmus/http-chaos/lib/http-chaos.go
index 59323f0b8..011ef403f 100644
--- a/chaoslib/litmus/http-chaos/lib/http-chaos.go
+++ b/chaoslib/litmus/http-chaos/lib/http-chaos.go
@@ -11,6 +11,8 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/telemetry"
 	"github.com/palantir/stacktrace"
 	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 	"github.com/litmuschaos/litmus-go/pkg/clients"
 	experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/http-chaos/types"
@@ -27,18 +29,24 @@ import (
 // PrepareAndInjectChaos contains the preparation & injection steps
 func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, args string) error {
+	span := trace.SpanFromContext(ctx)
 	var err error
 	// Get the target pod details for the chaos execution
 	// if the target pod is not defined it will derive the random target pod list using pod affected percentage
 	if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil {
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"}
+		span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"}
+		span.RecordError(err)
+		return err
 	}
 	//set up the tunables if provided in range
 	SetChaosTunables(experimentsDetails)
 	targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails)
 	if err != nil {
+		span.SetStatus(codes.Error, "could not get target pods")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not get target pods")
 	}
@@ -52,12 +60,16 @@
 	if experimentsDetails.ChaosServiceAccount == "" {
 		experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients)
 		if err != nil {
+			span.SetStatus(codes.Error, "could not get experiment service account")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not experiment service account")
 		}
 	}
 	if experimentsDetails.EngineName != "" {
 		if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil {
+			span.SetStatus(codes.Error, "could not set helper data")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not set helper data")
 		}
 	}
@@ -67,14 +79,21 @@
 	switch strings.ToLower(experimentsDetails.Sequence) {
 	case "serial":
 		if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in serial mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in serial mode")
 		}
 	case "parallel":
 		if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, args, clients, chaosDetails, resultDetails, eventsDetails); err != nil {
+			span.SetStatus(codes.Error, "could not run chaos in parallel mode")
+			span.RecordError(err)
 			return stacktrace.Propagate(err, "could not run chaos in parallel mode")
 		}
 	default:
-		return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.SetStatus(codes.Error, "sequence is not supported")
+		err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)}
+		span.RecordError(err)
+		return err
 	}
 	return nil
@@ -108,6 +127,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment
 	runID := stringutils.GetRunID()
 	if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID, args); err != nil {
+		span.SetStatus(codes.Error, "could not create helper pod")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not create helper pod")
 	}
@@ -117,6 +138,8 @@
 	log.Info("[Status]: Checking the status of the helper pods")
 	if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 		common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients)
+		span.SetStatus(codes.Error, "could not check helper status")
+		span.RecordError(err)
 		return stacktrace.Propagate(err, "could not check helper status")
 	}
@@ -126,12 +149,16 @@
 	podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...)
experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } //Deleting all the helper pod for http chaos log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -147,6 +174,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return err } } @@ -161,6 +190,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID, args); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -171,6 +202,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -180,12 +213,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
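// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): every hunk in this diff applies
// the same two-call pattern before an error return. Shown standalone below;
// doWork and the tracer name "example" are placeholders, not litmus-go code.
//
package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// doWork stands in for any experiment step that can fail.
func doWork(ctx context.Context) error {
	// SpanFromContext returns the span the caller stored in ctx, or a no-op
	// span if there is none, so the calls below are always safe.
	span := trace.SpanFromContext(ctx)

	err := errors.New("simulated failure")
	if err != nil {
		// Mark the span failed, then attach err as an exception event:
		// the exact pair of calls added throughout this diff.
		span.SetStatus(codes.Error, "could not complete work")
		span.RecordError(err)
		return err
	}
	return nil
}

func main() {
	ctx, span := otel.Tracer("example").Start(context.Background(), "DoWork")
	defer span.End()
	fmt.Println(doWork(ctx))
}
// ----------------------------------------------------------------------------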
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) } // Deleting all the helper pod for http chaos log.Info("[Cleanup]: Deleting all the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -266,7 +303,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go index 79ce56b30..aaf929182 100644 --- a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go +++ b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go @@ -19,6 +19,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/stringutils" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -35,6 +36,8 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -43,6 +46,8 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType // creating the helper pod to perform k6-loadgen chaos if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -52,6 +57,8 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType log.Info("[Status]: Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -61,12 +68,17 @@ func experimentExecution(ctx context.Context, experimentsDetails *experimentType podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
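// ----------------------------------------------------------------------------
// Illustrative sketch (placeholder types, not this repo's cerrors package):
// RecordError accepts any value satisfying the error interface, which is why
// the hunks above can build a structured cerrors.Error, record it, and return
// it unchanged.
//
package main

import (
	"context"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// chaosError mimics the shape of a structured experiment error.
type chaosError struct {
	Code   string
	Reason string
}

// Error makes chaosError satisfy the built-in error interface.
func (e chaosError) Error() string {
	return fmt.Sprintf("%s: %s", e.Code, e.Reason)
}

func main() {
	// A background context carries no span, so this yields a no-op span;
	// in the experiment code the span comes from the caller's context.
	span := trace.SpanFromContext(context.Background())

	err := chaosError{Code: "ErrorTypeGeneric", Reason: "unable to create helper pod"}
	span.SetStatus(codes.Error, "could not create helper pod")
	span.RecordError(err) // recorded as an exception event using err.Error()
	fmt.Println(err)
}
// ----------------------------------------------------------------------------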
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for k6-loadgen chaos log.Info("[Cleanup]: Deleting all the helper pods") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -86,6 +98,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper // Starting the k6-loadgen experiment if err := experimentExecution(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not execute chaos") + span.RecordError(err) return stacktrace.Propagate(err, "could not execute chaos") } @@ -178,7 +192,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go index d828f614d..465ad036c 100644 --- a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go +++ b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/workloads" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -39,14 +40,21 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.ChaoslibDetail.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.ChaoslibDetail.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", 
experimentsDetails.ChaoslibDetail.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -64,6 +72,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -77,12 +87,17 @@ // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.KafkaBroker == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or KAFKA_BROKER"} + span.SetStatus(codes.Error, "please provide one of the appLabel or KAFKA_BROKER") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or KAFKA_BROKER"} + span.RecordError(err) + return err } podsAffectedPerc, _ := strconv.Atoi(experimentsDetails.ChaoslibDetail.PodsAffectedPerc) targetPodList, err := common.GetPodList(experimentsDetails.KafkaBroker, podsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return err } @@ -90,6 +105,8 @@ for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return err } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -116,12 +133,17 @@ err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not delete the target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.RecordError(err) + return err } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaoslibDetail.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -142,6 +163,8 @@ Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } @@ -160,6 +183,8 @@ func injectChaosInParallelMode(ctx
context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -178,6 +203,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podsAffectedPerc, _ := strconv.Atoi(experimentsDetails.ChaoslibDetail.PodsAffectedPerc) targetPodList, err := common.GetPodList(experimentsDetails.KafkaBroker, podsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -185,6 +212,8 @@ for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -211,13 +240,18 @@ err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not delete the target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.RecordError(err) + return err } } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaoslibDetail.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -238,6 +271,8 @@ Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } diff --git a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go index 350a8b390..e607e07a2 100644 --- a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go +++ b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go @@ -9,6 +9,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -58,12 +59,16 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes if experimentsDetails.EngineName != "" { if
err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } // Creating the helper pod to perform node memory hog if err = createHelperPod(ctx, experimentsDetails, clients, chaosDetails, experimentsDetails.TargetNode); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -73,6 +78,8 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes log.Info("[Status]: Checking the status of the helper pod") if err = status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -82,6 +89,8 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -90,6 +99,8 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes log.Info("[Status]: Check for the node to be in NotReady state") if err = status.CheckNodeNotReadyState(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check for NOT READY state") + span.RecordError(err) return stacktrace.Propagate(err, "could not check for NOT READY state") } @@ -98,12 +109,17 @@ func PrepareKubeletKill(ctx context.Context, experimentsDetails *experimentTypes podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
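// ----------------------------------------------------------------------------
// A possible consolidation, sketched under the assumption that the repeated
// SetStatus/RecordError pair stays this mechanical: a small wrapper (failSpan
// below is hypothetical, not used by this patch) would keep each error return
// site to a single line.
//
package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// failSpan marks span as errored, records err, and passes err through, so a
// call site can write: return failSpan(span, "could not create helper pod", err)
func failSpan(span trace.Span, desc string, err error) error {
	span.SetStatus(codes.Error, desc)
	span.RecordError(err)
	return err
}

func main() {
	span := trace.SpanFromContext(context.Background())
	err := failSpan(span, "could not create helper pod", errors.New("quota exceeded"))
	fmt.Println(err)
}
// ----------------------------------------------------------------------------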
if err != nil || podStatus == "Failed" { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod") } @@ -206,7 +222,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index b5d200c24..cb3ff1051 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -3,11 +3,6 @@ package helper import ( "context" "fmt" - "github.com/litmuschaos/litmus-go/pkg/cerrors" - "github.com/litmuschaos/litmus-go/pkg/events" - "github.com/litmuschaos/litmus-go/pkg/telemetry" - "github.com/palantir/stacktrace" - "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -16,6 +11,13 @@ import ( "syscall" "time" + "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/telemetry" + "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + clients "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/network-chaos/types" "github.com/litmuschaos/litmus-go/pkg/log" @@ -74,8 +76,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { if err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(resultErr) log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr) } + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) log.Fatalf("helper pod failed, err: %v", err) } diff --git a/chaoslib/litmus/network-chaos/lib/network-chaos.go b/chaoslib/litmus/network-chaos/lib/network-chaos.go index dc73645f8..eae4e5d07 100644 --- a/chaoslib/litmus/network-chaos/lib/network-chaos.go +++ b/chaoslib/litmus/network-chaos/lib/network-chaos.go @@ -12,6 +12,8 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" k8serrors 
"k8s.io/apimachinery/pkg/api/errors" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -33,12 +35,16 @@ var destIps string // PrepareAndInjectChaos contains the preparation & injection steps func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, args string) error { + span := trace.SpanFromContext(ctx) var err error // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } //set up the tunables if provided in range SetChaosTunables(experimentsDetails) @@ -46,6 +52,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -59,12 +67,16 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -73,14 +85,21 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, args, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, args, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + 
span.RecordError(err) + return err } return nil @@ -94,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -103,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment serviceMesh, err := setDestIps(pod, experimentsDetails, clients) if err != nil { + span.SetStatus(codes.Error, "could not set destination ips") + span.RecordError(err) return stacktrace.Propagate(err, "could not set destination ips") } @@ -114,6 +137,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer, serviceMesh), pod.Spec.NodeName, runID, args); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -123,6 +148,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -132,12 +159,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for network chaos log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -154,12 +186,16 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } targets, err := filterPodsForNodes(targetPodList, experimentsDetails, clients) if err != nil { + span.SetStatus(codes.Error, "could not filter target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not filter target pods") } @@ -172,6 +208,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID, args); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -182,6 +220,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -191,12 +231,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
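// ----------------------------------------------------------------------------
// Note on the `err != nil || podStatus == "Failed"` branches in these hunks:
// err can still be nil when the pod status alone is "Failed", and per the
// OpenTelemetry Go API docs RecordError does nothing for a nil error. Building
// the composite error first (as these hunks do via common.HelperFailedError)
// is what guarantees an event is recorded. A minimal demonstration:
//
package main

import (
	"context"
	"errors"
	"fmt"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func main() {
	span := trace.SpanFromContext(context.Background())

	var err error         // nil: the wait succeeded, but the pod reported "Failed"
	span.RecordError(err) // no-op: nil errors are silently ignored

	if err == nil {
		err = errors.New("helper pod status is Failed") // wrap first...
	}
	span.SetStatus(codes.Error, "helper pod failed")
	span.RecordError(err) // ...so the exception event is actually emitted
	fmt.Println(err)
}
// ----------------------------------------------------------------------------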
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for container-kill chaos log.Info("[Cleanup]: Deleting all the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -277,7 +322,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go index 57bd9adb0..fccb7f1fc 100644 --- a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go +++ b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go @@ -10,6 +10,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -50,6 +51,8 @@ func PrepareNodeCPUHog(ctx context.Context, experimentsDetails *experimentTypes. nodesAffectedPerc, _ := strconv.Atoi(experimentsDetails.NodesAffectedPerc) targetNodeList, err := common.GetNodeList(experimentsDetails.TargetNodes, experimentsDetails.NodeLabel, nodesAffectedPerc, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node list") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node list") } @@ -60,6 +63,8 @@ func PrepareNodeCPUHog(ctx context.Context, experimentsDetails *experimentTypes. if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -67,10 +72,14 @@ func PrepareNodeCPUHog(ctx context.Context, experimentsDetails *experimentTypes. 
switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: @@ -95,6 +104,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -110,6 +121,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // When number of cpu cores for hogging is not defined , it will take it from node capacity if nodeCPUCores == "0" { if err := setCPUCapacity(experimentsDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not get node cpu capacity") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node cpu capacity") } } @@ -123,6 +136,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Creating the helper pod to perform node cpu hog if err := createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -132,6 +147,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -142,12 +159,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.ExperimentName) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@
-164,6 +186,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -181,6 +205,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // When number of cpu cores for hogging is not defined , it will take it from node capacity if nodeCPUCores == "0" { if err := setCPUCapacity(experimentsDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not get node cpu capacity") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node cpu capacity") } } @@ -192,6 +218,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Creating the helper pod to perform node cpu hog if err := createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -202,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -214,12 +244,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -284,7 +319,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index b46dd34bf..c39623164 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -15,6 +15,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -59,6 +60,8 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E //Select node for kubelet-service-kill experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -72,6 +75,8 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -83,7 +88,10 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E if err := drainNode(ctx, experimentsDetails, clients, chaosDetails); err != nil { log.Info("[Revert]: Reverting chaos because error during draining of node") if uncordonErr := uncordonNode(experimentsDetails, clients, chaosDetails); uncordonErr != nil { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.SetStatus(codes.Error, "could not drain node") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.RecordError(err) + return err } return stacktrace.Propagate(err, "could not drain node") } @@ -93,7 +101,10 @@ func PrepareNodeDrain(ctx
context.Context, experimentsDetails *experimentTypes.E if err = status.AUTStatusCheck(clients, chaosDetails); err != nil { log.Info("[Revert]: Reverting chaos because application status check failed") if uncordonErr := uncordonNode(experimentsDetails, clients, chaosDetails); uncordonErr != nil { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.SetStatus(codes.Error, "could not check application status") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.RecordError(err) + return err } return err } @@ -104,7 +115,10 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E if err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Info("[Revert]: Reverting chaos because auxiliary application status check failed") if uncordonErr := uncordonNode(experimentsDetails, clients, chaosDetails); uncordonErr != nil { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.SetStatus(codes.Error, "could not check auxiliary application status") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(uncordonErr).Error())} + span.RecordError(err) + return err } return err } @@ -118,6 +132,8 @@ func PrepareNodeDrain(ctx context.Context, experimentsDetails *experimentTypes.E // Uncordon the application node if err := uncordonNode(experimentsDetails, clients, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not uncordon the target node") + span.RecordError(err) return stacktrace.Propagate(err, "could not uncordon the target node") } @@ -143,6 +159,8 @@ func drainNode(ctx context.Context, experimentsDetails *experimentTypes.Experime command := exec.Command("kubectl", "drain", experimentsDetails.TargetNode, "--ignore-daemonsets", "--delete-emptydir-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") if err := common.RunCLICommands(command, "", fmt.Sprintf("{node: %s}", experimentsDetails.TargetNode), "failed to drain the target node", cerrors.ErrorTypeChaosInject); err != nil { + span.SetStatus(codes.Error, "could not drain the target node") + span.RecordError(err) return err } @@ -154,10 +172,16 @@ func drainNode(ctx context.Context, experimentsDetails *experimentTypes.Experime Try(func(attempt uint) error { nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(context.Background(), experimentsDetails.TargetNode, v1.GetOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{node: %s}", experimentsDetails.TargetNode), Reason: err.Error()} + span.SetStatus(codes.Error, "could not get node details") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{node: %s}", experimentsDetails.TargetNode), Reason: err.Error()} + span.RecordError(err) + return err } if !nodeSpec.Spec.Unschedulable { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{node: %s}", experimentsDetails.TargetNode), Reason: "node is not in unschedule state"} + span.SetStatus(codes.Error, "target node is not in unschedule state") + err := cerrors.Error{ErrorCode: 
cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{node: %s}", experimentsDetails.TargetNode), Reason: "target node is not in unschedule state"} + span.RecordError(err) + return err } return nil }) diff --git a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go index 94b269a81..075f576d5 100644 --- a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go +++ b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go @@ -7,20 +7,20 @@ import ( "strings" "github.com/litmuschaos/litmus-go/pkg/cerrors" - "github.com/litmuschaos/litmus-go/pkg/telemetry" - "github.com/palantir/stacktrace" - "go.opentelemetry.io/otel" - "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/node-io-stress/types" "github.com/litmuschaos/litmus-go/pkg/log" "github.com/litmuschaos/litmus-go/pkg/probe" "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/litmuschaos/litmus-go/pkg/utils/stringutils" + "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -60,6 +60,8 @@ func PrepareNodeIOStress(ctx context.Context, experimentsDetails *experimentType if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -67,14 +69,21 @@ func PrepareNodeIOStress(ctx context.Context, experimentsDetails *experimentType switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -115,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Creating the helper pod to perform node io stress if err := createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -124,6 +135,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: 
Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } common.SetTargets(appNode, "injected", "node", chaosDetails) @@ -133,12 +146,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment common.SetTargets(appNode, "reverted", "node", chaosDetails) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -153,6 +171,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -175,6 +195,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Creating the helper pod to perform node io stress if err := createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -185,6 +207,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -199,12 +223,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return
stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -251,7 +280,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go index 6562d817f..6dba1fe1a 100644 --- a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go +++ b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go @@ -10,6 +10,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -52,6 +53,8 @@ func PrepareNodeMemoryHog(ctx context.Context, experimentsDetails *experimentTyp nodesAffectedPerc, _ := strconv.Atoi(experimentsDetails.NodesAffectedPerc) targetNodeList, err := common.GetNodeList(experimentsDetails.TargetNodes, experimentsDetails.NodeLabel, nodesAffectedPerc, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node list") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node list") } @@ -62,6 +65,8 @@ func PrepareNodeMemoryHog(ctx context.Context, experimentsDetails *experimentTyp if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -69,14 +74,21 @@ func PrepareNodeMemoryHog(ctx context.Context, experimentsDetails *experimentTyp switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetNodeList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -95,6 +107,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, 
chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -118,17 +132,23 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Getting node memory details memoryCapacity, memoryAllocatable, err := getNodeMemoryDetails(appNode, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node memory details") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node memory details") } //Getting the exact memory value to exhaust MemoryConsumption, err := calculateMemoryConsumption(experimentsDetails, memoryCapacity, memoryAllocatable) if err != nil { + span.SetStatus(codes.Error, "could not calculate memory consumption value") + span.RecordError(err) return stacktrace.Propagate(err, "could not calculate memory consumption value") } // Creating the helper pod to perform node memory hog if err = createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients, MemoryConsumption); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -138,6 +158,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -148,15 +170,23 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, experimentsDetails.ExperimentName) if err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } else if podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return errors.Errorf("helper pod status is %v", podStatus) + err := errors.Errorf("helper pod status is %v", podStatus) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err := common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -171,6 +201,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -194,17 +226,23 @@ func 
injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Getting node memory details memoryCapacity, memoryAllocatable, err := getNodeMemoryDetails(appNode, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node memory details") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node memory details") } //Getting the exact memory value to exhaust MemoryConsumption, err := calculateMemoryConsumption(experimentsDetails, memoryCapacity, memoryAllocatable) if err != nil { + span.SetStatus(codes.Error, "could not calculate memory consumption value") + span.RecordError(err) return stacktrace.Propagate(err, "could not calculate memory consumption value") } // Creating the helper pod to perform node memory hog if err = createHelperPod(ctx, experimentsDetails, chaosDetails, appNode, clients, MemoryConsumption); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -215,6 +253,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pod") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -227,12 +267,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -367,7 +412,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/node-restart/lib/node-restart.go b/chaoslib/litmus/node-restart/lib/node-restart.go index d4750912e..d85f19ac0 100644 --- a/chaoslib/litmus/node-restart/lib/node-restart.go +++ b/chaoslib/litmus/node-restart/lib/node-restart.go @@ -10,6 +10,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -49,6 +50,8 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes //Select node for node-restart experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -57,6 +60,8 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes if experimentsDetails.TargetNodeIP == "" { experimentsDetails.TargetNodeIP, err = getInternalIP(experimentsDetails.TargetNode, clients) if err != nil { + span.SetStatus(codes.Error, "could not get internal ip") + span.RecordError(err) return stacktrace.Propagate(err, "could not get internal ip") } } @@ -80,12 +85,16 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return err } } // Creating the helper pod to perform node restart if err = createHelperPod(ctx, experimentsDetails, chaosDetails, clients); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -95,6 +104,8 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes log.Info("[Status]: Checking the status of 
the helper pod") if err = status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -104,6 +115,8 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -113,12 +126,17 @@ func PrepareNodeRestart(ctx context.Context, experimentsDetails *experimentTypes podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, false) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod") } @@ -216,7 +234,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/node-taint/lib/node-taint.go b/chaoslib/litmus/node-taint/lib/node-taint.go index 8efd0a289..5f3caf636 100644 --- a/chaoslib/litmus/node-taint/lib/node-taint.go +++ b/chaoslib/litmus/node-taint/lib/node-taint.go @@ -13,6 +13,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -56,6 +57,8 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E //Select node for kubelet-service-kill experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) if err != nil { + 
span.SetStatus(codes.Error, "could not get node name") + span.RecordError(err) return stacktrace.Propagate(err, "could not get node name") } } @@ -69,6 +72,8 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -78,6 +83,8 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E // taint the application node if err := taintNode(ctx, experimentsDetails, clients, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not taint node") + span.RecordError(err) return stacktrace.Propagate(err, "could not taint node") } @@ -86,7 +93,10 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E if err = status.AUTStatusCheck(clients, chaosDetails); err != nil { log.Info("[Revert]: Reverting chaos because application status check failed") if taintErr := removeTaintFromNode(experimentsDetails, clients, chaosDetails); taintErr != nil { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(taintErr).Error())} + span.SetStatus(codes.Error, "could not remove taint from node") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(taintErr).Error())} + span.RecordError(err) + return err } return err } @@ -96,7 +106,10 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E if err = status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Info("[Revert]: Reverting chaos because auxiliary application status check failed") if taintErr := removeTaintFromNode(experimentsDetails, clients, chaosDetails); taintErr != nil { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(taintErr).Error())} + span.SetStatus(codes.Error, "could not remove taint from node") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("[%s,%s]", stacktrace.RootCause(err).Error(), stacktrace.RootCause(taintErr).Error())} + span.RecordError(err) + return err } return err } @@ -110,6 +123,8 @@ func PrepareNodeTaint(ctx context.Context, experimentsDetails *experimentTypes.E // remove taint from the application node if err := removeTaintFromNode(experimentsDetails, clients, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not remove taint from node") + span.RecordError(err) return stacktrace.Propagate(err, "could not remove taint from node") } @@ -134,6 +149,9 @@ func taintNode(ctx context.Context, experimentsDetails *experimentTypes.Experime // get the node details node, err := clients.KubeClient.CoreV1().Nodes().Get(context.Background(), experimentsDetails.TargetNode, v1.GetOptions{}) if err != nil { + span.SetStatus(codes.Error, "could not get node details") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{nodeName: %s}", experimentsDetails.TargetNode), Reason: err.Error()} + span.RecordError(err) return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{nodeName: %s}", experimentsDetails.TargetNode), Reason: err.Error()} } @@ -160,7 +178,10 @@ func taintNode(ctx 
context.Context, experimentsDetails *experimentTypes.Experime _, err := clients.KubeClient.CoreV1().Nodes().Update(context.Background(), node, v1.UpdateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{nodeName: %s}", node.Name), Reason: fmt.Sprintf("failed to add taints: %s", err.Error())} + span.SetStatus(codes.Error, "failed to add taints") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{nodeName: %s}", node.Name), Reason: fmt.Sprintf("failed to add taints: %s", err.Error())} + span.RecordError(err) + return err } } diff --git a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go index feefad7f6..60a4ff4d2 100644 --- a/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go +++ b/chaoslib/litmus/pod-autoscaler/lib/pod-autoscaler.go @@ -13,6 +13,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-autoscaler/types" @@ -54,6 +55,8 @@ func PreparePodAutoscaler(ctx context.Context, experimentsDetails *experimentTyp appsUnderTest, err := getDeploymentDetails(experimentsDetails) if err != nil { + span.SetStatus(codes.Error, "could not get deployment details") + span.RecordError(err) return stacktrace.Propagate(err, "could not get deployment details") } @@ -70,10 +73,14 @@ func PreparePodAutoscaler(ctx context.Context, experimentsDetails *experimentTyp go abortPodAutoScalerChaos(appsUnderTest, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails) if err = podAutoscalerChaosInDeployment(ctx, experimentsDetails, clients, appsUnderTest, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not scale deployment") + span.RecordError(err) return stacktrace.Propagate(err, "could not scale deployment") } if err = autoscalerRecoveryInDeployment(experimentsDetails, clients, appsUnderTest, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not revert scaling in deployment") + span.RecordError(err) return stacktrace.Propagate(err, "could not revert scaling in deployment") } @@ -81,6 +88,8 @@ func PreparePodAutoscaler(ctx context.Context, experimentsDetails *experimentTyp appsUnderTest, err := getStatefulsetDetails(experimentsDetails) if err != nil { + span.SetStatus(codes.Error, "could not get statefulset details") + span.RecordError(err) return stacktrace.Propagate(err, "could not get statefulset details") } @@ -97,15 +106,22 @@ func PreparePodAutoscaler(ctx context.Context, experimentsDetails *experimentTyp go abortPodAutoScalerChaos(appsUnderTest, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails) if err = podAutoscalerChaosInStatefulset(ctx, experimentsDetails, clients, appsUnderTest, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not scale statefulset") + span.RecordError(err) return stacktrace.Propagate(err, "could not scale statefulset") } if err = autoscalerRecoveryInStatefulset(experimentsDetails, clients, appsUnderTest, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not revert scaling in statefulset") + span.RecordError(err) return stacktrace.Propagate(err, "could not revert scaling in statefulset") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, 
Target: fmt.Sprintf("{kind: %s}", experimentsDetails.AppKind), Reason: "application type is not supported"} + span.SetStatus(codes.Error, "application type is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Target: fmt.Sprintf("{kind: %s}", experimentsDetails.AppKind), Reason: "application type is not supported"} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection diff --git a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go index f28be58f7..b48cb7458 100644 --- a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go +++ b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go @@ -13,6 +13,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -45,6 +46,8 @@ func PrepareCPUExecStress(ctx context.Context, experimentsDetails *experimentTyp } //Starting the CPU stress experiment if err := experimentCPU(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not stress cpu") } //Waiting for the ramp time after chaos injection @@ -111,6 +114,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -172,7 +177,9 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress cpu of target pod: %s", err.Error())} + span.SetStatus(codes.Error, "failed to stress cpu of target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress cpu of target pod: %s", err.Error())} + return err } case <-signChan: log.Info("[Chaos]: Revert Started") @@ -195,6 +202,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } } if err := killStressCPUSerial(experimentsDetails, pod.Name, pod.Namespace, clients, chaosDetails); err != nil { + span.SetStatus(codes.Error, "failed to revert cpu stress") + span.RecordError(err) return stacktrace.Propagate(err, "could not revert cpu stress") } } @@ -213,6 +222,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -270,7 +281,10 @@ loop: log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to stress cpu of 
target pod: %s", err.Error())} + span.SetStatus(codes.Error, "failed to stress cpu of target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to stress cpu of target pod: %s", err.Error())} + span.RecordError(err) + return err } case <-signChan: log.Info("[Chaos]: Revert Started") diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index aa4fec6e8..0528ad781 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -21,6 +21,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -46,14 +47,22 @@ func PreparePodDelete(ctx context.Context, experimentsDetails *experimentTypes.E switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + errReason := fmt.Sprintf("sequence '%s' is not supported", experimentsDetails.Sequence) + span.SetStatus(codes.Error, errReason) + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: errReason} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -72,6 +81,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run the probes during chaos") + span.RecordError(err) return err } } @@ -85,11 +96,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -97,6 +113,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment for _, pod := range targetPodList.Items { 
kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -123,12 +141,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { + span.SetStatus(codes.Error, "could not delete the target pod") + span.RecordError(err) return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -149,6 +171,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } @@ -184,10 +208,15 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "please provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "please provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -195,6 +224,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime for _, pod := range targetPodList.Items { kind, parentName, err := workloads.GetPodOwnerTypeAndName(&pod, clients.DynamicClient) if err != nil { + span.SetStatus(codes.Error, "could not get pod owner name and kind") + span.RecordError(err) return stacktrace.Propagate(err, "could not get pod owner name and kind") } common.SetParentName(parentName, kind, pod.Namespace, chaosDetails) @@ -221,13 +252,18 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime err = clients.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) } if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), 
Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not delete the target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("{podName: %s, namespace: %s}", pod.Name, pod.Namespace), Reason: fmt.Sprintf("failed to delete the target pod: %s", err.Error())} + span.RecordError(err) + return err } } switch chaosDetails.Randomness { case true: if err := common.RandomInterval(experimentsDetails.ChaosInterval); err != nil { + span.SetStatus(codes.Error, "could not get random chaos interval") + span.RecordError(err) return stacktrace.Propagate(err, "could not get random chaos interval") } default: @@ -248,6 +284,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime Namespace: parent.Namespace, } if err = status.CheckUnTerminatedPodStatusesByWorkloadName(target, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not check pod statuses by workload names") + span.RecordError(err) return stacktrace.Propagate(err, "could not check pod statuses by workload names") } } diff --git a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go index 04b895f9b..093b12011 100644 --- a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go +++ b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go @@ -4,10 +4,6 @@ import ( "bytes" "context" "fmt" - "github.com/litmuschaos/litmus-go/pkg/cerrors" - "github.com/litmuschaos/litmus-go/pkg/telemetry" - "github.com/palantir/stacktrace" - "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -16,6 +12,12 @@ import ( "syscall" "time" + "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" + "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + clients "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-dns-chaos/types" @@ -72,8 +74,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { if err := preparePodDNSChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { + span.SetStatus(codes.Error, "Helper pod failed") + span.RecordError(resultErr) log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr) } + span.SetStatus(codes.Error, "Helper pod failed") + span.RecordError(err) log.Fatalf("helper pod failed, err: %v", err) } diff --git a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go index ce608e564..70c4a99ba 100644 --- a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go +++ b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-dns-chaos/types" @@ -33,10 +34,15 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if 
experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetPodList(experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -56,12 +62,16 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not get experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -70,14 +80,21 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } return nil @@ -91,6 +108,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -110,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment }) runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not 
create helper pod") } @@ -119,6 +140,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "helper pods are not in running state") + span.RecordError(err) return errors.Errorf("helper pods are not in running state, err: %v", err) } @@ -128,12 +151,17 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for pod-dns chaos log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -150,6 +178,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -164,6 +194,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -174,6 +206,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -187,12 +221,17 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, containerNames...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, experimentsDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for pod-dns chaos log.Info("[Cleanup]: Deleting all the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + span.SetStatus(codes.Error, "could not delete helper pod(s)") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -266,7 +305,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go b/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go index 56cecf6e1..9058ebfe9 100644 --- a/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go +++ b/chaoslib/litmus/pod-fio-stress/lib/pod-fio-stress.go @@ -14,6 +14,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -39,6 +40,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper } //Starting the Fio stress experiment if err := experimentExecution(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not inject chaos") } //Waiting for the ramp time after chaos injection @@ -118,6 +121,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -164,7 +169,10 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress cpu of target pod: %s", err.Error())} + span.SetStatus(codes.Error, "Chaos injection failed") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress storage of target pod: %s", err.Error())} + span.RecordError(err) + return err } 
case <-signChan: log.Info("[Chaos]: Revert Started") @@ -186,6 +194,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } } if err := killStressSerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients); err != nil { + span.SetStatus(codes.Error, "Chaos revert failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not revert chaos") } } @@ -202,6 +212,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -248,7 +260,10 @@ loop: log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to injcet chaos: %s", err.Error())} + span.SetStatus(codes.Error, "Chaos injection failed") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to inject chaos: %s", err.Error())} + span.RecordError(err) + return err } case <-signChan: log.Info("[Chaos]: Revert Started") @@ -269,6 +284,8 @@ loop: } } if err := killStressParallel(experimentsDetails.TargetContainer, targetPodList, experimentsDetails.ChaosKillCmd, clients); err != nil { + span.SetStatus(codes.Error, "Chaos revert failed") + span.RecordError(err) return stacktrace.Propagate(err, "could revert chaos") } diff --git a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go index cbd9c0f4f..46f208bcb 100644 --- a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go +++ b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go @@ -14,6 +14,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" "github.com/litmuschaos/litmus-go/pkg/events" @@ -47,6 +48,8 @@ func PrepareMemoryExecStress(ctx context.Context, experimentsDetails *experiment } //Starting the Memory stress experiment if err := experimentMemory(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not stress memory") } //Waiting for the ramp time after chaos injection @@ -121,6 +124,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -179,7 +184,9 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress memory of target pod: %s", err.Error())} + span.SetStatus(codes.Error, "Chaos injection failed") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Target: 
fmt.Sprintf("podName: %s, namespace: %s, container: %s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), Reason: fmt.Sprintf("failed to stress memory of target pod: %s", err.Error())} + return err } case <-signChan: log.Info("[Chaos]: Revert Started") @@ -202,6 +209,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } } if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, pod.Namespace, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + span.SetStatus(codes.Error, "could not revert memory stress") + span.RecordError(err) return stacktrace.Propagate(err, "could not revert memory stress") } } @@ -219,6 +228,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "Probe failed") + span.RecordError(err) return err } } @@ -276,7 +287,10 @@ loop: log.Warn("Chaos process OOM killed") return nil } - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to stress memory of target pod: %s", err.Error())} + span.SetStatus(codes.Error, "failed to stress memory of target pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to stress memory of target pod: %s", err.Error())} + span.RecordError(err) + return err } case <-signChan: log.Info("[Chaos]: Revert Started") diff --git a/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go b/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go index ac1b37717..cf6238ef5 100644 --- a/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go +++ b/chaoslib/litmus/pod-network-partition/lib/pod-network-partition.go @@ -13,6 +13,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/pod-network-partition/types" @@ -50,12 +51,17 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // validate the appLabels if chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide the appLabel"} + span.SetStatus(codes.Error, "appDetail is empty") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide the appLabel"} + span.RecordError(err) + return err } // Get the target pod details for the chaos execution targetPodList, err := common.GetPodList("", 100, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -77,6 +83,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // collect all the data for the network policy np := initialize() if err := np.getNetworkPolicyDetails(experimentsDetails); err != nil { + span.SetStatus(codes.Error, "could not get network policy details") + span.RecordError(err) return stacktrace.Propagate(err, "could not get network policy details") } @@ -96,6 +104,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if 
err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -107,6 +117,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy default: // creating the network policy to block the traffic if err := createNetworkPolicy(ctx, experimentsDetails, clients, np, runID); err != nil { + span.SetStatus(codes.Error, "could not create network policy") + span.RecordError(err) return stacktrace.Propagate(err, "could not create network policy") } // updating chaos status to injected for the target pods @@ -117,6 +129,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // verify the presence of network policy inside cluster if err := checkExistenceOfPolicy(experimentsDetails, clients, experimentsDetails.Timeout, experimentsDetails.Delay, runID); err != nil { + span.SetStatus(codes.Error, "could not check existence of network policy") + span.RecordError(err) return stacktrace.Propagate(err, "could not check existence of network policy") } @@ -125,6 +139,8 @@ func PrepareAndInjectChaos(ctx context.Context, experimentsDetails *experimentTy // deleting the network policy after chaos duration over if err := deleteNetworkPolicy(experimentsDetails, clients, &targetPodList, chaosDetails, experimentsDetails.Timeout, experimentsDetails.Delay, runID); err != nil { + span.SetStatus(codes.Error, "could not delete network policy") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete network policy") } @@ -170,7 +186,10 @@ func createNetworkPolicy(ctx context.Context, experimentsDetails *experimentType _, err := clients.KubeClient.NetworkingV1().NetworkPolicies(experimentsDetails.AppNS).Create(context.Background(), np, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to create network policy: %s", err.Error())} + span.SetStatus(codes.Error, "could not create network policy") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Reason: fmt.Sprintf("failed to create network policy: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go b/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go index 61fa9c44d..4bda6144f 100644 --- a/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go +++ b/chaoslib/litmus/redfish-node-restart/lib/redfish-node-restart.go @@ -16,6 +16,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) // injectChaos initiates node restart chaos on the target node @@ -64,6 +65,8 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper } //Starting the Redfish node restart experiment if err := experimentExecution(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return err } common.SetTargets(experimentsDetails.IPMIIP, "targeted", "node", chaosDetails) diff --git a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go index 6980608a6..76e6659f8 100644 --- a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go +++ 
b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go @@ -16,6 +16,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" corev1 "k8s.io/api/core/v1" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -76,14 +77,21 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } // Waiting for the ramp time after chaos injection @@ -255,16 +263,22 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment if err := setChaosMonkeyWatchers(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyWatchers, pod); err != nil { log.Errorf("[Chaos]: Failed to set watchers, err: %v ", err) + span.SetStatus(codes.Error, "failed to set watchers") + span.RecordError(err) return err } if err := startAssault(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyAssault, pod); err != nil { log.Errorf("[Chaos]: Failed to set assault, err: %v ", err) + span.SetStatus(codes.Error, "failed to set assault") + span.RecordError(err) return err } if err := enableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { log.Errorf("[Chaos]: Failed to enable chaos, err: %v ", err) + span.SetStatus(codes.Error, "failed to enable chaos") + span.RecordError(err) return err } common.SetTargets(pod.Name, "injected", "pod", chaosDetails) @@ -296,6 +310,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment } if err := disableChaosMonkey(ctx, experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + span.SetStatus(codes.Error, "failed to disable chaos monkey") + span.RecordError(err) return err } @@ -314,6 +330,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -345,16 +363,22 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime if err := setChaosMonkeyWatchers(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyWatchers, pod); err != nil { 
log.Errorf("[Chaos]: Failed to set watchers, err: %v", err) + span.SetStatus(codes.Error, "failed to set watchers") + span.RecordError(err) return err } if err := startAssault(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyAssault, pod); err != nil { log.Errorf("[Chaos]: Failed to set assault, err: %v", err) + span.SetStatus(codes.Error, "failed to set assault") + span.RecordError(err) return err } if err := enableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { log.Errorf("[Chaos]: Failed to enable chaos, err: %v", err) + span.SetStatus(codes.Error, "failed to enable chaos") + span.RecordError(err) return err } common.SetTargets(pod.Name, "injected", "pod", chaosDetails) @@ -397,7 +421,10 @@ loop: } if len(errorList) != 0 { - return cerrors.PreserveError{ErrString: fmt.Sprintf("error in disabling chaos monkey, [%s]", strings.Join(errorList, ","))} + span.SetStatus(codes.Error, "failed to disable chaos monkey") + err := cerrors.PreserveError{ErrString: fmt.Sprintf("error in disabling chaos monkey, [%s]", strings.Join(errorList, ","))} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index dd310f07a..b3660f006 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -5,10 +5,13 @@ import ( "bytes" "context" "fmt" + "github.com/litmuschaos/litmus-go/pkg/cerrors" "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "io" "os" "os/exec" @@ -90,8 +93,12 @@ func Helper(ctx context.Context, clients clients.ClientSets) { if err := prepareStressChaos(&experimentsDetails, clients, &eventsDetails, &chaosDetails, &resultDetails); err != nil { // update failstep inside chaosresult if resultErr := result.UpdateFailedStepFromHelper(&resultDetails, &chaosDetails, clients, err); resultErr != nil { + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(resultErr) log.Fatalf("helper pod failed, err: %v, resultErr: %v", err, resultErr) } + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) log.Fatalf("helper pod failed, err: %v", err) } } diff --git a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go index bfa6c878c..d37d5f7c3 100644 --- a/chaoslib/litmus/stress-chaos/lib/stress-chaos.go +++ b/chaoslib/litmus/stress-chaos/lib/stress-chaos.go @@ -11,6 +11,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" "github.com/litmuschaos/litmus-go/pkg/clients" experimentTypes "github.com/litmuschaos/litmus-go/pkg/generic/stress-chaos/types" @@ -63,10 +64,15 @@ func PrepareAndInjectStressChaos(ctx context.Context, experimentsDetails *experi // Get the target pod details for the chaos execution // if the target pod is not defined it will derive the random target pod list using pod affected percentage if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail == nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "provide one of the appLabel or TARGET_PODS"} + span.SetStatus(codes.Error, "provide one of the appLabel or TARGET_PODS") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: 
"provide one of the appLabel or TARGET_PODS"} + span.RecordError(err) + return err } targetPodList, err := common.GetTargetPods(experimentsDetails.NodeLabel, experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails) if err != nil { + span.SetStatus(codes.Error, "could not get target pods") + span.RecordError(err) return stacktrace.Propagate(err, "could not get target pods") } @@ -80,12 +86,16 @@ func PrepareAndInjectStressChaos(ctx context.Context, experimentsDetails *experi if experimentsDetails.ChaosServiceAccount == "" { experimentsDetails.ChaosServiceAccount, err = common.GetServiceAccount(experimentsDetails.ChaosNamespace, experimentsDetails.ChaosPodName, clients) if err != nil { + span.SetStatus(codes.Error, "could not get experiment service account") + span.RecordError(err) return stacktrace.Propagate(err, "could not experiment service account") } } if experimentsDetails.EngineName != "" { if err := common.SetHelperData(chaosDetails, experimentsDetails.SetHelperData, clients); err != nil { + span.SetStatus(codes.Error, "could not set helper data") + span.RecordError(err) return stacktrace.Propagate(err, "could not set helper data") } } @@ -94,14 +104,21 @@ func PrepareAndInjectStressChaos(ctx context.Context, experimentsDetails *experi switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in serial mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetPodList, clients, chaosDetails, resultDetails, eventsDetails); err != nil { + span.SetStatus(codes.Error, "could not run chaos in parallel mode") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } return nil @@ -115,6 +132,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -134,6 +153,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment }) runID := stringutils.GetRunID() if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, fmt.Sprintf("%s:%s:%s", pod.Name, pod.Namespace, experimentsDetails.TargetContainer), pod.Spec.NodeName, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } @@ -143,6 +164,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, 
experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -152,13 +175,18 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for stress chaos log.Info("[Cleanup]: Deleting the helper pod") err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients) if err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } } @@ -174,6 +202,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "probe failed") + span.RecordError(err) return err } } @@ -188,6 +218,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime } if err := createHelperPod(ctx, experimentsDetails, clients, chaosDetails, strings.Join(targetsPerNode, ";"), node, runID); err != nil { + span.SetStatus(codes.Error, "could not create helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not create helper pod") } } @@ -198,6 +230,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime log.Info("[Status]: Checking the status of the helper pods") if err := status.CheckHelperStatus(experimentsDetails.ChaosNamespace, appLabel, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) + span.SetStatus(codes.Error, "could not check helper status") + span.RecordError(err) return stacktrace.Propagate(err, "could not check helper status") } @@ -207,13 +241,18 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime podStatus, err := status.WaitForCompletion(experimentsDetails.ChaosNamespace, appLabel, clients, experimentsDetails.ChaosDuration+experimentsDetails.Timeout, common.GetContainerNames(chaosDetails)...) 
if err != nil || podStatus == "Failed" { common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - return common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + err := common.HelperFailedError(err, appLabel, chaosDetails.ChaosNamespace, true) + span.SetStatus(codes.Error, "helper pod failed") + span.RecordError(err) + return err } //Deleting all the helper pod for stress chaos log.Info("[Cleanup]: Deleting all the helper pod") err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients) if err != nil { + span.SetStatus(codes.Error, "could not delete helper pod") + span.RecordError(err) return stacktrace.Propagate(err, "could not delete helper pod(s)") } @@ -307,7 +346,10 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.SetStatus(codes.Error, "could not create helper pod") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + span.RecordError(err) + return err } return nil } diff --git a/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go b/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go index e828be326..ee8203e52 100644 --- a/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go +++ b/chaoslib/litmus/vm-poweroff/lib/vm-poweroff.go @@ -21,6 +21,7 @@ import ( experimentTypes "github.com/litmuschaos/litmus-go/pkg/vmware/vm-poweroff/types" "github.com/palantir/stacktrace" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" ) var inject, abort chan os.Signal @@ -54,14 +55,21 @@ func InjectVMPowerOffChaos(ctx context.Context, experimentsDetails *experimentTy switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err := injectChaosInSerialMode(ctx, experimentsDetails, vmIdList, cookie, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err := injectChaosInParallelMode(ctx, experimentsDetails, vmIdList, cookie, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: - return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.SetStatus(codes.Error, "sequence is not supported") + err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + span.RecordError(err) + return err } //Waiting for the ramp time after chaos injection @@ -102,6 +110,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Stopping the VM log.Infof("[Chaos]: Stopping %s VM", vmId) if err := vmware.StopVM(experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "failed to stop vm") + span.RecordError(err) return stacktrace.Propagate(err, fmt.Sprintf("failed to stop %s vm", vmId)) } @@ -110,6 +120,8 @@ func 
injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Wait for the VM to completely stop log.Infof("[Wait]: Wait for VM '%s' to get in POWERED_OFF state", vmId) if err := vmware.WaitForVMStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "VM shutdown failed") + span.RecordError(err) return stacktrace.Propagate(err, "VM shutdown failed") } @@ -117,6 +129,8 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //The OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -128,12 +142,16 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experiment //Starting the VM log.Infof("[Chaos]: Starting back %s VM", vmId) if err := vmware.StartVM(experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "failed to start back vm") + span.RecordError(err) return stacktrace.Propagate(err, "failed to start back vm") } //Wait for the VM to completely start log.Infof("[Wait]: Wait for VM '%s' to get in POWERED_ON state", vmId) if err := vmware.WaitForVMStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "vm failed to start") + span.RecordError(err) return stacktrace.Propagate(err, "vm failed to start") } @@ -176,6 +194,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Stopping the VM log.Infof("[Chaos]: Stopping %s VM", vmId) if err := vmware.StopVM(experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "failed to stop vm") + span.RecordError(err) return stacktrace.Propagate(err, fmt.Sprintf("failed to stop %s vm", vmId)) } @@ -187,6 +207,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for the VM to completely stop log.Infof("[Wait]: Wait for VM '%s' to get in POWERED_OFF state", vmId) if err := vmware.WaitForVMStop(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "vm failed to shutdown") + span.RecordError(err) return stacktrace.Propagate(err, "vm failed to shutdown") } } @@ -194,6 +216,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Running the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + span.SetStatus(codes.Error, "failed to run probes") + span.RecordError(err) return stacktrace.Propagate(err, "failed to run probes") } } @@ -207,6 +231,8 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Starting the VM log.Infof("[Chaos]: Starting back %s VM", vmId) if err := vmware.StartVM(experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "failed to start back vm") + span.RecordError(err) return stacktrace.Propagate(err, fmt.Sprintf("failed to start back %s vm", vmId)) } } @@ -216,6 +242,8 @@ func 
injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime //Wait for the VM to completely start log.Infof("[Wait]: Wait for VM '%s' to get in POWERED_ON state", vmId) if err := vmware.WaitForVMStart(experimentsDetails.Timeout, experimentsDetails.Delay, experimentsDetails.VcenterServer, vmId, cookie); err != nil { + span.SetStatus(codes.Error, "vm failed to successfully start") + span.RecordError(err) return stacktrace.Propagate(err, "vm failed to successfully start") } } diff --git a/contribute/developer-guide/templates/chaoslib_exec.tmpl b/contribute/developer-guide/templates/chaoslib_exec.tmpl index a5a6d5704..dc05d99ec 100644 --- a/contribute/developer-guide/templates/chaoslib_exec.tmpl +++ b/contribute/developer-guide/templates/chaoslib_exec.tmpl @@ -133,6 +133,9 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper } //Starting the CPU stress experiment if err := experimentExecution(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails);err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not execute experiment") + // span.RecordError(err) return stacktrace.Propagate(err, "could not execute experiment") } //Waiting for the ramp time after chaos injection diff --git a/contribute/developer-guide/templates/chaoslib_helper.tmpl b/contribute/developer-guide/templates/chaoslib_helper.tmpl index 92b091f07..940b2e0fe 100644 --- a/contribute/developer-guide/templates/chaoslib_helper.tmpl +++ b/contribute/developer-guide/templates/chaoslib_helper.tmpl @@ -136,6 +136,9 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper } //Starting the CPU stress experiment if err := experimentExecution(ctx, experimentsDetails, clients, resultDetails, eventsDetails, chaosDetails);err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not execute chaos") + // span.RecordError(err) return stacktrace.Propagate(err, "could not execute chaos") } //Waiting for the ramp time after chaos injection @@ -185,6 +188,11 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex _, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.ChaosNamespace).Create(context.Background(), helperPod, v1.CreateOptions{}) if err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not create helper pod") + // err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} + // span.RecordError(err) + // return err return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("unable to create helper pod: %s", err.Error())} } return nil diff --git a/contribute/developer-guide/templates/chaoslib_non-k8s.tmpl b/contribute/developer-guide/templates/chaoslib_non-k8s.tmpl index 717d39438..09edfc824 100644 --- a/contribute/developer-guide/templates/chaoslib_non-k8s.tmpl +++ b/contribute/developer-guide/templates/chaoslib_non-k8s.tmpl @@ -49,6 +49,11 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper // THIS TEMPLATE CONTAINS THE SELECTION BY ID FOR TAG YOU NEED TO ADD/CALL A FUNCTION HERE targetIDList := strings.Split(experimentsDetails.TargetID, ",") if experimentsDetails.TargetID == "" { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "no target id found") + // err := cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no target id found"} + // span.RecordError(err) + // return err return
cerrors.Error{ErrorCode: cerrors.ErrorTypeTargetSelection, Reason: "no target id found"} } @@ -58,13 +63,24 @@ func PrepareChaos(ctx context.Context, experimentsDetails *experimentTypes.Exper switch strings.ToLower(experimentsDetails.Sequence) { case "serial": if err = injectChaosInSerialMode(ctx, experimentsDetails, targetIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not run chaos in serial mode") + // span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in serial mode") } case "parallel": if err = injectChaosInParallelMode(ctx, experimentsDetails, targetIDList, clients, resultDetails, eventsDetails, chaosDetails); err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not run chaos in parallel mode") + // span.RecordError(err) return stacktrace.Propagate(err, "could not run chaos in parallel mode") } default: + // @TODO: setup tracing + // span.SetStatus(codes.Error, "sequence is not supported") + // err := cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} + // span.RecordError(err) + // return err return cerrors.Error{ErrorCode: cerrors.ErrorTypeGeneric, Reason: fmt.Sprintf("'%s' sequence is not supported", experimentsDetails.Sequence)} } @@ -119,6 +135,9 @@ func injectChaosInSerialMode(ctx context.Context, experimentsDetails *experimentT // The OnChaos probes execution will start in the first iteration and keep running for the entire chaos duration if len(resultDetails.ProbeDetails) != 0 && i == 0 { if err = probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not run probes") + // span.RecordError(err) return err } } @@ -188,6 +207,9 @@ func injectChaosInParallelMode(ctx context.Context, experimentsDetails *experime // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(ctx, chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + // @TODO: setup tracing + // span.SetStatus(codes.Error, "could not run probes") + // span.RecordError(err) return err } } diff --git a/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go b/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go index 4edeaf6d3..8c6399792 100644 --- a/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go +++ b/experiments/aws-ssm/aws-ssm-chaos-by-id/experiment/aws-ssm-chaos-by-id.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AWSSSMChaosByID inject the ssm chaos on ec2 instance func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine.
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -89,6 +96,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil { log.Errorf("Failed perform ssm api calls: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to perform ssm api calls") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state") @@ -129,6 +142,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to delete ssm doc: %v", err) } } + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -142,6 +157,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheckByID(experimentsDetails.EC2InstanceID, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state (post chaos)") @@ -161,6 +178,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -177,6 +196,8 @@ func AWSSSMChaosByID(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, 
"Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go b/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go index 1799ac322..29c7fd4e0 100644 --- a/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go +++ b/experiments/aws-ssm/aws-ssm-chaos-by-tag/experiment/aws-ssm-chaos-by-tag.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AWSSSMChaosByTag inject the ssm chaos on ec2 instance func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := ssm.CheckInstanceInformation(&experimentsDetails); err != nil { log.Errorf("Target instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target instance status check failed") + span.RecordError(err) return } @@ -97,6 +106,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -120,6 +131,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to delete ssm document: %v", err) } } + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -133,6 +146,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { if err := ec2.InstanceStatusCheck(experimentsDetails.TargetInstanceIDList, experimentsDetails.Region); err != nil { log.Errorf("Failed to get the ec2 instance status: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the ec2 instance status") + span.RecordError(err) return } log.Info("[Status]: EC2 instance is in running state (post chaos)") @@ -152,6 +167,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } 
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -167,6 +184,8 @@ func AWSSSMChaosByTag(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go index 85507b7ad..4a5a785a2 100644 --- a/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go +++ b/experiments/azure/azure-disk-loss/experiment/azure-disk-loss.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // AzureDiskLoss contains steps to inject chaos func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -43,6 +46,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -52,6 +57,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil { log.Errorf("fail to get the subscription id: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "fail to get the subscription id") + span.RecordError(err) return } @@ -89,6 +98,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Virtual disk status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Virtual disk status check failed") + span.RecordError(err) return } } @@ -108,6 +119,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: 
Running, Probes: Successful" @@ -124,6 +137,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -138,6 +153,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.CheckVirtualDiskWithInstance(experimentsDetails.SubscriptionID, experimentsDetails.VirtualDiskNames, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Virtual disk status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Virtual disk status check failed") + span.RecordError(err) return } } @@ -156,6 +173,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -172,6 +191,8 @@ func AzureDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to Update the chaosresult") + span.RecordError(err) return } diff --git a/experiments/azure/instance-stop/experiment/azure-instance-stop.go b/experiments/azure/instance-stop/experiment/azure-instance-stop.go index 74b3424c8..cef126bb3 100644 --- a/experiments/azure/instance-stop/experiment/azure-instance-stop.go +++ b/experiments/azure/instance-stop/experiment/azure-instance-stop.go @@ -11,6 +11,8 @@ import ( "github.com/litmuschaos/litmus-go/pkg/clients" azureCommon "github.com/litmuschaos/litmus-go/pkg/cloud/azure/common" azureStatus "github.com/litmuschaos/litmus-go/pkg/cloud/azure/instance" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "github.com/litmuschaos/litmus-go/pkg/events" "github.com/litmuschaos/litmus-go/pkg/log" @@ -23,6 +25,7 @@ import ( // AzureInstanceStop inject the azure instance stop chaos func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -44,6 +47,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) } } @@ -53,6 +58,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -74,6 +81,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if experimentsDetails.SubscriptionID, err = azureCommon.GetSubscriptionID(); err != nil { log.Errorf("Failed to get the subscription id: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "fail to get the subscription id") + span.RecordError(err) return } @@ -100,6 +109,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -116,6 +127,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Azure instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Azure instance status check failed") + span.RecordError(err) return } log.Info("[Status]: Azure instance(s) is in running state (pre-chaos)") @@ -126,6 +139,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareAzureStop(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -139,6 +154,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { if err = azureStatus.InstanceStatusCheckByName(experimentsDetails.AzureInstanceNames, experimentsDetails.ScaleSet, experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup); err != nil { log.Errorf("Azure instance status check failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Azure instance status check failed") + span.RecordError(err) return } log.Info("[Status]: Azure instance is in running state (post chaos)") @@ -159,6 +176,8 @@ func AzureInstanceStop(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" diff --git 
a/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go b/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go index 7b9ae654d..a480b9c49 100644 --- a/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go +++ b/experiments/baremetal/redfish-node-restart/experiment/redfish-node-restart.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeRestart contains steps to inject chaos func NodeRestart(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -87,6 +96,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,11 +108,15 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Unable to get node power status(pre-chaos). 
Error: %v", err) + span.SetStatus(codes.Error, "Unable to get node power status") + span.RecordError(err) return } if nodeStatus != "On" { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Node is not in running state(pre-chaos)") + span.SetStatus(codes.Error, "Node is not in running state") + span.RecordError(err) return } log.Info("[Verification]: Node is in running state(pre-chaos)") @@ -119,6 +134,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Running, Probes: Successful" @@ -133,6 +150,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -147,6 +166,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err = status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -157,6 +178,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -167,11 +190,15 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { if err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Unable to get node power status. 
Error: %v ", err) + span.SetStatus(codes.Error, "Unable to get node power status") + span.RecordError(err) return } if nodeStatus != "On" { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("[Verification]: Node is not in running state(post-chaos)") + span.SetStatus(codes.Error, "Node is not in running state") + span.RecordError(err) return } log.Info("[Verification]: Node is in running state(post-chaos)") @@ -188,6 +215,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Running, Probes: Successful" @@ -202,6 +231,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/cassandra/pod-delete/experiment/pod-delete.go b/experiments/cassandra/pod-delete/experiment/pod-delete.go index 6e7417d48..81bcf07a4 100644 --- a/experiments/cassandra/pod-delete/experiment/pod-delete.go +++ b/experiments/cassandra/pod-delete/experiment/pod-delete.go @@ -18,10 +18,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // CasssandraPodDelete inject the cassandra-pod-delete chaos func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error var ResourceVersionBefore string @@ -44,6 +47,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -53,6 +58,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -84,6 +91,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -92,6 +101,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients); err != nil { log.Errorf("[Status]: Chaos node tool status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos node tool status check failed") + span.RecordError(err) return } } @@ -109,6 +120,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -124,6 +137,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("[Liveness]: Cassandra liveness check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Cassandra liveness check failed") + span.RecordError(err) return } log.Info("[Confirmation]: The cassandra application liveness pod created successfully") @@ -136,6 +151,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PreparePodDelete(ctx, experimentsDetails.ChaoslibDetail, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -152,6 +169,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check 
failed") + span.RecordError(err) return } @@ -160,6 +179,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = cassandra.NodeToolStatusCheck(&experimentsDetails, clients); err != nil { log.Errorf("[Status]: Chaos node tool status check is failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos node tool status check failed") + span.RecordError(err) return } } @@ -176,6 +197,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -193,11 +216,15 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { if err = status.CheckApplicationStatusesByLabels(experimentsDetails.ChaoslibDetail.AppNS, "name=cassandra-liveness-deploy-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { log.Errorf("Liveness status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Liveness status check failed") + span.RecordError(err) return } if err = cassandra.LivenessCleanup(&experimentsDetails, clients, ResourceVersionBefore); err != nil { log.Errorf("Liveness cleanup failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Liveness cleanup failed") + span.RecordError(err) return } } @@ -205,6 +232,8 @@ func CasssandraPodDelete(ctx context.Context, clients clients.ClientSets) { log.Info("[The End]: Updating the chaos result of cassandra pod delete experiment (EOT)") if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go b/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go index 644aea20f..3c295e113 100644 --- a/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go +++ b/experiments/gcp/gcp-vm-disk-loss-by-label/experiment/gcp-vm-disk-loss-by-label.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // GCPVMDiskLossByLabel contains steps to inject chaos func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -90,6 +97,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -111,6 +122,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := gcp.SetTargetDiskVolumes(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to get the target gcp disk volumes, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the target gcp disk volumes") + span.RecordError(err) return } @@ -121,6 +134,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDiskVolumeLossByLabel(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -135,6 +150,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil || instanceName == "" { log.Errorf("Failed to verify disk volume attachment status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to verify disk volume attachment status") + span.RecordError(err) return } } @@ -153,6 +170,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -167,6 +186,8 @@ func GCPVMDiskLossByLabel(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: 
Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go b/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go index be1adbc8a..870edb2d7 100644 --- a/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go +++ b/experiments/gcp/gcp-vm-disk-loss/experiment/gcp-vm-disk-loss.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // VMDiskLoss injects the disk volume loss chaos func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the Chaos Result") + span.RecordError(err) return } @@ -90,6 +97,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.DiskVolumeStateCheck(computeService, &experimentsDetails); err != nil { log.Errorf("Volume status check failed pre chaos, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed pre chaos") + span.RecordError(err) return } log.Info("[Status]: Disk volumes are attached to the VM instances (pre-chaos)") @@ -121,6 +134,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.SetTargetDiskInstanceNames(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to fetch the disk instance names, err: %v", err) 
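// The paired calls this patch adds on every failure branch do different jobs in the OpenTelemetry Go SDK: span.SetStatus(codes.Error, msg) marks the span itself as failed, while span.RecordError(err) only attaches the error as an "exception" event and does not change the span status, which is why both appear together. trace.SpanFromContext(ctx) returns a no-op span when the context carries none, so the calls are safe even when tracing is disabled. A minimal sketch of the repeated pattern, using a hypothetical helper name (failSpan) that is not part of this patch:
//
//	import (
//		"context"
//
//		"go.opentelemetry.io/otel/codes"
//		"go.opentelemetry.io/otel/trace"
//	)
//
//	// failSpan marks the active span as failed and attaches err as an event.
//	func failSpan(ctx context.Context, msg string, err error) {
//		span := trace.SpanFromContext(ctx)
//		span.SetStatus(codes.Error, msg)
//		span.RecordError(err)
//	}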
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to fetch the disk instance names") + span.RecordError(err) return } @@ -129,6 +144,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareDiskVolumeLoss(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -142,6 +159,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { if err := gcp.DiskVolumeStateCheck(computeService, &experimentsDetails); err != nil { log.Errorf("Volume status check failed post chaos, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed post chaos") + span.RecordError(err) return } log.Info("[Status]: Disk volumes are attached to the VM instances (post-chaos)") @@ -159,6 +178,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -173,6 +194,8 @@ func VMDiskLoss(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go b/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go index 30dc7d7c5..55f362641 100644 --- a/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go +++ b/experiments/gcp/gcp-vm-instance-stop-by-label/experiment/gcp-vm-instance-stop-by-label.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // GCPVMInstanceStopByLabel contains steps to inject chaos func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -91,6 +98,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -105,6 +114,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err = gcp.SetTargetInstance(computeService, &experimentsDetails); err != nil { log.Errorf("Failed to get the target VM instances, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the target VM instances") + span.RecordError(err) return } @@ -122,6 +135,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareVMStopByLabel(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -135,6 +150,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { if err := gcp.InstanceStatusCheck(computeService, experimentsDetails.TargetVMInstanceNameList, experimentsDetails.GCPProjectID, []string{experimentsDetails.Zones}); err != nil { log.Errorf("Failed to get VM instance status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get VM instance status") + span.RecordError(err) return } } @@ -153,6 +170,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ 
-167,6 +186,8 @@ func GCPVMInstanceStopByLabel(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go b/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go index 8da11f7f7..2d0de8b0a 100644 --- a/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go +++ b/experiments/gcp/gcp-vm-instance-stop/experiment/gcp-vm-instance-stop.go @@ -17,11 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" "google.golang.org/api/compute/v1" ) // VMInstanceStop executes the experiment steps by injecting chaos into the specified vm instances func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var ( computeService *compute.Service @@ -47,6 +50,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -56,6 +61,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -90,6 +97,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -104,6 +113,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Failed to obtain a gcp compute service, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to obtain a gcp compute service") + span.RecordError(err) return } @@ -112,6 +123,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { if err := gcp.InstanceStatusCheckByName(computeService, experimentsDetails.ManagedInstanceGroup, experimentsDetails.Delay, experimentsDetails.Timeout, "pre-chaos", experimentsDetails.VMInstanceName, experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { log.Errorf("Failed to get the vm instance status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + 
span.SetStatus(codes.Error, "Failed to get the vm instance status") + span.RecordError(err) return } @@ -123,6 +136,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareVMStop(ctx, computeService, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -136,6 +151,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { if err := gcp.InstanceStatusCheckByName(computeService, experimentsDetails.ManagedInstanceGroup, experimentsDetails.Delay, experimentsDetails.Timeout, "post-chaos", experimentsDetails.VMInstanceName, experimentsDetails.GCPProjectID, experimentsDetails.Zones); err != nil { log.Errorf("failed to get the vm instance status, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Failed to get the vm instance status") + span.RecordError(err) return } @@ -154,6 +171,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -168,6 +187,8 @@ func VMInstanceStop(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/container-kill/experiment/container-kill.go b/experiments/generic/container-kill/experiment/container-kill.go index 05d3f8f80..0d81e11e5 100644 --- a/experiments/generic/container-kill/experiment/container-kill.go +++ b/experiments/generic/container-kill/experiment/container-kill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // ContainerKill inject the container-kill chaos func ContainerKill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareContainerKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func ContainerKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable 
to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/disk-fill/experiment/disk-fill.go b/experiments/generic/disk-fill/experiment/disk-fill.go index 19546d0e6..b3bb4f226 100644 --- a/experiments/generic/disk-fill/experiment/disk-fill.go +++ b/experiments/generic/disk-fill/experiment/disk-fill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // DiskFill inject the disk-fill chaos func DiskFill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -96,6 +105,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -109,6 +120,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDiskFill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -124,6 +137,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ 
-140,6 +155,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -155,6 +172,8 @@ func DiskFill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result err: %v\n", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go index 06ed1d565..22d5b817f 100644 --- a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go +++ b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // DockerServiceKill inject the docker-service-kill chaos func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareDockerServiceKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func DockerServiceKill(ctx 
context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func DockerServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go index c78f065ed..3fb8b2045 100644 --- a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go +++ b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // KubeletServiceKill inject the kubelet-service-kill chaos func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareKubeletKill(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func KubeletServiceKill(ctx 
context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func KubeletServiceKill(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go index 5fbd0248a..b265e4268 100644 --- a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go +++ b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeCPUHog inject the node-cpu-hog chaos func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -78,6 +85,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -87,6 +96,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -98,6 +109,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -115,6 +128,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -128,6 +143,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeCPUHog(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: CPU hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -141,6 +158,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -150,6 +169,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := 
status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -175,6 +196,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -190,6 +213,8 @@ func NodeCPUHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index 9b6533b17..efc8fefa5 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeDrain inject the node-drain chaos func NodeDrain(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -77,6 +84,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -86,6 +95,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -97,6 +108,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -114,6 +127,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -127,6 +142,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeDrain(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -140,6 +157,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -149,6 +168,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := 
status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -174,6 +195,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -189,6 +212,8 @@ func NodeDrain(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/node-io-stress/experiment/node-io-stress.go b/experiments/generic/node-io-stress/experiment/node-io-stress.go index a58e0f2e5..610001f99 100644 --- a/experiments/generic/node-io-stress/experiment/node-io-stress.go +++ b/experiments/generic/node-io-stress/experiment/node-io-stress.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // NodeIOStress inject the node-io-stress chaos func NodeIOStress(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -89,6 +98,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Auxiliary Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Auxiliary Application status check failed") + span.RecordError(err) return } } @@ -100,6 +111,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Target nodes are not in the ready state") + span.RecordError(err) return } } @@ -117,6 +130,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "NUT: Ready, Probes: Successful" @@ -130,6 +145,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareNodeIOStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: node io stress failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -143,6 +160,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) { if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } @@ -152,6 +171,8 @@ func NodeIOStress(ctx context.Context, clients 
clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -177,6 +198,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -192,6 +215,8 @@ func NodeIOStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go
index cd040c5a0..8de0e0eff 100644
--- a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go
+++ b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // NodeMemoryHog inject the node-memory-hog chaos
 func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -79,6 +86,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Errorf("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -88,6 +97,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -99,6 +110,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Target nodes are not in the ready state")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -116,6 +129,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -129,6 +144,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareNodeMemoryHog(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: node memory hog failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -142,6 +159,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Infof("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -151,6 +170,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -176,6 +197,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -191,6 +214,8 @@ func NodeMemoryHog(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
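Note: every experiment in this patch applies the same error-path instrumentation, shown in isolation below. This is a minimal sketch, assuming only the two OpenTelemetry API packages the patch imports; runStep and doWork are hypothetical stand-ins, not litmus-go functions. The caller is expected to have started a span and stored it in ctx, so trace.SpanFromContext returns that same span, and a failing step marks it with an Error status and records the error event before the early return.

    package main

    import (
    	"context"
    	"errors"

    	"go.opentelemetry.io/otel/codes"
    	"go.opentelemetry.io/otel/trace"
    )

    // runStep mirrors the patch's pattern: fetch the caller's span from the
    // context, and on failure set an Error status plus record the error
    // before bailing out. Without a configured SDK, SpanFromContext returns
    // a no-op span, so the calls are safe either way.
    func runStep(ctx context.Context) {
    	span := trace.SpanFromContext(ctx)
    	if err := doWork(); err != nil {
    		span.SetStatus(codes.Error, "work failed") // short, stable description
    		span.RecordError(err)                      // full error as a span event
    		return
    	}
    }

    func doWork() error { return errors.New("example failure") } // stand-in

    func main() { runStep(context.Background()) }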
diff --git a/experiments/generic/node-restart/experiment/node-restart.go b/experiments/generic/node-restart/experiment/node-restart.go
index 788836f76..75dc23ad7 100644
--- a/experiments/generic/node-restart/experiment/node-restart.go
+++ b/experiments/generic/node-restart/experiment/node-restart.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // NodeRestart inject the node-restart chaos
 func NodeRestart(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -77,6 +84,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Errorf("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -86,6 +95,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +108,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Target nodes are not in the ready state")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -114,6 +127,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -127,6 +142,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareNodeRestart(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: Node restart failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -140,6 +157,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Infof("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -149,6 +168,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -174,6 +195,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -189,6 +212,8 @@ func NodeRestart(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
 	if experimentsDetails.EngineName != "" {
diff --git a/experiments/generic/node-taint/experiment/node-taint.go b/experiments/generic/node-taint/experiment/node-taint.go
index 9468e1c9d..719d312c7 100644
--- a/experiments/generic/node-taint/experiment/node-taint.go
+++ b/experiments/generic/node-taint/experiment/node-taint.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // NodeTaint inject the node-taint chaos
 func NodeTaint(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -78,6 +85,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Errorf("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -87,6 +96,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -98,6 +109,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Target nodes are not in the ready state")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -115,6 +128,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -128,6 +143,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareNodeTaint(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -141,6 +158,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 	if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil {
 		log.Errorf("Application status check failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Application status check failed")
+		span.RecordError(err)
 		return
 	}
@@ -150,6 +169,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 		if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil {
 			log.Errorf("Auxiliary Application status check failed, err: %v", err)
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Auxiliary Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -175,6 +196,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "NUT: Ready, Probes: Successful"
@@ -190,6 +213,8 @@ func NodeTaint(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go
index b7b78b8c5..1fcd7c6c6 100644
--- a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go
+++ b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodAutoscaler inject the pod-autoscaler chaos
 func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -81,6 +88,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -98,6 +107,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -111,6 +122,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PreparePodAutoscaler(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -127,6 +140,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 			result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT")
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -143,6 +158,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -158,6 +175,8 @@ func PodAutoscaler(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
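Aside: the SetStatus/RecordError pair recurs verbatim at every early return in this patch. A hypothetical helper like the sketch below could collapse each pair to one call; the patch inlines the two calls instead, which keeps every hunk self-contained. failSpan is not part of litmus-go and only illustrates the possible refactor.

    package telemetry

    import (
    	"go.opentelemetry.io/otel/codes"
    	"go.opentelemetry.io/otel/trace"
    )

    // failSpan marks a span as failed and records the causing error.
    // Hypothetical refactor only; the patch inlines these two calls at
    // each early-return site.
    func failSpan(span trace.Span, msg string, err error) {
    	span.SetStatus(codes.Error, msg)
    	span.RecordError(err)
    }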
diff --git a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go
index 396d74c04..d3fdf45fe 100644
--- a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go
+++ b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodCPUHogExec inject the pod-cpu-hog-exec chaos
 func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareCPUExecStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: CPU hog failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "CPU hog failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodCPUHogExec(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go
index 6ad9c212a..e84623fef 100644
--- a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go
+++ b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodCPUHog inject the pod-cpu-hog chaos
 func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("[Error]: CPU hog failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "CPU hog failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodCPUHog(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
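Why both calls every time: RecordError only appends an "exception" event to the span; it does not change the span's status, which is why the patch always pairs it with SetStatus. If a backtrace would help triage, RecordError also accepts an event option for that, sketched below; the patch itself does not use it, and recordFailure is an illustrative stand-in, not a litmus-go function.

    package main

    import (
    	"context"
    	"errors"

    	"go.opentelemetry.io/otel/codes"
    	"go.opentelemetry.io/otel/trace"
    )

    // recordFailure shows the same pair the patch inlines, plus the
    // optional trace.WithStackTrace event option (not used by the patch)
    // that attaches the caller's stack to the recorded error event.
    func recordFailure(ctx context.Context, msg string, err error) {
    	span := trace.SpanFromContext(ctx)
    	span.SetStatus(codes.Error, msg)
    	span.RecordError(err, trace.WithStackTrace(true))
    }

    func main() {
    	recordFailure(context.Background(), "Chaos injection failed", errors.New("example"))
    }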
diff --git a/experiments/generic/pod-delete/experiment/pod-delete.go b/experiments/generic/pod-delete/experiment/pod-delete.go
index 0fb445f15..5caa4cf94 100644
--- a/experiments/generic/pod-delete/experiment/pod-delete.go
+++ b/experiments/generic/pod-delete/experiment/pod-delete.go
@@ -17,10 +17,14 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodDelete inject the pod-delete chaos
 func PodDelete(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
+
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
 	eventsDetails := types.EventDetails{}
@@ -40,6 +44,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -49,6 +55,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the chaosresult, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -56,6 +64,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 	if err := result.SetResultUID(&resultDetails, clients, &chaosDetails); err != nil {
 		log.Errorf("Unable to set the result uid, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to set the result uid")
+		span.RecordError(err)
 		return
 	}
@@ -85,6 +95,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -104,6 +116,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("failed to create %v event inside chaosengine", types.PreChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -117,6 +131,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PreparePodDelete(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -132,6 +148,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -150,6 +168,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 				log.Errorf("failed to create %v event inside chaosengine", types.PostChaosCheck)
 			}
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -165,6 +185,8 @@ func PodDelete(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the chaosresult, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to update the chaosresult")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go
index a7c739f65..260caa5bd 100644
--- a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go
+++ b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodDNSError contains steps to inject chaos
 func PodDNSError(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -81,6 +88,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -98,6 +107,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -111,6 +122,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -126,6 +139,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -142,6 +157,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -157,6 +174,8 @@ func PodDNSError(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go
index 145376a64..956e9e3c5 100644
--- a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go
+++ b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodDNSSpoof contains steps to inject chaos
 func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 	var err error
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
@@ -42,6 +45,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -51,6 +56,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -82,6 +89,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -100,6 +109,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -113,6 +124,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -128,6 +141,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -144,6 +159,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -159,6 +176,8 @@ func PodDNSSpoof(ctx context.Context, clients clients.ClientSets) {
 	if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
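For anyone wanting to confirm what these two calls actually produce, the in-memory span recorder from the OTel Go SDK makes the effect visible without a collector. A sketch under the assumption that go.opentelemetry.io/otel/sdk is available; the span name "PodDeleteFault" is illustrative, and none of this code is part of the patch.

    package main

    import (
    	"context"
    	"errors"
    	"fmt"

    	"go.opentelemetry.io/otel/codes"
    	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    	"go.opentelemetry.io/otel/sdk/trace/tracetest"
    	"go.opentelemetry.io/otel/trace"
    )

    // main starts a span the way a caller of these experiment functions
    // would, applies the patch's SetStatus/RecordError pair, and prints
    // what an exporter would see: status code Error plus one "exception"
    // event carrying the error message.
    func main() {
    	sr := tracetest.NewSpanRecorder()
    	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))
    	ctx, span := tp.Tracer("demo").Start(context.Background(), "PodDeleteFault")

    	failed := trace.SpanFromContext(ctx) // same span the experiment would fetch
    	failed.SetStatus(codes.Error, "Chaos injection failed")
    	failed.RecordError(errors.New("example failure"))
    	span.End()

    	ended := sr.Ended()[0]
    	fmt.Println(ended.Status().Code, ended.Status().Description) // Error Chaos injection failed
    	fmt.Println(ended.Events()[0].Name)                          // exception
    }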
diff --git a/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go b/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
index e23a4df08..ff516c3a4 100644
--- a/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
+++ b/experiments/generic/pod-fio-stress/experiment/pod-fio-stress.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // Experiment contains steps to inject chaos
 func PodFioStress(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -79,6 +86,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -96,6 +105,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -109,6 +120,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
 		log.Errorf("Chaos injection failed, err: %v", err)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -124,6 +137,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -140,6 +155,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -155,6 +172,8 @@ func PodFioStress(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-http-latency/experiment/pod-http-latency.go b/experiments/generic/pod-http-latency/experiment/pod-http-latency.go
index 7f8a9f1cb..79266ba78 100644
--- a/experiments/generic/pod-http-latency/experiment/pod-http-latency.go
+++ b/experiments/generic/pod-http-latency/experiment/pod-http-latency.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodHttpLatency inject the pod-http-latency chaos
 func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodHttpLatencyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodHttpLatency(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go b/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go
index 782d539f4..3b618dbc7 100644
--- a/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go
+++ b/experiments/generic/pod-http-modify-body/experiment/pod-http-modify-body.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodHttpModifyBody contains steps to inject chaos
 func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -110,6 +121,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodHttpModifyBodyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -156,6 +173,8 @@ func PodHttpModifyBody(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
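One OTel detail worth keeping in mind while reading these repeated hunks: in the Go SDK, span status follows a priority order (Ok outranks Error, which outranks Unset), and the description string is only stored alongside an Error code. So a later SetStatus(codes.Ok, ...) would override the Error these hunks set, while a SetStatus(codes.Error, ...) after an Ok is ignored. A minimal demonstration, not part of the patch:

    package main

    import (
    	"context"
    	"fmt"

    	"go.opentelemetry.io/otel/codes"
    	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    	"go.opentelemetry.io/otel/sdk/trace/tracetest"
    )

    // Demonstrates SDK status priority: Ok > Error > Unset, and the
    // description is only kept for Error. Sketch only.
    func main() {
    	sr := tracetest.NewSpanRecorder()
    	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))
    	_, span := tp.Tracer("demo").Start(context.Background(), "status-demo")

    	span.SetStatus(codes.Error, "Probes Failed") // recorded with description
    	span.SetStatus(codes.Ok, "ignored text")     // upgrades to Ok, drops text
    	span.SetStatus(codes.Error, "too late")      // ignored: cannot downgrade
    	span.End()

    	fmt.Println(sr.Ended()[0].Status().Code) // Ok
    }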
diff --git a/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go b/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go
index f05053481..0ef8d88fe 100644
--- a/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go
+++ b/experiments/generic/pod-http-modify-header/experiment/pod-http-modify-header.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodHttpModifyHeader inject the pod-http-modify-header chaos
 func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -110,6 +121,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodHttpModifyHeaderChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful")
@@ -156,6 +173,8 @@ func PodHttpModifyHeader(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git a/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go b/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go
index d5df84fd4..3c2478011 100644
--- a/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go
+++ b/experiments/generic/pod-http-reset-peer/experiment/pod-http-reset-peer.go
@@ -17,10 +17,13 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/types"
 	"github.com/litmuschaos/litmus-go/pkg/utils/common"
 	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
 )
 
 // PodHttpResetPeer contains steps to inject chaos
 func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
+	span := trace.SpanFromContext(ctx)
 
 	experimentsDetails := experimentTypes.ExperimentDetails{}
 	resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 		// Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
 		if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
 			log.Errorf("Unable to initialize the probes, err: %v", err)
+			span.SetStatus(codes.Error, "Unable to initialize the probes")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -50,6 +55,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
 		log.Errorf("Unable to Create the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to create the chaosresult")
+		span.RecordError(err)
 		return
 	}
@@ -80,6 +87,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -97,6 +106,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probe Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -110,6 +121,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 	if err := litmusLIB.PodHttpResetPeerChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
 		log.Errorf("Chaos injection failed, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Chaos injection failed")
+		span.RecordError(err)
 		return
 	}
@@ -125,6 +138,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Application status check failed")
+			span.RecordError(err)
 			return
 		}
 	}
@@ -141,6 +156,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 			types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
 			events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
 			result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+			span.SetStatus(codes.Error, "Probes Failed")
+			span.RecordError(err)
 			return
 		}
 		msg = "AUT: Running, Probes: Successful"
@@ -156,6 +173,8 @@ func PodHttpResetPeer(ctx context.Context, clients clients.ClientSets) {
 	if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
 		log.Errorf("Unable to Update the Chaos Result, err: %v", err)
 		result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+		span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+		span.RecordError(err)
 		return
 	}
diff --git 
a/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go b/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go index 447b3ef13..d30412f10 100644 --- a/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go +++ b/experiments/generic/pod-http-status-code/experiment/pod-http-status-code.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodHttpStatusCode contains steps to inject chaos func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -42,6 +45,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -88,6 +95,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -105,6 +114,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -118,6 +129,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodHttpStatusCodeChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -133,6 +146,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check 
failed") + span.RecordError(err) return } } @@ -149,6 +164,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -164,6 +181,8 @@ func PodHttpStatusCode(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go index 95b964754..4922ae06d 100644 --- a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go +++ b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodIOStress inject the pod-io-stress chaos func PodIOStress(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: Pod IO Stress failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Pod IO Stress failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodIOStress(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to 
Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go index 7cad3f0e3..2d2257d2c 100644 --- a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go +++ b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodMemoryHogExec inject the pod-memory-hog-exec chaos func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareMemoryExecStress(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: pod memory hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "pod memory hog failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") 
result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodMemoryHogExec(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go index 128e177f4..f3b869a24 100644 --- a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go +++ b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodMemoryHog inject the pod-memory-hog chaos func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +121,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectStressChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("[Error]: pod memory hog failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "pod memory hog failed") + span.RecordError(err) return } @@ -125,6 +138,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +156,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +173,8 @@ func PodMemoryHog(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, 
"Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go index 183b2021b..257a6f910 100644 --- a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go +++ b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkCorruption inject the pod-network-corruption chaos func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -81,6 +88,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -98,6 +107,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -111,6 +122,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodNetworkCorruptionChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -126,6 +139,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, 
clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -142,6 +157,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -157,6 +174,8 @@ func PodNetworkCorruption(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go index f1e42c839..09e277f75 100644 --- a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go +++ b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkDuplication inject the pod-network-duplication chaos func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -81,6 +88,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -98,6 +107,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -111,6 +122,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodNetworkDuplicationChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -126,6 +139,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -142,6 +157,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -157,6 +174,8 @@ func PodNetworkDuplication(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, 
clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go index efa7699a0..09766a5a2 100644 --- a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go +++ b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkLatency inject the pod-network-latency chaos func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -81,6 +88,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +106,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -111,6 +122,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodNetworkLatencyChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -126,6 +139,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, 
clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -142,6 +157,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -157,6 +174,8 @@ func PodNetworkLatency(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go index cfb538156..2c6c3252a 100644 --- a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go +++ b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go @@ -17,10 +17,14 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkLoss inject the pod-network-loss chaos func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) + experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} chaosDetails := types.ChaosDetails{} @@ -40,6 +44,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -49,6 +55,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +88,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -97,6 +107,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -110,6 +122,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PodNetworkLossChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -125,6 +139,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -141,6 +157,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -156,6 +174,8 @@ func PodNetworkLoss(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, 
"Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/generic/pod-network-partition/experiment/pod-network-partition.go b/experiments/generic/pod-network-partition/experiment/pod-network-partition.go index 44e73cd67..0d2b223f5 100644 --- a/experiments/generic/pod-network-partition/experiment/pod-network-partition.go +++ b/experiments/generic/pod-network-partition/experiment/pod-network-partition.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // PodNetworkPartition inject the pod-network-partition chaos func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -41,6 +44,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -50,6 +55,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -96,6 +105,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -109,6 +120,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { if err := litmusLIB.PrepareAndInjectChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) log.Errorf("Chaos injection failed, err: %v", err) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -124,6 +137,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, 
&chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application status check failed") + span.RecordError(err) return } } @@ -140,6 +155,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running", "Successful") @@ -155,6 +172,8 @@ func PodNetworkPartition(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go index ef33545cf..31e3597ea 100644 --- a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go +++ b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go @@ -19,10 +19,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // KafkaBrokerPodFailure derive and kill the kafka broker leader func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) experimentsDetails := experimentTypes.ExperimentDetails{} resultDetails := types.ResultDetails{} @@ -43,6 +46,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes, err: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -52,6 +57,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to Create the Chaos Result, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to Create the Chaos Result") + span.RecordError(err) return } @@ -79,6 +86,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Cluster health check failed") + span.RecordError(err) return } } @@ -96,6 +105,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -112,6 +123,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { if err != nil { log.Errorf("Liveness check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Liveness check failed") + span.RecordError(err) return } log.Info("The Liveness pod gets established") @@ -129,6 +142,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { if err := kafkaPodDelete.PreparePodDelete(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -145,6 +160,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Cluster health check failed") + span.RecordError(err) return } } @@ -161,6 +178,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = common.GetStatusMessage(chaosDetails.DefaultHealthCheck, "AUT: Running",
"Successful") @@ -178,6 +197,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { if err := status.CheckApplicationStatusesByLabels(experimentsDetails.ChaoslibDetail.AppNS, "name=kafka-liveness-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { log.Errorf("Application liveness status check failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Application liveness status check failed") + span.RecordError(err) return } @@ -185,6 +206,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { if err := kafka.LivenessCleanup(&experimentsDetails, clients); err != nil { log.Errorf("liveness cleanup failed, err: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "liveness cleanup failed") + span.RecordError(err) return } } @@ -193,6 +216,8 @@ func KafkaBrokerPodFailure(ctx context.Context, clients clients.ClientSets) { log.Info("[The End]: Updating the chaos result of kafka pod delete experiment (EOT)") if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to Update the Chaos Result, err: %v", err) + span.SetStatus(codes.Error, "Unable to Update the Chaos Result") + span.RecordError(err) return } diff --git a/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go b/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go index 20d3cc248..280540321 100644 --- a/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go +++ b/experiments/kube-aws/ebs-loss-by-id/experiment/ebs-loss-by-id.go @@ -17,10 +17,13 @@ import ( "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) // EBSLossByID inject the ebs volume loss chaos func EBSLossByID(ctx context.Context, clients clients.ClientSets) { + span := trace.SpanFromContext(ctx) var err error experimentsDetails := experimentTypes.ExperimentDetails{} @@ -42,6 +45,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { // Get values from chaosengine. 
Bail out upon error, as we haven't entered exp business logic yet if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil { log.Errorf("Unable to initialize the probes: %v", err) + span.SetStatus(codes.Error, "Unable to initialize the probes") + span.RecordError(err) return } } @@ -51,6 +56,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { log.Errorf("Unable to create the chaosresult: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Unable to create the chaosresult") + span.RecordError(err) return } @@ -80,6 +87,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { if err = aws.EBSStateCheckByID(experimentsDetails.EBSVolumeID, experimentsDetails.Region); err != nil { log.Errorf("Volume status check failed pre chaos: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed pre chaos") + span.RecordError(err) return } } @@ -99,6 +108,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probe Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -115,6 +126,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { if err = litmusLIB.PrepareEBSLossByID(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Chaos injection failed") + span.RecordError(err) return } @@ -128,6 +141,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { if err = aws.EBSStateCheckByID(experimentsDetails.EBSVolumeID, experimentsDetails.Region); err != nil { log.Errorf("Volume status check failed post chaos: %v", err) result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Volume status check failed post chaos") + span.RecordError(err) return } } @@ -146,6 +161,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck) } result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) + span.SetStatus(codes.Error, "Probes Failed") + span.RecordError(err) return } msg = "AUT: Running, Probes: Successful" @@ -162,6 +179,8 @@ func EBSLossByID(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to update the chaosresult") + span.RecordError(err) return } diff --git a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go b/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go index 44f201efa..feb0610e3 100644 --- a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go +++ 
diff --git a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go b/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
index 44f201efa..feb0610e3 100644
--- a/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
+++ b/experiments/kube-aws/ebs-loss-by-tag/experiment/ebs-loss-by-tag.go
@@ -17,10 +17,13 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/types"
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/sirupsen/logrus"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 )

 // EBSLossByTag inject the ebs volume loss chaos
 func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
+    span := trace.SpanFromContext(ctx)

     experimentsDetails := experimentTypes.ExperimentDetails{}
     resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -50,6 +55,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to create the chaosresult: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to create the chaosresult")
+        span.RecordError(err)
         return
     }

@@ -79,6 +86,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
     if err := aws.SetTargetVolumeIDs(&experimentsDetails); err != nil {
         log.Errorf("Failed to set the volumes under chaos: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Failed to set the volumes under chaos")
+        span.RecordError(err)
         return
     }

@@ -97,6 +106,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -113,6 +124,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
     if err := litmusLIB.PrepareEBSLossByTag(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
         log.Errorf("Chaos injection failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }

@@ -126,6 +139,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
         if err := aws.PostChaosVolumeStatusCheck(&experimentsDetails); err != nil {
             log.Errorf("Failed to verify that the EBS volume is attached to an instance: %v", err)
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Failed to verify that the EBS volume is attached to an instance")
+            span.RecordError(err)
             return
         }
     }
@@ -144,6 +159,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -160,6 +177,8 @@ func EBSLossByTag(ctx context.Context, clients clients.ClientSets) {
     log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to update the chaosresult: %v", err)
+        span.SetStatus(codes.Error, "Unable to update the chaosresult")
+        span.RecordError(err)
         return
     }

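Worth keeping in mind for the SetStatus calls throughout this patch: in the Go SDK (at the time of writing; this is SDK behaviour, not anything the diff relies on) the description string is only retained when the code is codes.Error, and a span's status can only be upgraded (Unset to Error to Ok), so an explicit Ok set on a success path cannot later be clobbered by an Error. A sketch under that assumption:

package main

import (
	"context"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

func main() {
	tp := sdktrace.NewTracerProvider()
	_, span := tp.Tracer("sketch").Start(context.Background(), "op")
	span.SetStatus(codes.Error, "first failure") // kept: Error upgrades Unset
	span.SetStatus(codes.Ok, "")                 // kept: Ok upgrades Error
	span.SetStatus(codes.Error, "late failure")  // dropped: Error cannot replace Ok
	span.End()
}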
diff --git a/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go b/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
index 95010a803..c1d587d99 100644
--- a/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
+++ b/experiments/kube-aws/ec2-terminate-by-id/experiment/ec2-terminate-by-id.go
@@ -18,10 +18,13 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/types"
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/sirupsen/logrus"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 )

 // EC2TerminateByID inject the ebs volume loss chaos
 func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
+    span := trace.SpanFromContext(ctx)

     var (
         err error
@@ -47,6 +50,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -56,6 +61,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
     if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to create the chaosresult: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to create the chaosresult")
+        span.RecordError(err)
         return
     }

@@ -95,6 +102,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -112,6 +121,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
     if err = aws.InstanceStatusCheckByID(experimentsDetails.Ec2InstanceID, experimentsDetails.Region); err != nil {
         log.Errorf("EC2 instance status check failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "EC2 instance status check failed")
+        span.RecordError(err)
         return
     }
     log.Info("[Status]: EC2 instance is in running state")
@@ -124,6 +135,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
         if err != nil {
             log.Errorf("Pre chaos node status check failed: %v", err)
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Pre chaos node status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -133,6 +146,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
     if err = litmusLIB.PrepareEC2TerminateByID(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
         log.Errorf("Chaos injection failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }

@@ -147,6 +162,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
     if err = aws.InstanceStatusCheckByID(experimentsDetails.Ec2InstanceID, experimentsDetails.Region); err != nil {
         log.Errorf("EC2 instance status check failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "EC2 instance status check failed")
+        span.RecordError(err)
         return
     }
     log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -158,6 +175,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
         if err := aws.PostChaosNodeCountCheck(activeNodeCount, autoScalingGroupName, experimentsDetails.Region); err != nil {
             log.Errorf("Post chaos active node count check failed: %v", err)
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Post chaos active node count check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -176,6 +195,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -192,6 +213,8 @@ func EC2TerminateByID(ctx context.Context, clients clients.ClientSets) {
     log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
     if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to update the chaosresult: %v", err)
+        span.SetStatus(codes.Error, "Unable to update the chaosresult")
+        span.RecordError(err)
        return
    }

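One way to lock this instrumentation in would be a unit test against the SDK's in-memory span recorder. The sketch below is illustrative only (no such test exists in this diff) and assumes the standard tracetest package; the span name and error text are stand-ins:

package telemetry_test

import (
	"context"
	"errors"
	"testing"

	"go.opentelemetry.io/otel/codes"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/sdk/trace/tracetest"
)

func TestSpanRecordsFailure(t *testing.T) {
	sr := tracetest.NewSpanRecorder()
	tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr))

	_, span := tp.Tracer("test").Start(context.Background(), "EC2TerminateByID")
	// Stand-in for an experiment early-return path.
	err := errors.New("EC2 instance status check failed")
	span.SetStatus(codes.Error, "EC2 instance status check failed")
	span.RecordError(err)
	span.End()

	got := sr.Ended()[0]
	if got.Status().Code != codes.Error {
		t.Fatalf("want Error status, got %v", got.Status().Code)
	}
	if len(got.Events()) == 0 || got.Events()[0].Name != "exception" {
		t.Fatalf("want an exception event, got %v", got.Events())
	}
}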
diff --git a/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go b/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
index 5d4a0ee8e..10c0ab535 100644
--- a/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
+++ b/experiments/kube-aws/ec2-terminate-by-tag/experiment/ec2-terminate-tag.go
@@ -17,10 +17,13 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/types"
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/sirupsen/logrus"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 )

 // EC2TerminateByTag inject the ebs volume loss chaos
 func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
+    span := trace.SpanFromContext(ctx)

     var (
         err error
@@ -46,6 +49,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err = types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -55,6 +60,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
     if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to create the chaosresult: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to create the chaosresult")
+        span.RecordError(err)
         return
     }

@@ -95,6 +102,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -110,6 +119,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
     if err = litmusLIB.SetTargetInstance(&experimentsDetails); err != nil {
         log.Errorf("Failed to get the target ec2 instance: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Failed to get the target ec2 instance")
+        span.RecordError(err)
         return
     }

@@ -120,6 +131,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
         if err != nil {
             log.Errorf("Pre chaos node status check failed: %v", err)
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Pre chaos node status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -129,6 +142,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
     if err = litmusLIB.PrepareEC2TerminateByTag(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
         log.Errorf("Chaos injection failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }

@@ -143,6 +158,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
     if err = aws.InstanceStatusCheck(experimentsDetails.TargetInstanceIDList, experimentsDetails.Region); err != nil {
         log.Errorf("Failed to get the ec2 instance status as running post chaos: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Failed to get the ec2 instance status as running post chaos")
+        span.RecordError(err)
         return
     }
     log.Info("[Status]: EC2 instance is in running state (post chaos)")
@@ -154,6 +171,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
         if err = aws.PostChaosNodeCountCheck(activeNodeCount, autoScalingGroupName, experimentsDetails.Region); err != nil {
             log.Errorf("Post chaos active node count check failed: %v", err)
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Post chaos active node count check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -171,6 +190,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -187,6 +208,8 @@ func EC2TerminateByTag(ctx context.Context, clients clients.ClientSets) {
     log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
     if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to update the chaosresult: %v", err)
+        span.SetStatus(codes.Error, "Unable to update the chaosresult")
+        span.RecordError(err)
         return
     }

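RecordError also accepts event options. If richer failure context were ever wanted (again, not something this diff does), a call site could attach a stack trace and attributes; the helper name, attribute key, and region value below are invented for illustration:

package main

import (
	"context"
	"errors"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

func recordWithContext(ctx context.Context, err error) {
	span := trace.SpanFromContext(ctx)
	span.SetStatus(codes.Error, "Post chaos active node count check failed")
	// WithStackTrace captures the caller's stack on the exception event;
	// WithAttributes tags it. Both are standard otel/trace event options.
	span.RecordError(err,
		trace.WithStackTrace(true),
		trace.WithAttributes(attribute.String("aws.region", "us-east-1")), // illustrative value
	)
}

func main() {
	recordWithContext(context.Background(), errors.New("node count mismatch"))
}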
diff --git a/experiments/load/k6-loadgen/experiment/k6-loadgen.go b/experiments/load/k6-loadgen/experiment/k6-loadgen.go
index 0a62d3949..88e1d4894 100644
--- a/experiments/load/k6-loadgen/experiment/k6-loadgen.go
+++ b/experiments/load/k6-loadgen/experiment/k6-loadgen.go
@@ -17,10 +17,13 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/types"
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/sirupsen/logrus"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 )

 // Experiment contains steps to inject chaos
 func Experiment(ctx context.Context, clients clients.ClientSets) {
+    span := trace.SpanFromContext(ctx)

     experimentsDetails := experimentTypes.ExperimentDetails{}
     resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes, err: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -50,6 +55,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to Create the Chaos Result, err: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to create the chaosresult")
+        span.RecordError(err)
         return
     }

@@ -79,6 +86,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
             events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Application status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -96,6 +105,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
             events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -108,6 +119,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
     if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
         log.Errorf("Chaos injection failed, err: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }
     log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName)
@@ -122,6 +135,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
             types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
             events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Application status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -138,6 +153,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
             types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
             events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -153,6 +170,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets) {
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to Update the Chaos Result, err: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to update the chaosresult")
+        span.RecordError(err)
         return
     }

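For orientation: the span these functions pull out of ctx is expected to be started by the caller (the experiment runner), roughly along the lines below. The tracer name and function names are illustrative, not taken from this diff:

package main

import (
	"context"

	"go.opentelemetry.io/otel"
)

func runExperiment(ctx context.Context, name string) {
	// The caller starts the span and threads ctx through; callees pick it
	// up with trace.SpanFromContext(ctx) as in the experiment files above.
	ctx, span := otel.Tracer("litmus-go/experiments").Start(ctx, name) // tracer name illustrative
	defer span.End()

	experimentBody(ctx)
}

func experimentBody(ctx context.Context) { /* ... */ }

func main() {
	runExperiment(context.Background(), "k6-loadgen")
}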
diff --git a/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go b/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
index 45d27ef62..699c4da0b 100644
--- a/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
+++ b/experiments/spring-boot/spring-boot-faults/experiment/spring-boot-faults.go
@@ -17,10 +17,13 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/types"
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     "github.com/sirupsen/logrus"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"
 )

 // Experiment contains steps to inject chaos
 func Experiment(ctx context.Context, clients clients.ClientSets, expName string) {
+    span := trace.SpanFromContext(ctx)

     experimentsDetails := experimentTypes.ExperimentDetails{}
     resultDetails := types.ResultDetails{}
@@ -41,6 +44,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes, err: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -50,6 +55,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to Create the Chaos Result, err: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to Create the Chaos Result")
+        span.RecordError(err)
         return
     }

@@ -78,6 +85,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Failed to get target pod list")
+            span.RecordError(err)
             return
         }
         podNames := make([]string, 0, 1)
@@ -93,6 +102,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Some target pods don't have the chaos monkey endpoint")
+            span.RecordError(err)
             return
         }

@@ -104,6 +115,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Application status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -120,6 +133,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -134,6 +149,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
     if err := litmusLIB.PrepareChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil {
         log.Errorf("Chaos injection failed, err: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }

@@ -150,6 +167,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Application status check failed")
+            span.RecordError(err)
             return
         }
     }
@@ -166,6 +185,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
             types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails)
             _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine")
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "AUT: Running, Probes: Successful"
@@ -180,6 +201,8 @@ func Experiment(ctx context.Context, clients clients.ClientSets, expName string)
     log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to Update the Chaos Result, err: %v", err)
+        span.SetStatus(codes.Error, "Unable to Update the Chaos Result")
+        span.RecordError(err)
         return
     }

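An alternative shape that would avoid repeating the pair at every branch (a refactor sketch, not what this diff does, and one that would require these functions to return their error rather than bare-return) is to record once in a deferred block keyed off a named error return:

package main

import (
	"context"
	"errors"

	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
)

// experiment returns an error instead of bare-returning, so one deferred
// block sets the span status for every failure path. Names are invented.
func experiment(ctx context.Context) (err error) {
	span := trace.SpanFromContext(ctx)
	defer func() {
		if err != nil {
			span.SetStatus(codes.Error, err.Error())
			span.RecordError(err)
		}
	}()

	if err = preChaosChecks(); err != nil {
		return err
	}
	return nil
}

func preChaosChecks() error { return errors.New("AUT: Not Running") }

func main() { _ = experiment(context.Background()) }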
diff --git a/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go b/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
index 551d717d0..010f71049 100644
--- a/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
+++ b/experiments/vmware/vm-poweroff/experiment/vm-poweroff.go
@@ -16,6 +16,8 @@ import (
     "github.com/litmuschaos/litmus-go/pkg/utils/common"
     experimentEnv "github.com/litmuschaos/litmus-go/pkg/vmware/vm-poweroff/environment"
     experimentTypes "github.com/litmuschaos/litmus-go/pkg/vmware/vm-poweroff/types"
+    "go.opentelemetry.io/otel/codes"
+    "go.opentelemetry.io/otel/trace"

     "github.com/sirupsen/logrus"
 )
@@ -24,6 +26,7 @@ var err error

 // VMPoweroff contains steps to inject vm-power-off chaos
 func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
+    span := trace.SpanFromContext(ctx)

     experimentsDetails := experimentTypes.ExperimentDetails{}
     resultDetails := types.ResultDetails{}
@@ -44,6 +47,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
         // Get values from chaosengine. Bail out upon error, as we haven't entered exp business logic yet
         if err := types.GetValuesFromChaosEngine(&chaosDetails, clients, &resultDetails); err != nil {
             log.Errorf("Unable to initialize the probes: %v", err)
+            span.SetStatus(codes.Error, "Unable to initialize the probes")
+            span.RecordError(err)
             return
         }
     }
@@ -53,6 +58,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil {
         log.Errorf("Unable to create the chaosresult: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Unable to create the chaosresult")
+        span.RecordError(err)
         return
     }

@@ -72,6 +79,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
         if err != nil {
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
             log.Errorf("Unable to get the VM ID, err: %v", err)
+            span.SetStatus(codes.Error, "Unable to get the VM ID")
+            span.RecordError(err)
             return
         }
     }
@@ -91,6 +100,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     if err != nil {
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
         log.Errorf("Vcenter Login failed: %v", err)
+        span.SetStatus(codes.Error, "Vcenter Login failed")
+        span.RecordError(err)
         return
     }

@@ -99,6 +110,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     if err := vmware.VMStatusCheck(experimentsDetails.VcenterServer, experimentsDetails.VMIds, cookie); err != nil {
         log.Errorf("VM status check failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "VM status check failed")
+        span.RecordError(err)
         return
     }
     log.Info("[Verification]: VMs are in running state (pre-chaos)")
@@ -119,6 +132,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PreChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probe Failed")
+            span.RecordError(err)
             return
         }
         msg = "IUT: Running, Probes: Successful"
@@ -135,6 +150,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     if err = litmusLIB.InjectVMPowerOffChaos(ctx, &experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails, cookie); err != nil {
         log.Errorf("Chaos injection failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "Chaos injection failed")
+        span.RecordError(err)
         return
     }

@@ -149,6 +166,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     if err := vmware.VMStatusCheck(experimentsDetails.VcenterServer, experimentsDetails.VMIds, cookie); err != nil {
         log.Errorf("VM status check failed: %v", err)
         result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+        span.SetStatus(codes.Error, "VM status check failed")
+        span.RecordError(err)
         return
     }
     log.Info("[Verification]: VMs are in running state (post-chaos)")
@@ -168,6 +187,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
                 log.Errorf("Failed to create %v event inside chaosengine", types.PostChaosCheck)
             }
             result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails)
+            span.SetStatus(codes.Error, "Probes Failed")
+            span.RecordError(err)
             return
         }
         msg = "IUT: Running, Probes: Successful"
@@ -184,6 +205,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) {
     log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName)
     if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil {
         log.Errorf("Unable to update the chaosresult: %v", err)
+        span.SetStatus(codes.Error, "Unable to update the chaosresult")
+        span.RecordError(err)
         return
     }

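If failed experiment traces ever need to be sliced by experiment or target in a tracing backend, span attributes are the usual tool. A hypothetical sketch (the attribute keys and values are invented here, not part of the diff):

package main

import (
	"context"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

func tagExperimentSpan(ctx context.Context, experiment, target string) {
	span := trace.SpanFromContext(ctx)
	// Key names are invented for illustration; pick a stable convention.
	span.SetAttributes(
		attribute.String("litmus.experiment", experiment),
		attribute.String("litmus.target", target),
	)
}

func main() {
	tagExperimentSpan(context.Background(), "vm-poweroff", "vm-42")
}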
Successful" @@ -184,6 +205,8 @@ func VMPoweroff(ctx context.Context, clients clients.ClientSets) { log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) if err = result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { log.Errorf("Unable to update the chaosresult: %v", err) + span.SetStatus(codes.Error, "Unable to update the chaosresult") + span.RecordError(err) return } diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go index fe6e1a271..1237beb79 100644 --- a/pkg/probe/probe.go +++ b/pkg/probe/probe.go @@ -18,6 +18,7 @@ import ( "github.com/palantir/stacktrace" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -32,6 +33,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl // get the probes details from the chaosengine probes, err := getProbesFromChaosEngine(chaosDetails, clients) if err != nil { + span.SetStatus(codes.Error, "getProbesFromChaosEngine failed") + span.RecordError(err) return err } @@ -42,6 +45,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl switch strings.ToLower(probe.Mode) { case "sot", "edge", "continuous": if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } } @@ -51,6 +56,8 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl for _, probe := range probes { if strings.ToLower(probe.Mode) == "onchaos" { if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } } @@ -72,13 +79,19 @@ func RunProbes(ctx context.Context, chaosDetails *types.ChaosDetails, clients cl } } if len(probeError) != 0 { - return cerrors.PreserveError{ErrString: fmt.Sprintf("[%s]", strings.Join(probeError, ","))} + errString := fmt.Sprintf("[%s]", strings.Join(probeError, ",")) + span.SetStatus(codes.Error, errString) + err := cerrors.PreserveError{ErrString: errString} + span.RecordError(err) + return err } // executes the eot and edge modes for _, probe := range probes { switch strings.ToLower(probe.Mode) { case "eot", "edge": if err := execute(probe, chaosDetails, clients, resultDetails, phase); err != nil { + span.SetStatus(codes.Error, fmt.Sprintf("%s mode %s probe execute failed", probe.Mode, probe.Name)) + span.RecordError(err) return err } }