From 7eb0aaed24fee23f2363d63d349fbbc67ea2e528 Mon Sep 17 00:00:00 2001 From: Amit Schendel Date: Wed, 5 Jun 2024 07:48:07 +0000 Subject: [PATCH 1/4] Adding a check if the container is still being monitord before reading from perf buffer Signed-off-by: Amit Schendel --- pkg/containerwatcher/v1/container_watcher_private.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/containerwatcher/v1/container_watcher_private.go b/pkg/containerwatcher/v1/container_watcher_private.go index 0521222b..7cd84c62 100644 --- a/pkg/containerwatcher/v1/container_watcher_private.go +++ b/pkg/containerwatcher/v1/container_watcher_private.go @@ -44,6 +44,13 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub // Read the syscall tracer events in a separate goroutine. go func() { for { + if !ch.timeBasedContainers.Contains(notif.Container.Runtime.ContainerID) && !ch.preRunningContainersIDs.Contains(notif.Container.Runtime.ContainerID) { + logger.L().Info("stop monitor on container - container has been removed", + helpers.String("container ID", notif.Container.Runtime.ContainerID), + helpers.String("k8s workload", k8sContainerID)) + return + } + evs, err := ch.syscallTracer.Read(notif.Container.Runtime.ContainerID) if err != nil { logger.L().Debug("syscalls perf buffer closed", helpers.String("error", err.Error())) From 3db380727294fc4d8cc5680d2cb1b1b609f8c221 Mon Sep 17 00:00:00 2001 From: Amit Schendel Date: Wed, 5 Jun 2024 08:54:02 +0000 Subject: [PATCH 2/4] Adding context instead of if statement Signed-off-by: Amit Schendel --- .../v1/container_watcher_private.go | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pkg/containerwatcher/v1/container_watcher_private.go b/pkg/containerwatcher/v1/container_watcher_private.go index 7cd84c62..0ac09192 100644 --- a/pkg/containerwatcher/v1/container_watcher_private.go +++ b/pkg/containerwatcher/v1/container_watcher_private.go @@ -31,6 +31,8 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub ch.timeBasedContainers.Add(notif.Container.Runtime.ContainerID) } + tracingContext := context.Background() + switch notif.Type { case containercollection.EventTypeAddContainer: logger.L().Info("start monitor on container", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) @@ -41,28 +43,29 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub logger.L().Fatal("attaching container to syscall tracer", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID), helpers.Error(err)) } - // Read the syscall tracer events in a separate goroutine. + // Read the syscall tracer events in a separate goroutine until the container is removed (signaled from the tracing context). go func() { for { - if !ch.timeBasedContainers.Contains(notif.Container.Runtime.ContainerID) && !ch.preRunningContainersIDs.Contains(notif.Container.Runtime.ContainerID) { - logger.L().Info("stop monitor on container - container has been removed", + select { + case <-tracingContext.Done(): + logger.L().Info("stop monitor on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) return + default: + evs, err := ch.syscallTracer.Read(notif.Container.Runtime.ContainerID) + if err != nil { + logger.L().Debug("syscalls perf buffer closed", helpers.String("error", err.Error())) + return + } + for _, ev := range evs { + ev.SetContainerMetadata(notif.Container) + ch.syscallEventCallback(ev) + } + + // Sleep for a while before reading the next batch of events. + time.Sleep(2 * time.Second) // TODO: make this configurable. } - - evs, err := ch.syscallTracer.Read(notif.Container.Runtime.ContainerID) - if err != nil { - logger.L().Debug("syscalls perf buffer closed", helpers.String("error", err.Error())) - return - } - for _, ev := range evs { - ev.SetContainerMetadata(notif.Container) - ch.syscallEventCallback(ev) - } - - // Sleep for a while before reading the next batch of events. - time.Sleep(2 * time.Second) // TODO: make this configurable. } }() } @@ -80,6 +83,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub logger.L().Info("stop monitor on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) + tracingContext.Done() ch.preRunningContainersIDs.Remove(notif.Container.Runtime.ContainerID) ch.timeBasedContainers.Remove(notif.Container.Runtime.ContainerID) ch.syscallTracer.Detach(notif.Container.Mntns) From b1d3557a5f75eb597fba98329d4a7aacf05c6324 Mon Sep 17 00:00:00 2001 From: Amit Schendel Date: Wed, 5 Jun 2024 09:46:45 +0000 Subject: [PATCH 3/4] Using channel to close traceloop perf buffer reading Signed-off-by: Amit Schendel --- pkg/containerwatcher/v1/container_watcher_private.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/containerwatcher/v1/container_watcher_private.go b/pkg/containerwatcher/v1/container_watcher_private.go index 0ac09192..10e28614 100644 --- a/pkg/containerwatcher/v1/container_watcher_private.go +++ b/pkg/containerwatcher/v1/container_watcher_private.go @@ -31,7 +31,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub ch.timeBasedContainers.Add(notif.Container.Runtime.ContainerID) } - tracingContext := context.Background() + tracingContext := make(chan struct{}) switch notif.Type { case containercollection.EventTypeAddContainer: @@ -47,8 +47,8 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub go func() { for { select { - case <-tracingContext.Done(): - logger.L().Info("stop monitor on container - container has terminated", + case <-tracingContext: + logger.L().Info("stop traceloop on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) return @@ -64,7 +64,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub } // Sleep for a while before reading the next batch of events. - time.Sleep(2 * time.Second) // TODO: make this configurable. + time.Sleep(5 * time.Second) // TODO: make this configurable. } } }() @@ -83,7 +83,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub logger.L().Info("stop monitor on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) - tracingContext.Done() + close(tracingContext) ch.preRunningContainersIDs.Remove(notif.Container.Runtime.ContainerID) ch.timeBasedContainers.Remove(notif.Container.Runtime.ContainerID) ch.syscallTracer.Detach(notif.Container.Mntns) From b65c43ab0c7af48e04d3d5c89493d9b9ba538c51 Mon Sep 17 00:00:00 2001 From: Amit Schendel Date: Wed, 5 Jun 2024 10:18:59 +0000 Subject: [PATCH 4/4] Rename variable Signed-off-by: Amit Schendel --- pkg/containerwatcher/v1/container_watcher_private.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/containerwatcher/v1/container_watcher_private.go b/pkg/containerwatcher/v1/container_watcher_private.go index 10e28614..f15ef160 100644 --- a/pkg/containerwatcher/v1/container_watcher_private.go +++ b/pkg/containerwatcher/v1/container_watcher_private.go @@ -31,7 +31,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub ch.timeBasedContainers.Add(notif.Container.Runtime.ContainerID) } - tracingContext := make(chan struct{}) + traceloopCancelChannel := make(chan struct{}) switch notif.Type { case containercollection.EventTypeAddContainer: @@ -43,11 +43,11 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub logger.L().Fatal("attaching container to syscall tracer", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID), helpers.Error(err)) } - // Read the syscall tracer events in a separate goroutine until the container is removed (signaled from the tracing context). + // Read the syscall tracer events in a separate goroutine until the container is removed (signaled from the traceloopCancelChannel). go func() { for { select { - case <-tracingContext: + case <-traceloopCancelChannel: logger.L().Info("stop traceloop on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) @@ -83,7 +83,7 @@ func (ch *IGContainerWatcher) containerCallback(notif containercollection.PubSub logger.L().Info("stop monitor on container - container has terminated", helpers.String("container ID", notif.Container.Runtime.ContainerID), helpers.String("k8s workload", k8sContainerID)) - close(tracingContext) + close(traceloopCancelChannel) ch.preRunningContainersIDs.Remove(notif.Container.Runtime.ContainerID) ch.timeBasedContainers.Remove(notif.Container.Runtime.ContainerID) ch.syscallTracer.Detach(notif.Container.Mntns)