Skip to content

Commit e52e65b

Browse files
authored
Merge pull request #625 from kubescape/prometheus
Prometheus
2 parents 7a5cc82 + a8b2589 commit e52e65b

File tree

5 files changed

+86
-7
lines changed

5 files changed

+86
-7
lines changed

pkg/containerwatcher/v2/containercallback.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ func (cw *ContainerWatcher) containerCallbackAsync(notif containercollection.Pub
6262
helpers.String("k8s workload", k8sContainerID),
6363
helpers.String("ContainerImageDigest", notif.Container.Runtime.ContainerImageDigest),
6464
helpers.String("ContainerImageName", notif.Container.Runtime.ContainerImageName))
65+
cw.metrics.ReportContainerStart()
6566
// Check if Pod has a label of max sniffing time
6667
sniffingTime := utils.AddJitter(cw.cfg.MaxSniffingTime, cw.cfg.MaxJitterPercentage)
6768
if podLabelMaxSniffingTime, ok := notif.Container.K8s.PodLabels[MaxSniffingTimeLabel]; ok {
@@ -89,6 +90,7 @@ func (cw *ContainerWatcher) containerCallbackAsync(notif containercollection.Pub
8990
helpers.String("k8s workload", k8sContainerID),
9091
helpers.String("ContainerImageDigest", notif.Container.Runtime.ContainerImageDigest),
9192
helpers.String("ContainerImageName", notif.Container.Runtime.ContainerImageName))
93+
cw.metrics.ReportContainerStop()
9294
cw.objectCache.K8sObjectCache().DeleteSharedContainerData(notif.Container.Runtime.ContainerID)
9395
}
9496
}

pkg/containerwatcher/v2/event_handler_factory.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,10 +245,10 @@ func (ehf *EventHandlerFactory) registerHandlers(
245245
ehf.handlers[utils.HTTPEventType] = []Manager{containerProfileManager, ruleManager, metrics}
246246

247247
// Ptrace events
248-
ehf.handlers[utils.PtraceEventType] = []Manager{ruleManager}
248+
ehf.handlers[utils.PtraceEventType] = []Manager{ruleManager, metrics}
249249

250250
// IoUring events
251-
ehf.handlers[utils.IoUringEventType] = []Manager{ruleManager, rulePolicy}
251+
ehf.handlers[utils.IoUringEventType] = []Manager{ruleManager, metrics, rulePolicy}
252252

253253
// Note: SyscallEventType is not registered here because the syscall tracer
254254
// doesn't generate events - it only provides a peek function for other components

pkg/metricsmanager/metrics_manager_interface.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@ type MetricsManager interface {
1515
ReportRuleProcessed(ruleID string)
1616
ReportRuleAlert(ruleID string)
1717
ReportEbpfStats(stats *top.Event[toptypes.Stats])
18+
ReportContainerStart()
19+
ReportContainerStop()
1820
}

pkg/metricsmanager/metrics_manager_mock.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,7 @@ func (m *MetricsMock) ReportRuleAlert(ruleID string) {
5353

5454
func (m *MetricsMock) ReportEbpfStats(stats *top.Event[toptypes.Stats]) {
5555
}
56+
57+
// ReportContainerStart is a no-op in the mock implementation: the body is intentionally empty.
func (m *MetricsMock) ReportContainerStart() {}
58+
59+
// ReportContainerStop is a no-op in the mock implementation: the body is intentionally empty.
func (m *MetricsMock) ReportContainerStop() {}

pkg/metricsmanager/prometheus/prometheus.go

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ type PrometheusMetric struct {
3232
ebpfCapabilityCounter prometheus.Counter
3333
ebpfRandomXCounter prometheus.Counter
3434
ebpfFailedCounter prometheus.Counter
35+
ebpfSymlinkCounter prometheus.Counter
36+
ebpfHardlinkCounter prometheus.Counter
37+
ebpfSSHCounter prometheus.Counter
38+
ebpfHTTPCounter prometheus.Counter
39+
ebpfPtraceCounter prometheus.Counter
40+
ebpfIoUringCounter prometheus.Counter
3541
ruleCounter *prometheus.CounterVec
3642
alertCounter *prometheus.CounterVec
3743

@@ -45,6 +51,10 @@ type PrometheusMetric struct {
4551
programCpuUsageGauge *prometheus.GaugeVec
4652
programPerCpuUsageGauge *prometheus.GaugeVec
4753

54+
// Container metrics
55+
containerStartCounter prometheus.Counter
56+
containerStopCounter prometheus.Counter
57+
4858
// Cache to avoid allocating Labels maps on every call
4959
ruleCounterCache map[string]prometheus.Counter
5060
alertCounterCache map[string]prometheus.Counter
@@ -85,6 +95,30 @@ func NewPrometheusMetric() *PrometheusMetric {
8595
Name: "node_agent_ebpf_event_failure_counter",
8696
Help: "The total number of failed events received from the eBPF probe",
8797
}),
98+
ebpfSymlinkCounter: promauto.NewCounter(prometheus.CounterOpts{
99+
Name: "node_agent_symlink_counter",
100+
Help: "The total number of symlink events received from the eBPF probe",
101+
}),
102+
ebpfHardlinkCounter: promauto.NewCounter(prometheus.CounterOpts{
103+
Name: "node_agent_hardlink_counter",
104+
Help: "The total number of hardlink events received from the eBPF probe",
105+
}),
106+
ebpfSSHCounter: promauto.NewCounter(prometheus.CounterOpts{
107+
Name: "node_agent_ssh_counter",
108+
Help: "The total number of SSH events received from the eBPF probe",
109+
}),
110+
ebpfHTTPCounter: promauto.NewCounter(prometheus.CounterOpts{
111+
Name: "node_agent_http_counter",
112+
Help: "The total number of HTTP events received from the eBPF probe",
113+
}),
114+
ebpfPtraceCounter: promauto.NewCounter(prometheus.CounterOpts{
115+
Name: "node_agent_ptrace_counter",
116+
Help: "The total number of ptrace events received from the eBPF probe",
117+
}),
118+
ebpfIoUringCounter: promauto.NewCounter(prometheus.CounterOpts{
119+
Name: "node_agent_iouring_counter",
120+
Help: "The total number of io_uring events received from the eBPF probe",
121+
}),
88122
ruleCounter: promauto.NewCounterVec(prometheus.CounterOpts{
89123
Name: "node_agent_rule_counter",
90124
Help: "The total number of rules processed by the engine",
@@ -135,6 +169,16 @@ func NewPrometheusMetric() *PrometheusMetric {
135169
Help: "Per-CPU usage of programs by program ID",
136170
}, []string{programTypeLabel, programNameLabel}),
137171

172+
// Container metrics
173+
containerStartCounter: promauto.NewCounter(prometheus.CounterOpts{
174+
Name: "node_agent_container_start_counter",
175+
Help: "The total number of container start events",
176+
}),
177+
containerStopCounter: promauto.NewCounter(prometheus.CounterOpts{
178+
Name: "node_agent_container_stop_counter",
179+
Help: "The total number of container stop events",
180+
}),
181+
138182
// Initialize counter caches
139183
ruleCounterCache: make(map[string]prometheus.Counter),
140184
alertCounterCache: make(map[string]prometheus.Counter),
@@ -161,7 +205,14 @@ func (p *PrometheusMetric) Destroy() {
161205
prometheus.Unregister(p.ebpfFailedCounter)
162206
prometheus.Unregister(p.ruleCounter)
163207
prometheus.Unregister(p.alertCounter)
164-
208+
prometheus.Unregister(p.ebpfSymlinkCounter)
209+
prometheus.Unregister(p.ebpfHardlinkCounter)
210+
prometheus.Unregister(p.ebpfSSHCounter)
211+
prometheus.Unregister(p.ebpfHTTPCounter)
212+
prometheus.Unregister(p.ebpfPtraceCounter)
213+
prometheus.Unregister(p.ebpfIoUringCounter)
214+
prometheus.Unregister(p.containerStartCounter)
215+
prometheus.Unregister(p.containerStopCounter)
165216
// Unregister program ID metrics
166217
prometheus.Unregister(p.programRuntimeGauge)
167218
prometheus.Unregister(p.programRunCountGauge)
@@ -175,6 +226,8 @@ func (p *PrometheusMetric) Destroy() {
175226

176227
func (p *PrometheusMetric) ReportEvent(eventType utils.EventType) {
177228
switch eventType {
229+
case utils.CapabilitiesEventType:
230+
p.ebpfCapabilityCounter.Inc()
178231
case utils.ExecveEventType:
179232
p.ebpfExecCounter.Inc()
180233
case utils.OpenEventType:
@@ -183,12 +236,22 @@ func (p *PrometheusMetric) ReportEvent(eventType utils.EventType) {
183236
p.ebpfNetworkCounter.Inc()
184237
case utils.DnsEventType:
185238
p.ebpfDNSCounter.Inc()
186-
case utils.SyscallEventType:
187-
p.ebpfSyscallCounter.Inc()
188-
case utils.CapabilitiesEventType:
189-
p.ebpfCapabilityCounter.Inc()
190239
case utils.RandomXEventType:
191240
p.ebpfRandomXCounter.Inc()
241+
case utils.SymlinkEventType:
242+
p.ebpfSymlinkCounter.Inc()
243+
case utils.HardlinkEventType:
244+
p.ebpfHardlinkCounter.Inc()
245+
case utils.SSHEventType:
246+
p.ebpfSSHCounter.Inc()
247+
case utils.HTTPEventType:
248+
p.ebpfHTTPCounter.Inc()
249+
case utils.PtraceEventType:
250+
p.ebpfPtraceCounter.Inc()
251+
case utils.IoUringEventType:
252+
p.ebpfIoUringCounter.Inc()
253+
case utils.SyscallEventType:
254+
p.ebpfSyscallCounter.Inc()
192255
}
193256
}
194257

@@ -271,3 +334,11 @@ func (p *PrometheusMetric) ReportEbpfStats(stats *top.Event[toptypes.Stats]) {
271334
p.programPerCpuUsageGauge.With(labels).Set(stat.PerCpuUsage)
272335
}
273336
}
337+
338+
// ReportContainerStart increments containerStartCounter by one.
func (p *PrometheusMetric) ReportContainerStart() {
339+
p.containerStartCounter.Inc()
340+
}
341+
342+
// ReportContainerStop increments containerStopCounter by one.
func (p *PrometheusMetric) ReportContainerStop() {
343+
p.containerStopCounter.Inc()
344+
}

0 commit comments

Comments
 (0)