Skip to content

Commit

Permalink
Change the PID filter implementation to use bloom filter (#851)
Browse files Browse the repository at this point in the history
  • Loading branch information
grcevski authored May 22, 2024
1 parent 736ea23 commit 6137faf
Show file tree
Hide file tree
Showing 19 changed files with 101 additions and 51 deletions.
30 changes: 24 additions & 6 deletions bpf/pid.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
#include "bpf_core_read.h"
#include "pid_types.h"

#define MAX_CONCURRENT_PIDS 3000 // estimate: 1000 concurrent processes (including children) * 3 namespaces per pid
#define MAX_CONCURRENT_PIDS 3001 // estimate: 1000 concurrent processes (including children) * 3 namespaces per pid
#define PRIME_HASH 192053 // closest prime to 3001 * 64

volatile const s32 filter_pids = 0;

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, MAX_CONCURRENT_PIDS);
__type(key, pid_key_t);
__type(value, u8);
__type(key, u32);
__type(value, u64); // using 8 bytes, because array elements are 8 bytes aligned anyway
__uint(pinning, LIBBPF_PIN_BY_NAME);
} valid_pids SEC(".maps");

Expand All @@ -26,6 +27,23 @@ struct {
__uint(pinning, LIBBPF_PIN_BY_NAME);
} pid_cache SEC(".maps");

// Tests whether the (namespace, pid) pair in *p is marked in the valid_pids
// bitmap. The pair is hashed to a single bit position, so distinct pairs that
// hash to the same position are indistinguishable (false positives are
// possible). Returns the value of that bit (0 or 1); returns 1 if the array
// element holding the bit cannot be looked up.
static __always_inline u8 pid_matches(pid_key_t *p) {
    // Namespace id goes in the upper 32 bits, pid in the lower 32 bits.
    const u64 key = (((u64)p->ns) << 32) | p->pid;

    // Reduce the key modulo a prime to get a bit position in [0, PRIME_HASH).
    const u32 pos = (u32)(key % PRIME_HASH);
    u32 idx = pos >> 6;         // each array element holds 64 bits
    const u32 shift = pos & 63; // low 6 bits select the bit inside the element

    u64 *word = bpf_map_lookup_elem(&valid_pids, &idx);
    if (word) {
        return ((*word) >> shift) & 1;
    }

    // The lookup is expected to succeed for every computed index; if it does
    // not, log it and report a match rather than filtering the pid out.
    bpf_printk("Error looking up PID segment %d", idx);
    return 1;
}

static __always_inline u32 valid_pid(u64 id) {
u32 host_pid = id >> 32;
// If we are doing system wide instrumenting, accept all PIDs
Expand All @@ -52,7 +70,7 @@ static __always_inline u32 valid_pid(u64 id) {
.ns = pid_ns_id
};

u32 *found_ns_pid = bpf_map_lookup_elem(&valid_pids, &p_key);
u8 found_ns_pid = pid_matches(&p_key);

if (found_ns_pid) {
bpf_map_update_elem(&pid_cache, &host_pid, &ns_pid, BPF_ANY);
Expand All @@ -63,7 +81,7 @@ static __always_inline u32 valid_pid(u64 id) {
.ns = pid_ns_id
};

u32 *found_ns_ppid = bpf_map_lookup_elem(&valid_pids, &pp_key);
u8 found_ns_ppid = pid_matches(&pp_key);

if (found_ns_ppid) {
bpf_map_update_elem(&pid_cache, &host_pid, &ns_pid, BPF_ANY);
Expand Down
Binary file modified pkg/internal/ebpf/httpfltr/bpf_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_debug_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_debug_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_tp_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_tp_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_tp_debug_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpfltr/bpf_tp_debug_bpfel_x86.o
Binary file not shown.
95 changes: 50 additions & 45 deletions pkg/internal/ebpf/httpfltr/httpfltr.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,50 +44,65 @@ func New(cfg *beyla.Config, metrics imetrics.Reporter) *Tracer {
}
}

func (p *Tracer) AllowPID(pid uint32, svc svc.ID) {
if p.bpfObjects.ValidPids != nil {
nsid, err := ebpfcommon.FindNamespace(int32(pid))
ebpfcommon.ActiveNamespaces[pid] = nsid
if err == nil {
err = p.bpfObjects.ValidPids.Put(bpfPidKeyT{Pid: pid, Ns: nsid}, uint8(1))
if err != nil {
p.log.Error("Error setting up pid in BPF space", "error", err)
}
// This is required to ensure everything works when Beyla is running in pid=host mode.
// In host mode, Beyla will find the host pid, while the bpf code matches the user pid.
// Therefore we find all namespaced pids for the current pid we discovered and allow those too.
otherPids, err := ebpfcommon.FindNamespacedPids(int32(pid))
if err != nil {
p.log.Error("Error finding namespaced pids", "error", err)
}
p.log.Debug("Found namespaced pids (will contain the existing pid too)", "pids", otherPids)
for _, op := range otherPids {
err = p.bpfObjects.ValidPids.Put(bpfPidKeyT{Pid: op, Ns: nsid}, uint8(1))
if err != nil {
p.log.Error("Error setting up pid in BPF space", "error", err)
}
}
} else {
p.log.Error("Error looking up namespace", "error", err)
// The two constants below mirror definitions in pid.h and must be changed
// together with them:
// #define MAX_CONCURRENT_PIDS 3001 // estimate: 1000 concurrent processes (including children) * 3 namespaces per pid
// #define PRIME_HASH 192053 // closest prime to 3001 * 64
const (
	maxConcurrentPids = 3001
	primeHash         = 192053
)

// pidSegmentBit hashes the 64-bit key k to a bit position by reducing it
// modulo primeHash, and returns that position split into the index of the
// 64-bit segment holding the bit and the bit's offset (0-63) inside that
// segment.
func pidSegmentBit(k uint64) (uint32, uint32) {
	pos := uint32(k % primeHash)

	// 64 bits per segment: the high part of pos is the segment index,
	// the low 6 bits are the offset within it.
	return pos >> 6, pos % 64
}

// buildPidFilter builds the bitmap that is written to the valid PIDs BPF map.
// It returns a slice of maxConcurrentPids 64-bit segments in which one bit is
// set for every (namespace, pid) pair returned by the PID filter. Bit positions
// come from pidSegmentBit, so different pairs may land on the same bit.
func (p *Tracer) buildPidFilter() []uint64 {
	result := make([]uint64, maxConcurrentPids)

	// Only PIDs registered with the kprobes PID type are requested, so PIDs
	// that were added for other tracer types are not included here.
	for nsid, pids := range p.pidsFilter.CurrentPIDs(ebpfcommon.PIDTypeKProbes) {
		for pid := range pids {
			p.log.Debug("Reallowing pid", "pid", pid, "namespace", nsid)

			// Same key layout as the BPF side: namespace id in the upper
			// 32 bits, pid in the lower 32 bits.
			k := uint64(nsid)<<32 | uint64(pid)

			segment, bit := pidSegmentBit(k)
			result[segment] |= 1 << bit
		}
	}

	return result
}

func (p *Tracer) BlockPID(pid uint32) {
func (p *Tracer) rebuildValidPids() {
if p.bpfObjects.ValidPids != nil {
ns, ok := ebpfcommon.ActiveNamespaces[pid]
if ok {
err := p.bpfObjects.ValidPids.Delete(bpfPidKeyT{Pid: pid, Ns: ns})
v := p.buildPidFilter()

p.log.Debug("number of segments in pid filter cache", "len", len(v))

for i, segment := range v {
err := p.bpfObjects.ValidPids.Put(uint32(i), uint64(segment))
if err != nil {
p.log.Error("Error removing pid in BPF space", "error", err)
p.log.Error("Error setting up pid in BPF space, sizes of Go and BPF maps don't match", "error", err, "i", i)
}
} else {
p.log.Warn("Couldn't find active namespace", "pid", pid)
}
}
delete(ebpfcommon.ActiveNamespaces, pid)
}

// AllowPID registers pid for service svc in the tracer's PID filter, using the
// kprobes PID type, and then calls rebuildValidPids so the change is reflected
// in the BPF-side filter.
func (p *Tracer) AllowPID(pid uint32, svc svc.ID) {
p.pidsFilter.AllowPID(pid, svc, ebpfcommon.PIDTypeKProbes)
// The filter must be updated first: the rebuild reads the filter's current PIDs.
p.rebuildValidPids()
}

// BlockPID removes pid from the tracer's PID filter and then calls
// rebuildValidPids so the change is reflected in the BPF-side filter.
func (p *Tracer) BlockPID(pid uint32) {
p.pidsFilter.BlockPID(pid)
// The filter must be updated first: the rebuild reads the filter's current PIDs.
p.rebuildValidPids()
}

func (p *Tracer) Load() (*ebpf.CollectionSpec, error) {
Expand Down Expand Up @@ -225,17 +240,7 @@ func (p *Tracer) Run(ctx context.Context, eventsChan chan<- []request.Span) {
// At this point we now have loaded the bpf objects, which means we should insert any
// pids that are allowed into the bpf map
if p.bpfObjects.ValidPids != nil {
p.log.Debug("Reallowing pids")
for nsid, pids := range p.pidsFilter.CurrentPIDs(ebpfcommon.PIDTypeKProbes) {
for pid := range pids {
// skip any pids that might've been added, but are not tracked by the kprobes
p.log.Debug("Reallowing pid", "pid", pid, "namespace", nsid)
err := p.bpfObjects.ValidPids.Put(bpfPidKeyT{Pid: pid, Ns: nsid}, uint8(1))
if err != nil {
p.log.Error("Error setting up pid in BPF space", "pid", pid, "namespace", nsid, "error", err)
}
}
}
p.rebuildValidPids()
} else {
p.log.Error("BPF Pids map is not created yet, this is a bug.")
}
Expand Down
27 changes: 27 additions & 0 deletions pkg/internal/ebpf/httpfltr/httpfltr_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package httpfltr

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestBitPositionCalculation checks that pidSegmentBit maps a key to the
// expected segment index and bit offset, including wrap-around at primeHash.
func TestBitPositionCalculation(t *testing.T) {
	// Each row is {high 32 bits of key, low 32 bits of key, expected segment, expected bit}.
	for _, v := range [][4]uint32{
		{0, 1, 0, 1},
		{0, 2, 0, 2},
		{0, 65, 1, 1},
		{0, 66, 1, 2},
		{0, primeHash, 0, 0},
		{0, primeHash + 1, 0, 1},
		// Largest hash value: last reachable bit of the filter.
		{0, primeHash - 1, 3000, 52},
		// Non-zero high word, so the upper half of the key takes part in the hash.
		{1, 5, 1344, 46},
	} {
		k := makeKey(v[0], v[1])
		segment, bit := pidSegmentBit(k)
		// testify's Equal takes (expected, actual); keeping that order makes
		// failure messages report the right values.
		assert.Equal(t, v[2], segment)
		assert.Equal(t, v[3], bit)
	}
}

// makeKey packs two 32-bit values into one 64-bit key: first occupies the
// upper 32 bits and second the lower 32 bits.
func makeKey(first, second uint32) uint64 {
	hi := uint64(first) << 32
	lo := uint64(second)
	return hi | lo
}
Binary file modified pkg/internal/ebpf/httpssl/bpf_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_debug_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_debug_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_tp_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_tp_bpfel_x86.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_tp_debug_bpfel_arm64.o
Binary file not shown.
Binary file modified pkg/internal/ebpf/httpssl/bpf_tp_debug_bpfel_x86.o
Binary file not shown.

0 comments on commit 6137faf

Please sign in to comment.