Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Retina dataplane debug CLI #740

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cli/cmd/debug/bpf/bpf_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package bpf

import "github.com/spf13/cobra"

// Cmd is the parent "bpf" command; its subcommands are attached in init.
var Cmd = &cobra.Command{
	Use:   "bpf",
	Short: "BPF debug commands",
}

// init attaches the features and tc subcommands to Cmd, in that order.
func init() {
	Cmd.AddCommand(featuresCmd, qdiscCmd)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a status command, showcasing the important one-liners:

  • socket setup
  • agent running (maybe)
  • maps
  • programs
  • flow rate (?)
  • plugins

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can already get the flow rate by exec-ing `hubble status` on the agent, so it might be overkill?

8 changes: 8 additions & 0 deletions cli/cmd/debug/bpf/bpf_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package bpf

import "github.com/spf13/cobra"

// Cmd is the "bpf" command as declared in the Windows file; no subcommands
// are registered on it here.
var Cmd = &cobra.Command{
	Use:   "bpf",
	Short: "BPF debug commands (Not supported on Windows)",
}
88 changes: 88 additions & 0 deletions cli/cmd/debug/bpf/consts_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package bpf

import (
"fmt"

"github.com/cilium/ebpf"
"github.com/pkg/errors"
)

// eBPFMapList enumerates the eBPF map types whose availability is probed and
// printed by the features command.
var eBPFMapList = []ebpf.MapType{
	ebpf.Hash,
	ebpf.Array,
	ebpf.ProgramArray,
	ebpf.PerfEventArray,
	ebpf.PerCPUHash,
	ebpf.PerCPUArray,
	ebpf.StackTrace,
	ebpf.CGroupArray,
	ebpf.LRUHash,
	ebpf.LRUCPUHash,
	ebpf.LPMTrie,
	ebpf.ArrayOfMaps,
	ebpf.HashOfMaps,
	ebpf.DevMap,
	ebpf.SockMap,
	ebpf.CPUMap,
	ebpf.XSKMap,
	ebpf.SockHash,
	ebpf.CGroupStorage,
	ebpf.ReusePortSockArray,
	ebpf.PerCPUCGroupStorage,
	ebpf.Queue,
	ebpf.Stack,
	ebpf.SkStorage,
	ebpf.DevMapHash,
	ebpf.StructOpsMap,
	ebpf.RingBuf,
	ebpf.InodeStorage,
	ebpf.TaskStorage,
}

// eBPFProgramList enumerates the eBPF program types whose availability is
// probed and printed by the features command.
var eBPFProgramList = []ebpf.ProgramType{
	ebpf.SocketFilter,
	ebpf.Kprobe,
	ebpf.SchedCLS,
	ebpf.SchedACT,
	ebpf.TracePoint,
	ebpf.XDP,
	ebpf.PerfEvent,
	ebpf.CGroupSKB,
	ebpf.CGroupSock,
	ebpf.LWTIn,
	ebpf.LWTOut,
	ebpf.LWTXmit,
	ebpf.SockOps,
	ebpf.SkSKB,
	ebpf.CGroupDevice,
	ebpf.SkMsg,
	ebpf.RawTracepoint,
	ebpf.CGroupSockAddr,
	ebpf.LWTSeg6Local,
	ebpf.LircMode2,
	ebpf.SkReuseport,
	ebpf.FlowDissector,
	ebpf.CGroupSysctl,
	ebpf.RawTracepointWritable,
	ebpf.CGroupSockopt,
	ebpf.Tracing,
	ebpf.StructOps,
	ebpf.Extension,
	ebpf.LSM,
	ebpf.SkLookup,
	ebpf.Syscall,
	ebpf.Netfilter,
}

// isSupported turns a feature-probe result into a display label: it returns
// "not supported" when err matches ebpf.ErrNotSupported and "supported" for
// every other value, including nil.
func isSupported(err error) string {
	label := "supported"
	if errors.Is(err, ebpf.ErrNotSupported) {
		label = "not supported"
	}
	return label
}

// getLinuxKernelVersion renders a packed kernel version code as
// "major.minor.patch": the bits above the low 16 are the major number, the
// next 8 bits the minor number, and the low 8 bits the patch number.
func getLinuxKernelVersion(versionCode uint32) string {
	const (
		majorShift = 16
		minorShift = 8
		byteMask   = 0xff
	)

	major := versionCode >> majorShift
	minor := (versionCode >> minorShift) & byteMask
	patch := versionCode & byteMask

	return fmt.Sprintf("%d.%d.%d", major, minor, patch)
}
61 changes: 61 additions & 0 deletions cli/cmd/debug/bpf/features_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package bpf

import (
"fmt"

"github.com/cilium/ebpf"
"github.com/cilium/ebpf/features"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)

// featuresCmd outputs available BPF features on the host.
//
// It prints the kernel version, then probes bounded-loop and large-instruction
// support, every map type in eBPFMapList and every program type in
// eBPFProgramList, printing "supported" or "not supported" for each one.
// A probe error other than ebpf.ErrNotSupported aborts the command and is
// returned wrapped with context.
var featuresCmd = &cobra.Command{
Use: "features",
Short: "Output available BPF features on the host",
RunE: func(*cobra.Command, []string) error {
// Print the kernel version decoded from the packed version code.
linuxVersion, err := features.LinuxVersionCode()
if err != nil {
return errors.Wrap(err, "failed to get Linux version code")
}
fmt.Printf("Linux kernel version: %s\n", getLinuxKernelVersion(linuxVersion))

fmt.Println("--------------------------------------------------")

// ErrNotSupported is a reportable probe result, not a failure, so only
// other errors are returned; the same pattern repeats for every probe below.
err = features.HaveBoundedLoops()
if err != nil && !errors.Is(err, ebpf.ErrNotSupported) {
return errors.Wrap(err, "failed to check for bounded loops")
}
fmt.Printf("Bounded loops: %s\n", isSupported(err))

fmt.Println("--------------------------------------------------")

err = features.HaveLargeInstructions()
if err != nil && !errors.Is(err, ebpf.ErrNotSupported) {
return errors.Wrap(err, "failed to check for large instructions")
}
fmt.Printf("Large instructions: %s\n", isSupported(err))

fmt.Println("--------------------------------------------------")
fmt.Println("eBPF map availability:")
// One output line per map type in eBPFMapList.
for _, mt := range eBPFMapList {
err = features.HaveMapType(mt)
if err != nil && !errors.Is(err, ebpf.ErrNotSupported) {
return errors.Wrapf(err, "failed to check for map type %s", mt.String())
}
fmt.Printf("%s: %s\n", mt.String(), isSupported(err))
}

fmt.Println("--------------------------------------------------")
fmt.Println("eBPF program types availability:")
// One output line per program type in eBPFProgramList.
for _, pt := range eBPFProgramList {
err = features.HaveProgramType(pt)
if err != nil && !errors.Is(err, ebpf.ErrNotSupported) {
return errors.Wrapf(err, "failed to check for program type %s", pt.String())
}
fmt.Printf("%s: %s\n", pt.String(), isSupported(err))
}
Comment on lines +40 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it's not important to list all map/prog types. We can just list all maps and programs, and that would be enough information. For specific maps (like conntrack), we may want to print entries, but listing by types is not that helpful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is more so for users to check which maps and programs can be initialize on their host, but yes i do plan to have a command that will list out maps/programs running


return nil
},
}
118 changes: 118 additions & 0 deletions cli/cmd/debug/bpf/tc_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package bpf

import (
"fmt"
"net"
"os"

tc "github.com/florianl/go-tc"
"github.com/florianl/go-tc/core"
"github.com/mdlayher/netlink"
"github.com/pterm/pterm"
"github.com/spf13/cobra"
"golang.org/x/sys/unix"
)

// val aggregates what the tc command reports for a single interface: the
// qdiscs found on it and whether its filter queries returned anything.
type val struct {
	// qdisc accumulates every qdisc object seen for the interface.
	qdisc []tc.Object
	// egressFilterExist is true when the egress filter query returned at
	// least one filter.
	egressFilterExist bool
	// ingressFilterExist is true when the ingress filter query returned at
	// least one filter.
	ingressFilterExist bool
}

var (
	// ifaceName holds the value of the --interface flag; when it is empty the
	// command reports every interface.
	ifaceName string

	// ifaceToQdiscsAndFiltersMap maps an interface name to its *val. Values are
	// stored as any because the map is handed directly to logger.ArgsFromMap.
	ifaceToQdiscsAndFiltersMap = make(map[string]any)

	// qdiscCmd lists the qdiscs on the host, groups them by interface and
	// records whether ingress/egress filters exist on each interface. Failures
	// are logged rather than returned; a failure for one interface does not
	// stop processing of the others.
	qdiscCmd = &cobra.Command{
		Use:   "tc",
		Short: "Output all qdiscs and attached bpf programs on each interface on the host",
		Run: func(*cobra.Command, []string) {
			logger := pterm.DefaultLogger.WithLevel(pterm.LogLevelTrace)

			// NOTE: logger.Args consumes its arguments as alternating
			// key/value pairs, so every call below passes an explicit key.

			// open a rtnetlink socket
			rtnl, err := tc.Open(&tc.Config{})
			if err != nil {
				logger.Error("could not open rtnetlink socket", logger.Args("error", err))
				return
			}
			defer func() {
				// Use a closure-local error so the deferred close never
				// overwrites err in the enclosing function.
				if closeErr := rtnl.Close(); closeErr != nil {
					fmt.Fprintf(os.Stderr, "could not close rtnetlink socket: %v\n", closeErr)
				}
			}()

			// set NETLINK_EXT_ACK option for detailed error messages; failure
			// is non-fatal and only costs error detail.
			if err = rtnl.SetOption(netlink.ExtendedAcknowledge, true); err != nil {
				logger.Warn("could not set NETLINK_EXT_ACK option", logger.Args("error", err))
			}

			// get all qdiscs
			qdiscs, err := rtnl.Qdisc().Get()
			if err != nil {
				logger.Error("could not get qdiscs", logger.Args("error", err))
				return
			}

			// populate ifaceToQdiscsAndFiltersMap
			for _, qdisc := range qdiscs {
				iface, err := net.InterfaceByIndex(int(qdisc.Ifindex))
				if err != nil {
					logger.Error("could not get interface by index", logger.Args("error", err, "ifindex", qdisc.Ifindex))
					continue
				}

				// Single lookup with a checked assertion: a missing key yields
				// ok == false and a fresh entry is created.
				v, ok := ifaceToQdiscsAndFiltersMap[iface.Name].(*val)
				if !ok {
					v = &val{}
					ifaceToQdiscsAndFiltersMap[iface.Name] = v
				}
				v.qdisc = append(v.qdisc, qdisc)

				ingressFilters, err := rtnl.Filter().Get(&tc.Msg{
					Family:  unix.AF_UNSPEC,
					Ifindex: uint32(iface.Index),
					Handle:  0,
					Parent:  core.BuildHandle(tc.HandleRoot, tc.HandleMinIngress),
					Info:    0x10300, // nolint:gomnd // info
				})
				if err != nil {
					logger.Error("could not get ingress filters for interface", logger.Args("error", err, "interface", iface.Name))
					continue
				}
				v.ingressFilterExist = len(ingressFilters) > 0

				egressFilters, err := rtnl.Filter().Get(&tc.Msg{
					Family:  unix.AF_UNSPEC,
					Ifindex: uint32(iface.Index),
					Handle:  1,
					Parent:  core.BuildHandle(tc.HandleRoot, tc.HandleMinEgress),
					Info:    0x10300, // nolint:gomnd // info
				})
				if err != nil {
					logger.Error("could not get egress filters for interface", logger.Args("error", err, "interface", iface.Name))
					continue
				}
				v.egressFilterExist = len(egressFilters) > 0
			}

			// No filter requested: report everything that was collected.
			if ifaceName == "" {
				logger.Info("Interfaces", logger.ArgsFromMap(ifaceToQdiscsAndFiltersMap))
				return
			}

			v, ok := ifaceToQdiscsAndFiltersMap[ifaceName].(*val)
			if !ok {
				logger.Error("Interface not found", logger.Args("interface", ifaceName))
				return
			}
			logger.Info("Interface", logger.ArgsFromMap(map[string]any{
				"name":               ifaceName,
				"qdiscs":             v.qdisc,
				"ingressFilterExist": v.ingressFilterExist,
				"egressFilterExist":  v.egressFilterExist,
			}))
		},
	}
)

// init registers the --interface/-i flag on qdiscCmd, binding it to ifaceName
// with an empty default.
func init() {
	flagSet := qdiscCmd.Flags()
	flagSet.StringVarP(&ifaceName, "interface", "i", "", "Filter output to a specific interface")
}
13 changes: 13 additions & 0 deletions cli/cmd/debug/conntrack/conntrack_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package conntrack

import "github.com/spf13/cobra"

// Cmd is the parent "conntrack" command; its subcommands are attached in init.
var Cmd = &cobra.Command{
	Use:   "conntrack",
	Short: "Conntrack debug commands",
}

// init attaches the dump and stats subcommands to Cmd, in that order.
func init() {
	Cmd.AddCommand(dump, stats)
}
8 changes: 8 additions & 0 deletions cli/cmd/debug/conntrack/conntrack_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package conntrack

import "github.com/spf13/cobra"

// Cmd is the "conntrack" command as declared in the Windows file; no
// subcommands are registered on it here.
var Cmd = &cobra.Command{
	Use:   "conntrack",
	Short: "Conntrack debug commands (Not supported on Windows)",
}
14 changes: 14 additions & 0 deletions cli/cmd/debug/conntrack/dump_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package conntrack

import (
"github.com/microsoft/retina/pkg/plugin/conntrack"

Check failure on line 4 in cli/cmd/debug/conntrack/dump_linux.go

View workflow job for this annotation

GitHub Actions / Lint (linux, arm64)

could not import github.com/microsoft/retina/pkg/plugin/conntrack (-: # github.com/microsoft/retina/pkg/plugin/conntrack
"github.com/spf13/cobra"
)

// dump is the "conntrack dump" subcommand; it calls conntrack.Dump and
// returns whatever error that call reports.
var dump = &cobra.Command{
	Use:   "dump",
	Short: "Dump all conntrack entries",
	RunE: func(_ *cobra.Command, _ []string) error {
		return conntrack.Dump()
	},
}
14 changes: 14 additions & 0 deletions cli/cmd/debug/conntrack/stats_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package conntrack

import (
"github.com/microsoft/retina/pkg/plugin/conntrack"
"github.com/spf13/cobra"
)

// stats is the "conntrack stats" subcommand; it calls conntrack.Stats and
// returns whatever error that call reports.
var stats = &cobra.Command{
	Use:   "stats",
	Short: "Print conntrack stats",
	RunE: func(_ *cobra.Command, _ []string) error {
		return conntrack.Stats()
	},
}
17 changes: 17 additions & 0 deletions cli/cmd/debug/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package debug

import (
"github.com/microsoft/retina/cli/cmd/debug/bpf"
"github.com/microsoft/retina/cli/cmd/debug/conntrack"
"github.com/spf13/cobra"
)

// Cmd is the top-level "debug" command; the conntrack and bpf command groups
// are attached to it in init.
var Cmd = &cobra.Command{
	Use:   "debug",
	Short: "Dataplane debug commands",
}

// init attaches the conntrack and bpf command groups to Cmd, in that order.
func init() {
	Cmd.AddCommand(conntrack.Cmd, bpf.Cmd)
}
4 changes: 4 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"os"

"github.com/microsoft/retina/cli/cmd/debug"
"github.com/microsoft/retina/cmd/legacy"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -45,6 +46,9 @@ func init() {

// this is read during GetConfigOrDie, not explicitly passed to any of our logic
rootCmd.Flags().StringVar(&kubeConfigFileName, "kubeconfig", kubeConfigFileName, "noop we just need cobra to not check since controller runtime can use this flag")

// Add debug commands
rootCmd.AddCommand(debug.Cmd)
}

func Execute() {
Expand Down
Loading
Loading