Skip to content

Commit

Permalink
Make the Profiling module compatible with more linux systems (#66)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrproliu authored Dec 7, 2022
1 parent e0b9db7 commit 2462d62
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/rover.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
e2e-test:
name: E2E test
needs: [ docker ]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 60
strategy:
fail-fast: false
Expand Down Expand Up @@ -142,7 +142,7 @@ jobs:
https-e2e-test:
name: HTTPS E2E test
needs: [ docker ]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 60
strategy:
fail-fast: false
Expand Down
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Release Notes.
* Enhancing the render context for the Kubernetes process.
* Simplify the logic of network protocol analysis.
* Upgrade Go library to `1.18`, eBPF library to `0.9.3`.
* Make the Profiling module compatible with more Linux systems.

#### Bug Fixes

Expand Down
11 changes: 11 additions & 0 deletions bpf/profiling/network/netmonitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,17 @@ int tcp_drop(struct pt_regs *ctx) {
return 0;
}

/*
 * kprobe on kfree_skb_reason(): reports a DROP socket exception when an skb is
 * freed with a drop reason above SKB_DROP_REASON_NOT_SPECIFIED.
 *
 * The probed kernel function is declared as
 *     void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
 * so the first argument is the packet buffer, NOT the socket. The owning
 * socket has to be loaded from skb->sk before it can be reported.
 *
 * Always returns 0.
 */
SEC("kprobe/kfree_skb_reason")
int kfree_skb_reason(struct pt_regs *ctx) {
    struct sk_buff *skb = (void *)PT_REGS_PARM1(ctx);
    enum skb_drop_reason reason = PT_REGS_PARM2(ctx);
    struct sock *s = 0;

    // nothing to report without a buffer or without a concrete drop reason
    if (!skb || reason <= SKB_DROP_REASON_NOT_SPECIFIED) {
        return 0;
    }

    // skb points into kernel memory, so the sk field must be fetched through
    // the probe-read helper; the buffer may also have no owning socket at all
    if (bpf_probe_read(&s, sizeof(s), &skb->sk) != 0 || !s) {
        return 0;
    }

    send_socket_exception_operation_event(ctx, SOCKET_EXCEPTION_OPERATION_TYPE_DROP, s);
    return 0;
}

#include "openssl.c"
#include "go_tls.c"
#include "node_tls.c"
16 changes: 15 additions & 1 deletion bpf/profiling/network/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,18 @@ struct user_msghdr {
struct mmsghdr {
struct user_msghdr msg_hdr;
unsigned int msg_len;
} __attribute__((preserve_access_index));
} __attribute__((preserve_access_index));

/*
 * Minimal local declaration of struct sk_buff that exposes only the `sk`
 * socket pointer. The preserve_access_index attribute makes clang record
 * field accesses as relocatable, so the declared layout here does not need
 * to list the other members.
 * NOTE(review): presumably stands in for the kernel's struct sk_buff, whose
 * `sk` member is the owning socket -- confirm against the target kernels.
 */
struct sk_buff {
struct sock *sk;
} __attribute__((preserve_access_index));

/*
 * Local copy of the skb drop reasons used by the kfree_skb_reason probe.
 * Enumerators take the implicit values 0, 1, 2, ... in declaration order, so
 * the order below must not be changed: the probe compares the raw argument
 * against SKB_DROP_REASON_NOT_SPECIFIED numerically.
 * NOTE(review): presumably mirrors the kernel's enum skb_drop_reason; the
 * kernel's numbering may differ between versions -- verify for each
 * supported kernel.
 */
enum skb_drop_reason {
SKB_DROP_REASON_NOT_SPECIFIED,
SKB_DROP_REASON_NO_SOCKET,
SKB_DROP_REASON_PKT_TOO_SMALL,
SKB_DROP_REASON_TCP_CSUM,
SKB_DROP_REASON_TCP_FILTER,
SKB_DROP_REASON_UDP_CSUM,
SKB_DROP_REASON_MAX,
};
3 changes: 3 additions & 0 deletions pkg/profiling/task/network/analyze/layer4/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type SocketExceptionOperationEvent struct {
RemoteAddrV4 uint32
RemoteAddrV6 [16]uint8
RemoteAddrPort uint32
LocalAddrV4 uint32
LocalAddrV6 [16]uint8
LocalAddrPort uint32
Type base.SocketExceptionOperationType
}

Expand Down
14 changes: 9 additions & 5 deletions pkg/profiling/task/network/bpf/linker.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,23 @@ type UProbeExeFile struct {
realFile *link.Executable
}

func (m *Linker) AddLink(linkF LinkFunc, p *ebpf.Program, trySymbolNames ...string) {
func (m *Linker) AddLink(linkF LinkFunc, symbolWithPrograms map[string]*ebpf.Program) {
var lk link.Link
var err error
var realSym string
for _, n := range trySymbolNames {
lk, err = linkF(n, p, nil)
for symbol, p := range symbolWithPrograms {
lk, err = linkF(symbol, p, nil)
if err == nil {
realSym = n
realSym = symbol
break
}
}
if err != nil {
m.errors = multierror.Append(m.errors, fmt.Errorf("open %s error: %v", trySymbolNames, err))
symbolNames := make([]string, 0)
for s := range symbolWithPrograms {
symbolNames = append(symbolNames, s)
}
m.errors = multierror.Append(m.errors, fmt.Errorf("open %s error: %v", symbolNames, err))
} else {
log.Debugf("attach to the kprobe: %s", realSym)
m.closers = append(m.closers, lk)
Expand Down
14 changes: 7 additions & 7 deletions pkg/profiling/task/network/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ func (r *Runner) Start(ctx context.Context, task *base.ProfilingTask, processes
bpfLoader.AddSysCall("connect", bpfLoader.SysConnect, bpfLoader.SysConnectRet)
bpfLoader.AddSysCall("accept", bpfLoader.SysAccept, bpfLoader.SysAcceptRet)
bpfLoader.AddSysCall("accept4", bpfLoader.SysAccept, bpfLoader.SysAcceptRet)
bpfLoader.AddLink(link.Kretprobe, bpfLoader.SockAllocRet, "sock_alloc")
bpfLoader.AddLink(link.Kprobe, bpfLoader.TcpConnect, "tcp_connect")
bpfLoader.AddLink(link.Kretprobe, map[string]*ebpf.Program{"sock_alloc": bpfLoader.SockAllocRet})
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_connect": bpfLoader.TcpConnect})

// write/receive data
bpfLoader.AddSysCall("send", bpfLoader.SysSend, bpfLoader.SysSendRet)
Expand All @@ -161,13 +161,13 @@ func (r *Runner) Start(ctx context.Context, task *base.ProfilingTask, processes
bpfLoader.AddSysCall("recvfrom", bpfLoader.SysRecvfrom, bpfLoader.SysRecvfromRet)
bpfLoader.AddSysCall("recvmsg", bpfLoader.SysRecvmsg, bpfLoader.SysRecvmsgRet)
bpfLoader.AddSysCall("recvmmsg", bpfLoader.SysRecvmmsg, bpfLoader.SysRecvmmsgRet)
bpfLoader.AddLink(link.Kprobe, bpfLoader.TcpRcvEstablished, "tcp_rcv_established")
bpfLoader.AddLink(link.Kprobe, bpfLoader.SecuritySocketSendmsg, "security_socket_sendmsg")
bpfLoader.AddLink(link.Kprobe, bpfLoader.SecuritySocketRecvmsg, "security_socket_recvmsg")
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_rcv_established": bpfLoader.TcpRcvEstablished})
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"security_socket_sendmsg": bpfLoader.SecuritySocketSendmsg})
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"security_socket_recvmsg": bpfLoader.SecuritySocketRecvmsg})

// retransmit/drop
bpfLoader.AddLink(link.Kprobe, bpfLoader.TcpRetransmit, "tcp_retransmit_skb")
bpfLoader.AddLink(link.Kprobe, bpfLoader.TcpDrop, "tcp_drop")
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_retransmit_skb": bpfLoader.TcpRetransmit})
bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_drop": bpfLoader.TcpDrop, "kfree_skb_reason": bpfLoader.KfreeSkbReason})

if err := bpfLoader.HasError(); err != nil {
_ = bpfLoader.Close()
Expand Down
15 changes: 14 additions & 1 deletion pkg/profiling/task/offcpu/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -no-global-types -target bpfel -cc $BPF_CLANG -cflags $BPF_CFLAGS bpf $REPO_ROOT/bpf/profiling/offcpu.c -- -I$REPO_ROOT/bpf/include -D__TARGET_ARCH_x86

var log = logger.GetLogger("profiling", "task", "offcpu")
var defaultKernelSymbol = "finish_task_switch"

type ProcessStack struct {
UserStackID uint32
Expand Down Expand Up @@ -116,7 +117,7 @@ func (r *Runner) Run(ctx context.Context, notify base.ProfilingRunningSuccessNot
}
r.bpf = &objs

kprobe, err := link.Kprobe("finish_task_switch", objs.DoFinishTaskSwitch, nil)
kprobe, err := link.Kprobe(r.findMatchesSymbol(), objs.DoFinishTaskSwitch, nil)
if err != nil {
return fmt.Errorf("link to finish task swtich failure: %v", err)
}
Expand All @@ -127,6 +128,18 @@ func (r *Runner) Run(ctx context.Context, notify base.ProfilingRunningSuccessNot
return nil
}

// findMatchesSymbol returns the kernel symbol name the off-CPU kprobe should
// attach to.
//
// It looks for "finish_task_switch" itself or a variant carrying a
// ".<word>.<digits>" suffix (presumably compiler-generated clones such as
// "finish_task_switch.isra.0"). When no kernel symbol information is
// available, or the lookup fails, it falls back to defaultKernelSymbol.
func (r *Runner) findMatchesSymbol() string {
	if r.kernelProfiling == nil {
		return defaultKernelSymbol
	}
	// The pattern is anchored on both ends: without the anchors the optional
	// suffix group has no effect and any symbol that merely contains
	// "finish_task_switch" would be accepted as the attach target.
	res, err := r.kernelProfiling.FindSymbolByRegex(`^finish_task_switch(\.\w+\.\d+)?$`)
	if err != nil {
		log.Warnf("found symbol error: %v", err)
		return defaultKernelSymbol
	}
	return res
}

func (r *Runner) Stop() error {
var err error
r.base.ShutdownOnce.Do(func() {
Expand Down
18 changes: 18 additions & 0 deletions pkg/tools/profiling/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
package profiling

import (
"fmt"
"regexp"

"github.com/apache/skywalking-rover/pkg/logger"

"github.com/ianlancetaylor/demangle"
Expand Down Expand Up @@ -138,6 +141,21 @@ func (i *Info) FindSymbolAddress(name string) uint64 {
return 0
}

// FindSymbolByRegex walks every module and its symbols in iteration order and
// returns the name of the first symbol that matches the regular expression rep.
// The match is not implicitly anchored; callers add ^ and $ when they need a
// whole-name match.
//
// An error is returned when rep is not a valid regular expression or when no
// symbol matches it.
func (i *Info) FindSymbolByRegex(rep string) (string, error) {
	pattern, compileErr := regexp.Compile(rep)
	if compileErr != nil {
		return "", compileErr
	}
	for _, module := range i.Modules {
		for _, symbol := range module.Symbols {
			if !pattern.MatchString(symbol.Name) {
				continue
			}
			return symbol.Name, nil
		}
	}
	return "", fmt.Errorf("cannot found any matches symbol: %s", rep)
}

func (m *Module) contains(addr uint64) (uint64, bool) {
for _, r := range m.Ranges {
if addr >= r.StartAddr && addr < r.EndAddr {
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/base/env
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.

SW_CTL_COMMIT=0883266bfaa36612927b69e35781b64ea181758d
SW_OAP_COMMIT=bcd9f7a56b99ca612711ed3a540bdae5f46e9171
SW_OAP_COMMIT=108260681ea9657217488685779a20cb84c2f288
SW_KUBERNETES_COMMIT_SHA=0f3ec68e5a7e1608cec8688716b848ed15e971e5

SW_AGENT_GO_COMMIT=216f122d942cb683f48578d3014cc5ea83637582
SW_AGENT_GO_COMMIT=216f122d942cb683f48578d3014cc5ea83637582
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
cases:
# slow traces
- query: |
curl -s https://${service_host}:${service_10443}/consumer-zipkin > /dev/null;
curl -s -k https://${service_host}:${service_10443}/consumer-zipkin > /dev/null;
sleep 5;
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql records list \
--name=sampled_slow_trace_record --service-name service --instance-name test --process-name service \
Expand All @@ -41,14 +41,14 @@ cases:
expected: expected/skywalking-trace.yml
# not found traces
- query: |
curl -s https://${service_host}:${service_10443}/consumer?type=notfound > /dev/null;
curl -s -k https://${service_host}:${service_10443}/consumer?type=notfound > /dev/null;
sleep 5;
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql records list \
--name=sampled_status_4xx_trace_record --service-name service --instance-name test --process-name service \
--dest-service-name service --dest-instance-name test --dest-process-name UNKNOWN_REMOTE 20
expected: expected/status-4xx-traces.yml
- query: |
curl -s https://${service_host}:${service_10443}/consumer?type=error > /dev/null;
curl -s -k https://${service_host}:${service_10443}/consumer?type=error > /dev/null;
sleep 5;
swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql records list \
--name=sampled_status_5xx_trace_record --service-name service --instance-name test --process-name service \
Expand Down

0 comments on commit 2462d62

Please sign in to comment.