From 1bb40c9c693dffac10d6682346f3bb2caf683b37 Mon Sep 17 00:00:00 2001 From: mrproliu <741550557@qq.com> Date: Thu, 8 Dec 2022 21:55:01 +0800 Subject: [PATCH] Support compatibility checks and add documentation (#67) --- .github/workflows/compatibility.yaml | 206 ++++++++++++++++ CHANGES.md | 3 + bpf/profiling/network/args.h | 12 +- bpf/profiling/network/go_tls.h | 8 +- bpf/profiling/network/netmonitor.c | 12 +- bpf/profiling/network/sock_stats.h | 2 +- .../concepts-and-designs/project_structue.md | 34 ++- docs/en/setup/overview.md | 32 +++ internal/cmd/check.go | 228 ++++++++++++++++++ internal/cmd/root.go | 1 + internal/cmd/start.go | 2 +- pkg/boot/module.go | 5 +- pkg/boot/module_test.go | 2 +- pkg/boot/starter.go | 4 +- pkg/process/finders/manager.go | 4 + pkg/process/finders/storage.go | 10 + pkg/process/module.go | 4 + pkg/profiling/task/manager.go | 4 +- pkg/profiling/task/network/bpf/linker.go | 24 +- pkg/profiling/task/network/runner.go | 6 +- pkg/profiling/task/network/ssl.go | 3 +- pkg/profiling/task/oncpu/runner.go | 2 +- scripts/check/profiling/check.sh | 45 ++++ scripts/check/profiling/config.yaml | 42 ++++ scripts/check/profiling/demo.go | 26 ++ scripts/check/profiling/run.sh | 39 +++ 26 files changed, 712 insertions(+), 48 deletions(-) create mode 100644 .github/workflows/compatibility.yaml create mode 100644 internal/cmd/check.go create mode 100644 scripts/check/profiling/check.sh create mode 100644 scripts/check/profiling/config.yaml create mode 100644 scripts/check/profiling/demo.go create mode 100644 scripts/check/profiling/run.sh diff --git a/.github/workflows/compatibility.yaml b/.github/workflows/compatibility.yaml new file mode 100644 index 00000000..477c9705 --- /dev/null +++ b/.github/workflows/compatibility.yaml @@ -0,0 +1,206 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Compatibility Checks + +on: workflow_dispatch + +jobs: + upload-checkers: + name: Upload checkers + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Set up Go 1.18 + uses: actions/setup-go@v2 + with: + go-version: 1.18 + - id: 'auth' + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT_CREDENTIALS }}' + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v1' + - name: Build and Upload + run: | + # build skywalking rover binary + make container-btfgen build + cp bin/skywalking-rover-latest-linux-amd64 scripts/check/profiling + + # build demo program + cd scripts/check/profiling + go build demo.go + mv demo demo-program + + # upload to the gcloud + gcloud storage cp ./* gs://skywalking-rover-compatibility/runner + + checks: + name: Compatibility Checks + needs: [upload-checkers] + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + # following `gcloud compute images list` + system: + # ubuntu + - name: Ubuntu + project: ubuntu-os-cloud + family: ubuntu-1804-lts + image: ubuntu-1804-bionic-v20221201 + - name: Ubuntu + project: ubuntu-os-cloud + family: ubuntu-2004-lts + image: ubuntu-2004-focal-v20221202 + - name: 
Ubuntu + project: ubuntu-os-cloud + family: ubuntu-2204-lts + image: ubuntu-2204-jammy-v20221201 + - name: Ubuntu + project: ubuntu-os-cloud + family: ubuntu-2210-amd64 + image: ubuntu-2210-kinetic-amd64-v20221201 + + # ubuntu pro + - name: Ubuntu_Pro + project: ubuntu-os-pro-cloud + family: ubuntu-pro-1604-lts + image: ubuntu-pro-1604-xenial-v20221201 + - name: Ubuntu_Pro + project: ubuntu-os-pro-cloud + family: ubuntu-pro-1804-lts + image: ubuntu-pro-1804-bionic-v20221201 + - name: Ubuntu_Pro + project: ubuntu-os-pro-cloud + family: ubuntu-pro-2004-lts + image: ubuntu-pro-2004-focal-v20221202 + - name: Ubuntu_Pro + project: ubuntu-os-pro-cloud + family: ubuntu-pro-2204-lts + image: ubuntu-pro-2204-jammy-v20221201 + + # centos + - name: Centos + project: centos-cloud + family: centos-7 + image: centos-7-v20221102 + - name: Centos + project: centos-cloud + family: centos-stream-8 + image: centos-stream-8-v20221102 + - name: Centos + project: centos-cloud + family: centos-stream-9 + image: centos-stream-9-v20221102 + + # debian + - name: Debian + project: debian-cloud + family: debian-10 + image: debian-10-buster-v20221102 + - name: Debian + project: debian-cloud + family: debian-11 + image: debian-11-bullseye-v20221102 + + # RHEL + - name: RHEL + project: rhel-cloud + family: rhel-7 + image: rhel-7-v20221102 + - name: RHEL + project: rhel-cloud + family: rhel-8 + image: rhel-8-v20221102 + - name: RHEL + project: rhel-cloud + family: rhel-9 + image: rhel-9-v20221102 + + # rocky + - name: Rocky + project: rocky-linux-cloud + family: rocky-linux-8 + image: rocky-linux-8-v20221102 + - name: Rocky + project: rocky-linux-cloud + family: rocky-linux-9 + image: rocky-linux-9-v20221102 + + # Fedora + - name: Fedora + project: fedora-cloud + family: fedora-cloud-34 + image: fedora-cloud-base-gcp-34-1-2-x86-64 + - name: Fedora + project: fedora-cloud + family: fedora-cloud-35 + image: fedora-cloud-base-gcp-35-1-2-x86-64 + - name: Fedora + project: fedora-cloud + family: 
fedora-cloud-36 + image: fedora-cloud-base-gcp-36-20220506-n-0-x86-64 + - name: Fedora + project: fedora-cloud + family: fedora-cloud-37 + image: fedora-cloud-base-gcp-37-beta-1-5-x86-64 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - id: 'auth' + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT_CREDENTIALS }}' + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v1' + - name: Check + env: + INSTANCE_PROJECT: "${{ matrix.system.project }}" + INSTANCE_IMAGE: "${{ matrix.system.image }}" + INSTANCE_FAMILY: "${{ matrix.system.family }}" + INSTANCE_SYSTEM: "${{ matrix.system.name }}" + run: | + # download the runner + mkdir -p /tmp/runner + gcloud storage cp gs://skywalking-rover-compatibility/runner/* /tmp/runner + + # create instance + gcloud compute instances create "skywalking-rover-test-$INSTANCE_IMAGE" --image "$INSTANCE_IMAGE" \ + --image-project "$INSTANCE_PROJECT" --machine-type e2-medium --zone us-west1-b + + # generate the public ssh key + ssh-keygen -b 2048 -t rsa -f /tmp/sshkey -f ~/.ssh/google_compute_engine -q -N "" + + # scp runner + gcloud compute scp --recurse /tmp/runner hanliu@"skywalking-rover-test-$INSTANCE_IMAGE":/tmp/ --zone us-west1-b + + # run checker + gcloud compute ssh --zone "us-west1-b" "hanliu@skywalking-rover-test-$INSTANCE_IMAGE" \ + --project "skywalking-live-demo" \ + --command "cd /tmp/runner; chmod +x /tmp/runner/demo-program /tmp/runner/skywalking-rover-latest-linux-amd64; sudo bash check.sh /tmp/runner/config.yaml /tmp/runner/demo-program /tmp/runner/skywalking-rover-latest-linux-amd64 /tmp/runner/result/$INSTANCE_SYSTEM/$INSTANCE_FAMILY json" + + # scp from instance and upload to the storage + gcloud compute scp --recurse hanliu@"skywalking-rover-test-$INSTANCE_IMAGE":/tmp/runner/result /tmp/runner --zone us-west1-b + gcloud storage cp -r /tmp/runner/result gs://skywalking-rover-compatibility/ + + # delete test instance + gcloud compute 
instances delete "skywalking-rover-test-$INSTANCE_IMAGE" --zone us-west1-b -q \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md index 5dfde35f..b80c9f50 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,9 @@ Release Notes. #### Bug Fixes +#### Documentation +* Adding support version of Linux documentation. + #### Issues and PR - All issues are [here](https://github.com/apache/skywalking/milestone/154?closed=1) - All and pull requests are [here](https://github.com/apache/skywalking-rover/milestone/4?closed=1) diff --git a/bpf/profiling/network/args.h b/bpf/profiling/network/args.h index d6e6ce83..b3c7c510 100644 --- a/bpf/profiling/network/args.h +++ b/bpf/profiling/network/args.h @@ -20,9 +20,9 @@ #pragma once // for protocol analyze need to read -#define MAX_PROTOCOL_SOCKET_READ_LENGTH 21 +#define MAX_PROTOCOL_SOCKET_READ_LENGTH 31 // for transmit to the user space -#define MAX_TRANSMIT_SOCKET_READ_LENGTH 2048 +#define MAX_TRANSMIT_SOCKET_READ_LENGTH 2047 // unknown the connection type, not trigger the syscall connect,accept #define AF_UNKNOWN 0xff @@ -205,7 +205,7 @@ struct { struct socket_buffer_reader_t { __u32 data_len; - char buffer[MAX_PROTOCOL_SOCKET_READ_LENGTH]; + char buffer[MAX_PROTOCOL_SOCKET_READ_LENGTH + 1]; }; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -240,9 +240,9 @@ static __inline struct socket_buffer_reader_t* read_socket_data(struct sock_data if (size > MAX_PROTOCOL_SOCKET_READ_LENGTH) { size = MAX_PROTOCOL_SOCKET_READ_LENGTH; } - asm volatile("%[size] &= 0xfff;\n" ::[size] "+r"(size) :); - bpf_probe_read(&reader->buffer, size, buf); - reader->data_len = size; + asm volatile("%[size] &= 0x1f;\n" ::[size] "+r"(size) :); + bpf_probe_read(&reader->buffer, size & MAX_PROTOCOL_SOCKET_READ_LENGTH, buf); + reader->data_len = size & MAX_PROTOCOL_SOCKET_READ_LENGTH; return reader; } diff --git a/bpf/profiling/network/go_tls.h b/bpf/profiling/network/go_tls.h index 2ad9729f..1428a4e2 100644 --- 
a/bpf/profiling/network/go_tls.h +++ b/bpf/profiling/network/go_tls.h @@ -49,7 +49,7 @@ struct { __type(key, __u32); __type(value, struct go_tls_args_symaddr_t); } go_tls_args_symaddr_map SEC(".maps"); -static __inline struct go_tls_args_symaddr_t* get_go_tls_args_symaddr(__u32 tgid) { +static __always_inline struct go_tls_args_symaddr_t* get_go_tls_args_symaddr(__u32 tgid) { struct go_tls_args_symaddr_t *addr = bpf_map_lookup_elem(&go_tls_args_symaddr_map, &tgid); return addr; } @@ -82,7 +82,7 @@ struct { } go_regabi_regs_map SEC(".maps"); // Copies the registers of the golang ABI, so that they can be // easily accessed using an offset. -static __inline uint64_t* go_regabi_regs(const struct pt_regs* ctx) { +static __always_inline uint64_t* go_regabi_regs(const struct pt_regs* ctx) { __u32 zero = 0; struct go_regabi_regs_t* regs_heap_var = bpf_map_lookup_elem(&go_regabi_regs_map, &zero); if (regs_heap_var == NULL) { @@ -107,7 +107,7 @@ struct go_interface { void* ptr; }; -static __inline void assign_go_tls_arg(void* arg, size_t arg_size, struct go_tls_arg_location_t loc, const void* sp, +static __always_inline void assign_go_tls_arg(void* arg, size_t arg_size, struct go_tls_arg_location_t loc, const void* sp, uint64_t* regs) { // stack type if (loc.type == 1) { @@ -120,7 +120,7 @@ static __inline void assign_go_tls_arg(void* arg, size_t arg_size, struct go_tls } } -static __inline int get_fd_from_go_tls_conn(struct go_interface conn, struct go_tls_args_symaddr_t* symaddr) { +static __always_inline int get_fd_from_go_tls_conn(struct go_interface conn, struct go_tls_args_symaddr_t* symaddr) { // read connection bpf_probe_read(&conn, sizeof(conn), conn.ptr + symaddr->tls_conn_offset); diff --git a/bpf/profiling/network/netmonitor.c b/bpf/profiling/network/netmonitor.c index 0a36d182..05e8803b 100644 --- a/bpf/profiling/network/netmonitor.c +++ b/bpf/profiling/network/netmonitor.c @@ -227,7 +227,7 @@ static __inline struct active_connection_t* 
get_or_create_active_conn(struct pt_ return bpf_map_lookup_elem(&active_connection_map, &conid); } -static __inline void set_conn_as_ssl(struct pt_regs* ctx, __u32 tgid, __u32 fd, __u32 func_name) { +static __always_inline void set_conn_as_ssl(struct pt_regs* ctx, __u32 tgid, __u32 fd, __u32 func_name) { struct active_connection_t* conn = get_or_create_active_conn(ctx, tgid, fd, func_name); if (conn == NULL) { return; @@ -276,17 +276,13 @@ static __always_inline void __upload_socket_data_with_buffer(void *ctx, __u8 ind event->sequence = index; event->data_len = size; event->finished = is_finished; - if (size == 0) { + if (size <= 0) { return; } - if (size > MAX_TRANSMIT_SOCKET_READ_LENGTH) { - size = MAX_TRANSMIT_SOCKET_READ_LENGTH; - } - asm volatile("%[size] &= 0x7fffffff;\n" ::[size] "+r"(size) :); - bpf_probe_read(&event->buffer, size, buf); + asm volatile("%[size] &= 0x7ff;\n" ::[size] "+r"(size) :); + bpf_probe_read(&event->buffer, size & 0x7ff, buf); bpf_perf_event_output(ctx, &socket_data_upload_event_queue, BPF_F_CURRENT_CPU, event, sizeof(*event)); - } static __always_inline void upload_socket_data_buf(void *ctx, char* buf, ssize_t size, struct socket_data_upload_event *event, __u8 force_unfinished) { diff --git a/bpf/profiling/network/sock_stats.h b/bpf/profiling/network/sock_stats.h index 85690363..84a46eb6 100644 --- a/bpf/profiling/network/sock_stats.h +++ b/bpf/profiling/network/sock_stats.h @@ -269,7 +269,7 @@ struct socket_data_upload_event { __u64 randomid; __u64 data_id; __u64 total_size; - char buffer[MAX_TRANSMIT_SOCKET_READ_LENGTH]; + char buffer[MAX_TRANSMIT_SOCKET_READ_LENGTH + 1]; }; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); diff --git a/docs/en/concepts-and-designs/project_structue.md b/docs/en/concepts-and-designs/project_structue.md index 1d2b2b31..44890cf4 100644 --- a/docs/en/concepts-and-designs/project_structue.md +++ b/docs/en/concepts-and-designs/project_structue.md @@ -2,28 +2,48 @@ - cmd: The starter of Rover. 
- configs: Rover configs. - bpf: All the BPF programs with C code. +- docker: Docker files for building the Rover image. +- docs: The documentation of Rover. - pkg: Contains all modules and basic framework. - - boot: Manage all enabled module life cycle. - - config: Read config for start. - logger: Manage the log. + - config: Read config for start. + - module: The interface of each module. + - boot: Manage all enabled module life cycle. + - core: Manage the connection with OAP. + - process: Manage process detection and upload the processes to the OAP. + - profiling: Manage the profiling tasks and upload data to the OAP. - tools: Sharing tools for each module. -- script/build: For `make` command use. +- internal/cmd: Command lines for executing the Rover. +- script: The Rover related shell scripts. + - build: For `make` command use. + - check: Check whether the Rover features are supported by the system. + - debug: Generate the debug information for the programs. + - release: For releasing the Rover. - test/e2e: E2E test to verify the Rover future. ``` . ├── CHANGES.md ├── cmd ├── configs +├── bpf +├── docker ├── docs -├── go.sum ├── script │   ├── build +│   ├── check +│   ├── debug +│   ├── release ├── pkg -│   ├── boot -│   ├── config │   ├── logger +│   ├── config +│   ├── module +│   ├── boot +│   ├── core +│   ├── process +│   ├── profiling │   ├── tools -│   ├── modules ├── test │   ├── e2e +├── internal +│   ├── cmd ``` \ No newline at end of file diff --git a/docs/en/setup/overview.md b/docs/en/setup/overview.md index 3ba3c841..d64ad54d 100644 --- a/docs/en/setup/overview.md +++ b/docs/en/setup/overview.md @@ -48,3 +48,35 @@ So, in rover_configs.yaml, there contains these parts. 3. [Profiling Module](./configuration/profiling.md). Also, You could using [Overriding Setting](./configuration/override-settings.md) feature to setup the configuration. 
+ +## Prerequisites + +Currently, Linux operating systems are supported from kernel version `4.9` and above, except for network profiling, which requires kernel version `4.16` or higher. + +The following table lists the operating systems that are currently supported. + +| System | Kernel Version | On CPU Profiling | Off CPU Profiling | Network Profiling | +|------------------|----------------|------------------|-------------------|--------------------------------| +| CentOS 7 | 3.10.0 | No | No | No | +| CentOS Stream 8 | 4.18.0 | Yes | Yes | Yes | +| CentOS Stream 9 | 5.47.0 | Yes | Yes | Yes | +| Debian 10 | 4.19.0 | Yes | Yes | Yes | +| Debian 11 | 5.10.0 | Yes | Yes | Yes(TCP Drop Monitor Excluded) | +| Fedora 35 | 5.14.10 | Yes | Yes | Yes(TCP Drop Monitor Excluded) | +| RHEL 7 | 3.10.0 | No | No | No | +| RHEL 8 | 4.18.0 | Yes | Yes | Yes | +| RHEL 9 | 5.14.0 | Yes | Yes | Yes | +| Rocky Linux 8 | 4.18.0 | Yes | Yes | Yes | +| Rocky Linux 9 | 5.14.0 | Yes | Yes | Yes | +| Ubuntu 18.04 | 5.4.0 | Yes | Yes | Yes | +| Ubuntu 20.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu 20.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu 22.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu 22.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu 22.10 | 5.19.0 | Yes | Yes | Yes | +| Ubuntu Pro 16.04 | 4.15.0 | Yes | Yes | No | +| Ubuntu Pro 18.04 | 5.4.0 | Yes | Yes | Yes | +| Ubuntu Pro 20.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu Pro 22.04 | 5.15.0 | Yes | Yes | Yes | +| Ubuntu Pro 22.04 | 5.15.0 | Yes | Yes | Yes | + diff --git a/internal/cmd/check.go b/internal/cmd/check.go new file mode 100644 index 00000000..a81c1c0c --- /dev/null +++ b/internal/cmd/check.go @@ -0,0 +1,228 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. 
Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "os" + "path/filepath" + "time" + + "github.com/apache/skywalking-rover/pkg/tools/operator" + + "github.com/cilium/ebpf/rlimit" + + "github.com/apache/skywalking-rover/pkg/module" + "github.com/apache/skywalking-rover/pkg/process" + "github.com/apache/skywalking-rover/pkg/process/api" + "github.com/apache/skywalking-rover/pkg/profiling/task" + "github.com/apache/skywalking-rover/pkg/profiling/task/base" + + "github.com/spf13/cobra" + + "github.com/apache/skywalking-rover/pkg/boot" +) + +func newCheckCmd() *cobra.Command { + configPath := "" + outputPath := "" + outputFormat := "" + cmd := &cobra.Command{ + Use: "check", + RunE: func(cmd *cobra.Command, args []string) error { + return check(configPath, outputPath, outputFormat) + }, + } + + cmd.Flags().StringVarP(&configPath, "config", "c", "configs/rover_configs.yaml", "the rover config file path") + cmd.Flags().StringVarP(&outputPath, "output", "o", "output.txt", "the rover check output file") + cmd.Flags().StringVarP(&outputFormat, "format", "f", "plain", "the check output format, support \"json\", \"plain\"") + return cmd +} + +func check(configPath, outputPath, format string) error { + if configPath == "" || outputPath == "" { + return fmt.Errorf("the config and output path is required") + } + + err := 
os.MkdirAll(filepath.Dir(outputPath), os.ModePerm) + if err != nil { + log.Fatalf("failed to create the output file directory: %v", err) + } + + outFile, err := os.Create(outputPath) + if err != nil { + log.Fatalf("failed to create the output file: %v", err) + } + defer outFile.Close() + + notify := make(chan bool, 1) + go func(notify chan bool) { + err = boot.RunModules(context.Background(), configPath, func(manager *module.Manager) { + // startup success + processModuleStartSuccess(notify, nil, manager, outFile, format) + }) + if err != nil { + // startup failure + processModuleStartSuccess(notify, err, nil, outFile, format) + } + }(notify) + + <-notify + return nil +} + +func processModuleStartSuccess(notify chan bool, err error, mgr *module.Manager, file io.Writer, format string) { + data := &outputData{} + defer func() { + writeOutput(data, file, format) + notify <- true + }() + if err != nil { + data.Startup = err + return + } + uname, err := operator.GetOSUname() + if err != nil { + data.Startup = err + return + } + data.Kernel = uname.Release + + if err := rlimit.RemoveMemlock(); err != nil { + data.Startup = err + return + } + + // wait processes + processModule := mgr.FindModule(process.ModuleName).(*process.Module) + var processes []api.ProcessInterface + for i := 0; i < 3; i++ { + processes = processModule.GetAllProcesses() + if len(processes) != 0 { + break + } + time.Sleep(time.Second) + } + if len(processes) == 0 { + data.Startup = fmt.Errorf("no process") + return + } + + ctx := context.Background() + data.OnCPU = testOnCPUProfiling(ctx, mgr, processes[0]) + data.OffCPU = testOffCPUProfiling(ctx, mgr, processes[0]) + data.Network = testNetworkProfiling(ctx, mgr, processes[0]) +} + +func testOnCPUProfiling(ctx context.Context, mgr *module.Manager, p api.ProcessInterface) error { + return testWithRunner(ctx, base.TargetTypeOnCPU, &base.TaskConfig{ + OnCPU: &base.OnCPUConfig{ + Period: "9ms", + }, + }, mgr, p) +} + +func testOffCPUProfiling(ctx 
context.Context, mgr *module.Manager, p api.ProcessInterface) error { + return testWithRunner(ctx, base.TargetTypeOffCPU, &base.TaskConfig{}, mgr, p) +} + +func testNetworkProfiling(ctx context.Context, mgr *module.Manager, p api.ProcessInterface) error { + return testWithRunner(ctx, base.TargetTypeNetworkTopology, &base.TaskConfig{ + Network: &base.NetworkConfig{ + MeterPrefix: "test", + ReportInterval: "10ms", + ProtocolAnalyze: base.ProtocolAnalyzeConfig{ + PerCPUBufferSize: "10K", + Parallels: 1, + QueueSize: 5000, + }, + }, + }, mgr, p) +} + +func testWithRunner(ctx context.Context, taskType base.TargetType, taskConfig *base.TaskConfig, + moduleManager *module.Manager, p api.ProcessInterface) error { + runner, err := task.NewProfilingRunner(taskType, taskConfig, moduleManager) + if err != nil { + return err + } + + if err := runner.Init(&base.ProfilingTask{}, []api.ProcessInterface{p}); err != nil { + return err + } + + if err := runner.Run(ctx, func() { + _ = runner.Stop() + }); err != nil { + return err + } + + return nil +} + +func writeOutput(data *outputData, file io.Writer, format string) { + if format != "json" { + sprintData := fmt.Sprintf("Kernel: %s\nStartup: %s\nOnCPU: %s\nOffCPU: %s\nNetwork: %s", + data.Kernel, errorOrSuccess(data.Startup), errorOrSuccess(data.OnCPU), + errorOrSuccess(data.OffCPU), errorOrSuccess(data.Network)) + _, _ = file.Write([]byte(sprintData)) + return + } + // some error could not be marshaled, such as multierror + jsonData := &outputDataJSON{ + Kernel: data.Kernel, + Startup: errorOrSuccess(data.Startup), + OnCPU: errorOrSuccess(data.OnCPU), + OffCPU: errorOrSuccess(data.OffCPU), + Network: errorOrSuccess(data.Network), + } + marshal, err := json.Marshal(jsonData) + if err != nil { + log.Printf("format the output failure: %v", err) + return + } + _, _ = file.Write(marshal) +} + +func errorOrSuccess(data error) string { + if data != nil { + return data.Error() + } + return "true" +} + +type outputData struct { + Kernel 
string + Startup error + OnCPU error + OffCPU error + Network error +} + +type outputDataJSON struct { + Kernel string + Startup string + OnCPU string + OffCPU string + Network string +} diff --git a/internal/cmd/root.go b/internal/cmd/root.go index e3f8130b..e8069e80 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -26,5 +26,6 @@ func NewRoot() *cobra.Command { "deployed in the target monitoring system to collect metrics, perf profiling, network optimization, etc.", } cmd.AddCommand(newStartCmd()) + cmd.AddCommand(newCheckCmd()) return cmd } diff --git a/internal/cmd/start.go b/internal/cmd/start.go index 49f2b066..02dfb623 100644 --- a/internal/cmd/start.go +++ b/internal/cmd/start.go @@ -34,7 +34,7 @@ func newStartCmd() *cobra.Command { ctx := context.Background() // run modules - return boot.RunModules(ctx, configPath) + return boot.RunModules(ctx, configPath, nil) }, } diff --git a/pkg/boot/module.go b/pkg/boot/module.go index ade378ae..022574ab 100644 --- a/pkg/boot/module.go +++ b/pkg/boot/module.go @@ -62,7 +62,7 @@ func NewModuleStarter(modules []module.Module) *ModuleStarter { } } -func (m *ModuleStarter) Run(ctx context.Context) error { +func (m *ModuleStarter) Run(ctx context.Context, startUpSuccessCallback func(*module.Manager)) error { // resolve module dependencies if err := m.ResolveDependency(); err != nil { return err @@ -97,6 +97,9 @@ func (m *ModuleStarter) Run(ctx context.Context) error { for _, mod := range m.startedModules { mod.NotifyStartSuccess() } + if startUpSuccessCallback != nil { + startUpSuccessCallback(m.moduleManager) + } // register terminal signals := make(chan os.Signal, 1) diff --git a/pkg/boot/module_test.go b/pkg/boot/module_test.go index a6a89d87..313db9cf 100644 --- a/pkg/boot/module_test.go +++ b/pkg/boot/module_test.go @@ -246,7 +246,7 @@ func testRun(run *testRunStruct, t *testing.T) { shutdownChannel := make(chan error) starter := NewModuleStarter(modules) go func() { - err := starter.Run(ctx) + err := 
starter.Run(ctx, nil) if err != nil { shutdownChannel <- err } else { diff --git a/pkg/boot/starter.go b/pkg/boot/starter.go index 300dbe7b..056c590f 100644 --- a/pkg/boot/starter.go +++ b/pkg/boot/starter.go @@ -28,7 +28,7 @@ import ( var log = logger.GetLogger("boot", "starter") -func RunModules(ctx context.Context, file string) error { +func RunModules(ctx context.Context, file string, startUpSuccessCallback func(*module.Manager)) error { // read config files conf, err := config.Load(file) if err != nil { @@ -43,7 +43,7 @@ func RunModules(ctx context.Context, file string) error { // startup all modules starter := NewModuleStarter(modules) - return starter.Run(ctx) + return starter.Run(ctx, startUpSuccessCallback) } func findAllDeclaredModules(conf *config.Config) ([]module.Module, error) { diff --git a/pkg/process/finders/manager.go b/pkg/process/finders/manager.go index cd16511a..3b0cb081 100644 --- a/pkg/process/finders/manager.go +++ b/pkg/process/finders/manager.go @@ -121,6 +121,10 @@ func (p *ProcessManagerWithFinder) SyncAllProcessInFinder(processes []base.Detec p.storage.SyncAllProcessInFinder(p.finderType, processes) } +func (m *ProcessManager) GetAllProcesses() []api.ProcessInterface { + return m.storage.GetAllProcesses() +} + func (m *ProcessManager) FindProcessByID(processID string) api.ProcessInterface { return m.storage.FindProcessByID(processID) } diff --git a/pkg/process/finders/storage.go b/pkg/process/finders/storage.go index 13e28a11..128f02eb 100644 --- a/pkg/process/finders/storage.go +++ b/pkg/process/finders/storage.go @@ -308,6 +308,16 @@ func (s *ProcessStorage) updateProcessToUploadIgnored(pc *ProcessContext) { log.Infof("could not found the process id from upstream, pid: %d, entity: %v", pc.Pid(), pc.Entity()) } +func (s *ProcessStorage) GetAllProcesses() []api.ProcessInterface { + result := make([]api.ProcessInterface, 0) + for _, processList := range s.processes { + for _, p := range processList { + result = append(result, p) + } + } 
+ return result +} + func (s *ProcessStorage) FindProcessByID(processID string) api.ProcessInterface { for _, finderProcesses := range s.processes { for _, p := range finderProcesses { diff --git a/pkg/process/module.go b/pkg/process/module.go index d2e4afd6..04af2279 100644 --- a/pkg/process/module.go +++ b/pkg/process/module.go @@ -75,6 +75,10 @@ func (m *Module) Shutdown(ctx context.Context, mgr *module.Manager) error { return m.manager.Shutdown() } +func (m *Module) GetAllProcesses() []api.ProcessInterface { + return m.manager.GetAllProcesses() +} + func (m *Module) FindProcessByID(processID string) api.ProcessInterface { return m.manager.FindProcessByID(processID) } diff --git a/pkg/profiling/task/manager.go b/pkg/profiling/task/manager.go index 5f230dd7..23752de6 100644 --- a/pkg/profiling/task/manager.go +++ b/pkg/profiling/task/manager.go @@ -128,7 +128,7 @@ func (m *Manager) StartTask(c *Context) { // close task if not same id := m.tasks[taskIdentity].TaskID() log.Infof("existing profiling task: %s, so need to stop it", id) - if err := m.shutdownAndRemoveTask(m.tasks[taskIdentity]); err != nil { + if err := m.ShutdownAndRemoveTask(m.tasks[taskIdentity]); err != nil { log.Warnf("shutdown existing profiling task failure, so cannot to start new profiling task: %v. 
reason: %v", c.task.TaskID, err) return } @@ -227,7 +227,7 @@ func (m *Manager) shutdownTask(c *Context) error { return err } -func (m *Manager) shutdownAndRemoveTask(c *Context) error { +func (m *Manager) ShutdownAndRemoveTask(c *Context) error { err := m.shutdownTask(c) delete(m.tasks, c.BuildTaskIdentity()) return err diff --git a/pkg/profiling/task/network/bpf/linker.go b/pkg/profiling/task/network/bpf/linker.go index 774068dc..9ff943f5 100644 --- a/pkg/profiling/task/network/bpf/linker.go +++ b/pkg/profiling/task/network/bpf/linker.go @@ -99,6 +99,12 @@ type UProbeExeFile struct { } func (m *Linker) AddLink(linkF LinkFunc, symbolWithPrograms map[string]*ebpf.Program) { + if e := m.AddLinkOrError(linkF, symbolWithPrograms); e != nil { + m.errors = multierror.Append(m.errors, e) + } +} + +func (m *Linker) AddLinkOrError(linkF LinkFunc, symbolWithPrograms map[string]*ebpf.Program) error { var lk link.Link var err error var realSym string @@ -114,11 +120,11 @@ func (m *Linker) AddLink(linkF LinkFunc, symbolWithPrograms map[string]*ebpf.Pro for s := range symbolWithPrograms { symbolNames = append(symbolNames, s) } - m.errors = multierror.Append(m.errors, fmt.Errorf("open %s error: %v", symbolNames, err)) - } else { - log.Debugf("attach to the kprobe: %s", realSym) - m.closers = append(m.closers, lk) + return multierror.Append(m.errors, fmt.Errorf("open %s error: %v", symbolNames, err)) } + log.Debugf("attach to the kprobe: %s", realSym) + m.closers = append(m.closers, lk) + return nil } func (m *Linker) AddSysCall(call string, enter, exit *ebpf.Program) { @@ -253,7 +259,7 @@ func (u *UProbeExeFile) addLinkWithType0(symbol string, enter bool, p *ebpf.Prog var opts *link.UprobeOptions if customizeAddress > 0 { opts = &link.UprobeOptions{ - Address: customizeAddress, + Offset: customizeAddress, } } return fun(symbol, p, opts) @@ -291,12 +297,6 @@ func (u *UProbeExeFile) addGoExitLink0(symbol string, p *ebpf.Program, elfFile * return nil, fmt.Errorf("reading symbol 
data error: %v", err) } - // find the base addresses - targetBaseAddress := elfFile.FindBaseAddressForAttach(targetSymbol.Location) - if targetBaseAddress == 0 { - return nil, fmt.Errorf("could not found the symbol base addresses") - } - // based on the base addresses and symbol data buffer // calculate all RET addresses // https://github.com/iovisor/bcc/issues/1320#issuecomment-407927542 @@ -308,7 +308,7 @@ func (u *UProbeExeFile) addGoExitLink0(symbol string, p *ebpf.Program, elfFile * } if inst.Op == x86asm.RET { - addresses = append(addresses, targetBaseAddress+uint64(i)) + addresses = append(addresses, uint64(i)) } i += inst.Len diff --git a/pkg/profiling/task/network/runner.go b/pkg/profiling/task/network/runner.go index 20c31121..8c41eb37 100644 --- a/pkg/profiling/task/network/runner.go +++ b/pkg/profiling/task/network/runner.go @@ -167,7 +167,11 @@ func (r *Runner) Start(ctx context.Context, task *base.ProfilingTask, processes // retransmit/drop bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_retransmit_skb": bpfLoader.TcpRetransmit}) - bpfLoader.AddLink(link.Kprobe, map[string]*ebpf.Program{"tcp_drop": bpfLoader.TcpDrop, "kfree_skb_reason": bpfLoader.KfreeSkbReason}) + if e := bpfLoader.AddLinkOrError(link.Kprobe, map[string]*ebpf.Program{ + "tcp_drop": bpfLoader.TcpDrop, + "kfree_skb_reason": bpfLoader.KfreeSkbReason}); e != nil { + log.Warnf("cannot monitor the tcp drop, ignore it and keep profiling: %v", e) + } if err := bpfLoader.HasError(); err != nil { _ = bpfLoader.Close() diff --git a/pkg/profiling/task/network/ssl.go b/pkg/profiling/task/network/ssl.go index c586c3a2..f6925f70 100644 --- a/pkg/profiling/task/network/ssl.go +++ b/pkg/profiling/task/network/ssl.go @@ -419,7 +419,8 @@ func generateGOTLSSymbolOffsets(modules []*profiling.Module, _ int, elfFile *elf return a == b }, "go.itab.*net.TCPConn,net.Conn") if sym == nil { - return nil, nil, fmt.Errorf("could found the tcp connection symbol") + log.Warnf("could not found the tcp 
connection symbol: go.itab.*net.TCPConn,net.Conn") + return nil, nil, nil } symbolAddresses.TCPConnOffset = sym.Location diff --git a/pkg/profiling/task/oncpu/runner.go b/pkg/profiling/task/oncpu/runner.go index d55ea1c9..b099c949 100644 --- a/pkg/profiling/task/oncpu/runner.go +++ b/pkg/profiling/task/oncpu/runner.go @@ -124,7 +124,7 @@ func (r *Runner) Run(ctx context.Context, notify base.ProfilingRunningSuccessNot return fmt.Errorf("replace the monitor pid failure") } if err1 := spec.LoadAndAssign(&objs, nil); err1 != nil { - log.Errorf("loading objects: %s", err1) + return fmt.Errorf("loading objects: %s", err1) } defer objs.Close() r.bpf = &objs diff --git a/scripts/check/profiling/check.sh b/scripts/check/profiling/check.sh new file mode 100644 index 00000000..e1209831 --- /dev/null +++ b/scripts/check/profiling/check.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# the rover config file +config_file=$1 +# the demo program file +demo_exe_file=$2 +# the skywalking rover binary file +rover_exe_file=$3 +# the checker output directory +output_dir=$4 +# the output format, support: json, plain (defaults to plain when omitted) +output_format=${5:-plain} + +set -e + +mkdir -p "$output_dir" + +# run the demo program in the background and give it a moment to start +nohup "$demo_exe_file" >/dev/null 2>&1 & +sleep 1 + +# run checker; errexit is disabled so the demo program is still cleaned up when the check fails +set +e +nohup "$rover_exe_file" check -c "$config_file" -o "$output_dir/result.txt" -f "$output_format" > "$output_dir/checker.log" 2>&1 +set -e + +# close the demo program started above ($! is still its pid because the checker ran in the foreground) +kill -9 "$!" \ No newline at end of file diff --git a/scripts/check/profiling/config.yaml b/scripts/check/profiling/config.yaml new file mode 100644 index 00000000..cdc1ca08 --- /dev/null +++ b/scripts/check/profiling/config.yaml @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +logger: + level: ${ROVER_LOGGER_LEVEL:INFO} + +core: + backend: + addr: localhost:11800 + enable_TLS: false + check_period: 5 + authentication: "" + +process_discovery: + heartbeat_period: 20s + properties_report_period: 10 + scanner: + period: 1s + mode: REGEX + regex: + - match_cmd: demo-program$ + layer: OS_LINUX + service_name: demo-service + instance_name: instance + process_name: demo + labels: "" + kubernetes: + active: false \ No newline at end of file diff --git a/scripts/check/profiling/demo.go b/scripts/check/profiling/demo.go new file mode 100644 index 00000000..87a4d56b --- /dev/null +++ b/scripts/check/profiling/demo.go @@ -0,0 +1,26 @@ +// Licensed to Apache Software Foundation (ASF) under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Apache Software Foundation (ASF) licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package main + +import "time" + +func main() { + for { + time.Sleep(time.Minute) + } +} diff --git a/scripts/check/profiling/run.sh b/scripts/check/profiling/run.sh new file mode 100644 index 00000000..6f54358c --- /dev/null +++ b/scripts/check/profiling/run.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +set -e + +cd "$(dirname "$0")"; current_dir="$(pwd)" +ROVER_DIR="$current_dir/../../../" + +# build demo program (the working directory is now the script directory) +echo "building demo program" +go build demo.go +mv demo ./demo-program + +# build the skywalking rover +echo "building skywalking rover" +make -C "$ROVER_DIR" container-generate build +cp "$ROVER_DIR/bin/skywalking-rover-latest-linux-amd64" ./skywalking-rover + +# run the checker with the plain output format; it writes result.txt into the output directory +echo "running checker" +bash check.sh "$current_dir/config.yaml" "$current_dir/demo-program" "$current_dir/skywalking-rover" "$current_dir/output" plain +echo "check result:" +echo "---------------------------------" +cat "$current_dir/output/result.txt" \ No newline at end of file