diff --git a/.github/workflows/release-master.yaml b/.github/workflows/release-master.yaml new file mode 100644 index 000000000..7cba72a0f --- /dev/null +++ b/.github/workflows/release-master.yaml @@ -0,0 +1,27 @@ +name: Latest Release + +on: + push: + branches: + - "main" + paths-ignore: + - "**/*.png" + - "**/*.svg" + - "**/*.yaml" + - "**/*.md" + +jobs: + latest-release: + name: Push image to ghcr.io + runs-on: ubuntu-22.04 + steps: + - name: checkout code + uses: actions/checkout@v3 + - name: docker build + # master branch alway use latest tag + run: sudo TAG=latest make docker + - name: login registry + # This is where you will update the PAT to GITHUB_TOKEN + run: echo "${{ secrets.GH_PAT }}" | docker login ghcr.io -u $ --password-stdin + - name: docker push + run: docker push ghcr.io/kmesh-net/kmesh:latest \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 449452ecf..b7135d96c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,19 +2,20 @@ Welcome to Kmesh! 
-- [Before you get started](#before-you-get-started) - - [Code of Conduct](#code-of-conduct) - - [Community Expectations](#community-expectations) -- [Getting started](#getting-started) -- [Your First Contribution](#your-first-contribution) - - [Find something to work on](#find-something-to-work-on) - - [Find a good first topic](#find-a-good-first-topic) - - [Work on an Issue](#work-on-an-issue) - - [File an Issue](#file-an-issue) -- [Contributor Workflow](#contributor-workflow) - - [Creating Pull Requests](#creating-pull-requests) - - [Code Review](#code-review) - - [Testing](#testing) +- [Contributing](#contributing) +- [Before you get started](#before-you-get-started) + - [Code of Conduct](#code-of-conduct) + - [Community Expectations](#community-expectations) +- [Getting started](#getting-started) +- [Your First Contribution](#your-first-contribution) + - [Find something to work on](#find-something-to-work-on) + - [Find a good first topic](#find-a-good-first-topic) + - [Work on an issue](#work-on-an-issue) + - [File an Issue](#file-an-issue) +- [Contributor Workflow](#contributor-workflow) + - [Creating Pull Requests](#creating-pull-requests) + - [Code Review](#code-review) +- [Memebership](#memebership) # Before you get started @@ -34,7 +35,6 @@ and intended to realize multi-cloud centralized management, high availability, f - Make your changes on your fork repository. - Submit a PR. - # Your First Contribution We will help you to contribute in different areas like filing issues, developing features, fixing critical bugs and @@ -104,3 +104,11 @@ To make it easier for your PR to receive reviews, consider the reviewers will ne * follow [good coding guidelines](https://github.com/golang/go/wiki/CodeReviewComments). * write [good commit messages](https://chris.beams.io/posts/git-commit/). * break large changes into a logical series of smaller patches which individually make easily understandable changes, and in aggregate solve a broader issue. 
+ +# Memebership + +We encourage all contributors to become members. Learn more about requirements and responsibilities of membership in our [Community Membership doc](https://github.com/kmesh-net/website/blob/main/content/en/docs/community/membership.md). + +If you have made contributions that meet the requirements of becoming KMesh member, simply file an [issue](https://github.com/kmesh-net/kmesh/issues/new?assignees=&labels=&projects=&template=membership-request.md&title=REQUEST%3A+New+membership+for+%3Cyour+name%3E) to apply. + +Kmesh community welcomes all interested developers to become members of the Kmesh community! diff --git a/Makefile b/Makefile index 07a5c6fd5..fb8be2d30 100644 --- a/Makefile +++ b/Makefile @@ -170,3 +170,7 @@ clean: $(QUIET) make clean -C bpf/deserialization_to_bpf_map $(call printlog, CLEAN, "kernel") $(QUIET) make clean -C kernel/ko_src + + $(QUIET) if docker ps -a -q -f name=kmesh-build | grep -q .; then \ + docker rm -f kmesh-build; \ + fi \ No newline at end of file diff --git a/bpf/include/bpf_common.h b/bpf/include/bpf_common.h index b5df9f0ee..76754c63f 100644 --- a/bpf/include/bpf_common.h +++ b/bpf/include/bpf_common.h @@ -16,14 +16,14 @@ #define map_of_manager kmesh_manage #define MAP_SIZE_OF_MANAGER 8192 -/*0x3a10000 is the specific port handled by the cni to enable kmesh*/ -#define ENABLE_KMESH_PORT 0x3a10000 -/*0x3a20000 is the specific port handled by the cni to enable kmesh*/ -#define DISABLE_KMESH_PORT 0x3a20000 -/*0x3a30000 is the specific port handled by the daemon to enable bypass*/ -#define ENABLE_BYPASS_PORT 0x3a30000 -/*0x3a40000 is the specific port handled by the daemon to enable bypass*/ -#define DISABLE_BYPASS_PORT 0x3a40000 +/*0x3a1(929) is the specific port handled by the cni to enable kmesh*/ +#define ENABLE_KMESH_PORT 0x3a1 +/*0x3a2(930) is the specific port handled by the cni to enable kmesh*/ +#define DISABLE_KMESH_PORT 0x3a2 +/*0x3a3(931) is the specific port handled by the daemon to enable bypass*/ 
+#define ENABLE_BYPASS_PORT 0x3a3 +/*0x3a4(932) is the specific port handled by the daemon to enable bypass*/ +#define DISABLE_BYPASS_PORT 0x3a4 typedef struct { __u32 is_bypassed; @@ -104,28 +104,28 @@ static inline bool conn_from_bypass_sim_add(struct bpf_sock_addr *ctx) { // daemon sim connect 0.0.0.0:931(0x3a3) // 0x3a3 is the specific port handled by the daemon to enable bypass - return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohl(ctx->user_port) == ENABLE_BYPASS_PORT)); + return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohs(ctx->user_port) == ENABLE_BYPASS_PORT)); } static inline bool conn_from_bypass_sim_delete(struct bpf_sock_addr *ctx) { // daemon sim connect 0.0.0.1:932(0x3a4) // 0x3a4 is the specific port handled by the daemon to disable bypass - return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohl(ctx->user_port) == DISABLE_BYPASS_PORT)); + return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohs(ctx->user_port) == DISABLE_BYPASS_PORT)); } static inline bool conn_from_cni_sim_add(struct bpf_sock_addr *ctx) { // cni sim connect 0.0.0.0:929(0x3a1) // 0x3a1 is the specific port handled by the cni to enable Kmesh - return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohl(ctx->user_port) == ENABLE_KMESH_PORT)); + return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohs(ctx->user_port) == ENABLE_KMESH_PORT)); } static inline bool conn_from_cni_sim_delete(struct bpf_sock_addr *ctx) { // cni sim connect 0.0.0.1:930(0x3a2) // 0x3a2 is the specific port handled by the cni to disable Kmesh - return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohl(ctx->user_port) == DISABLE_KMESH_PORT)); + return ((bpf_ntohl(ctx->user_ip4) == 1) && (bpf_ntohs(ctx->user_port) == DISABLE_KMESH_PORT)); } /* This function is used to store and delete cookie diff --git a/bpf/include/common.h b/bpf/include/common.h index 1da865f70..a4758651d 100644 --- a/bpf/include/common.h +++ b/bpf/include/common.h @@ -69,7 +69,12 @@ static inline int kmesh_map_update_elem(void *map, const void *key, const 
void * } #if OE_23_03 -#define bpf__strncmp bpf_strncmp +#define bpf__strncmp bpf_strncmp +#define GET_SKOPS_REMOTE_PORT(sk_ops) (__u16)((sk_ops)->remote_port) +#else +#define GET_SKOPS_REMOTE_PORT(sk_ops) (__u16)((sk_ops)->remote_port >> 16) #endif +#define GET_SKOPS_LOCAL_PORT(sk_ops) (__u16)((sk_ops)->local_port) + #endif // _COMMON_H_ diff --git a/bpf/kmesh/ads/sockops.c b/bpf/kmesh/ads/sockops.c index 3ace3f0f9..9e7d7fa6c 100644 --- a/bpf/kmesh/ads/sockops.c +++ b/bpf/kmesh/ads/sockops.c @@ -33,9 +33,8 @@ static int sockops_traffic_control(struct bpf_sock_ops *skops, struct bpf_mem_pt int ret; /* 1 lookup listener */ DECLARE_VAR_ADDRESS(skops, addr); -#if !OE_23_03 - addr.port = addr.port >> 16; -#endif + addr.port = GET_SKOPS_REMOTE_PORT(skops); + Listener__Listener *listener = map_lookup_listener(&addr); if (!listener) { diff --git a/bpf/kmesh/bpf2go/bpf2go.go b/bpf/kmesh/bpf2go/bpf2go.go index a71028777..acfcdeea5 100644 --- a/bpf/kmesh/bpf2go/bpf2go.go +++ b/bpf/kmesh/bpf2go/bpf2go.go @@ -25,6 +25,6 @@ package bpf2go //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshCgroupSockWorkload ../workload/cgroup_sock.c -- -I../workload/include -I../../include //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshSockops ../ads/sockops.c -- -I../ads/include -I../../include -I../../../api/v2-c //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshTracePoint ../ads/tracepoint.c -- -I../ads/include -I../../include -//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshSockopsWorkload ../workload/sockops_tuple.c -- -I../workload/include -I../../include +//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshSockopsWorkload ../workload/sockops.c -- 
-I../workload/include -I../../include //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshXDPAuth ../workload/xdp.c -- -I../workload/include -I../../include //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang --cflags $EXTRA_CFLAGS --cflags $EXTRA_CDEFINE KmeshSendmsg ../workload/sendmsg.c -- -I../workload/include -I../../include diff --git a/bpf/kmesh/workload/include/backend.h b/bpf/kmesh/workload/include/backend.h index 92c2c685f..444664241 100644 --- a/bpf/kmesh/workload/include/backend.h +++ b/bpf/kmesh/workload/include/backend.h @@ -82,7 +82,7 @@ static inline int backend_manager(ctx_buff_t *ctx, backend_value *backend_v, __u return -EINVAL; } if (service_id == backend_v->service[i]) { - BPF_LOG(DEBUG, BACKEND, "access the backend by service:%d\n", service_id); + BPF_LOG(DEBUG, BACKEND, "access the backend by service:%u\n", service_id); #pragma unroll for (__u32 j = 0; j < MAX_PORT_COUNT; j++) { if (user_port == service_v->service_port[j]) { diff --git a/bpf/kmesh/workload/sockops_tuple.c b/bpf/kmesh/workload/sockops.c similarity index 90% rename from bpf/kmesh/workload/sockops_tuple.c rename to bpf/kmesh/workload/sockops.c index 4b8911d3b..b7cd11c78 100644 --- a/bpf/kmesh/workload/sockops_tuple.c +++ b/bpf/kmesh/workload/sockops.c @@ -42,7 +42,7 @@ struct { __type(value, __u32); __uint(max_entries, MAP_SIZE_OF_MANAGER); __uint(map_flags, 0); -} map_of_kmesh_hashmap SEC(".maps"); +} map_of_kmesh_socket SEC(".maps"); static inline bool is_managed_by_kmesh(__u32 ip) { @@ -60,14 +60,11 @@ static inline void extract_skops_to_tuple(struct bpf_sock_ops *skops, struct bpf tuple_key->ipv4.saddr = skops->local_ip4; tuple_key->ipv4.daddr = skops->remote_ip4; // local_port is host byteorder - tuple_key->ipv4.sport = bpf_htonl(skops->local_port) >> FORMAT_IP_LENGTH; + tuple_key->ipv4.sport = bpf_htons(GET_SKOPS_LOCAL_PORT(skops)); // remote_port is network byteorder // openEuler 2303 convert 
remote port different than other linux vendor -#if !OE_23_03 - tuple_key->ipv4.dport = skops->remote_port >> FORMAT_IP_LENGTH; -#else - tuple_key->ipv4.dport = skops->remote_port; -#endif + + tuple_key->ipv4.dport = GET_SKOPS_REMOTE_PORT(skops); } static inline void extract_skops_to_tuple_reverse(struct bpf_sock_ops *skops, struct bpf_sock_tuple *tuple_key) @@ -75,14 +72,9 @@ static inline void extract_skops_to_tuple_reverse(struct bpf_sock_ops *skops, st tuple_key->ipv4.saddr = skops->remote_ip4; tuple_key->ipv4.daddr = skops->local_ip4; // remote_port is network byteorder - // openEuler 2303 convert remote port different than other linux vendor -#if !OE_23_03 - tuple_key->ipv4.sport = skops->remote_port >> FORMAT_IP_LENGTH; -#else - tuple_key->ipv4.sport = skops->remote_port; -#endif + tuple_key->ipv4.sport = GET_SKOPS_REMOTE_PORT(skops); // local_port is host byteorder - tuple_key->ipv4.dport = bpf_htonl(skops->local_port) >> FORMAT_IP_LENGTH; + tuple_key->ipv4.dport = bpf_htons(GET_SKOPS_LOCAL_PORT(skops)); } // clean map_of_auth @@ -126,12 +118,13 @@ static inline void auth_ip_tuple(struct bpf_sock_ops *skops) bpf_ringbuf_submit(msg, 0); } +// update sockmap to trigger sk_msg prog to encode metadata before sending to waypoint static inline void enable_encoding_metadata(struct bpf_sock_ops *skops) { int err; struct bpf_sock_tuple tuple_info = {0}; extract_skops_to_tuple(skops, &tuple_info); - err = bpf_sock_hash_update(skops, &map_of_kmesh_hashmap, &tuple_info, BPF_ANY); + err = bpf_sock_hash_update(skops, &map_of_kmesh_socket, &tuple_info, BPF_ANY); if (err) BPF_LOG(ERR, SOCKOPS, "enable encoding metadta failed!, err is %d", err); } @@ -157,14 +150,12 @@ static inline void remove_kmesh_managed_ip(__u32 ip) BPF_LOG(ERR, KMESH, "remove ip failed!, err is %d\n", err); } -static inline bool conn_from_sim(struct bpf_sock_ops *skops, __u32 ip, __u32 port) +static inline bool conn_from_sim(struct bpf_sock_ops *skops, __u32 ip, __u16 port) { - __u32 rev_port = 
bpf_ntohl(skops->remote_port); + __u16 remote_port = GET_SKOPS_REMOTE_PORT(skops); __u32 client_ip = bpf_ntohl(skops->remote_ip4); -#if !OE_23_03 - port >>= 16; -#endif - return (client_ip == ip) && (port == rev_port); + + return (client_ip == ip) && (bpf_ntohs(remote_port) == port); } static inline bool skops_conn_from_cni_sim_add(struct bpf_sock_ops *skops) @@ -231,7 +222,7 @@ static inline void skops_handle_bypass_process(struct bpf_sock_ops *skops) } SEC("sockops") -int record_tuple(struct bpf_sock_ops *skops) +int sockops_prog(struct bpf_sock_ops *skops) { if (skops->family != AF_INET && !ipv4_mapped_addr(skops->local_ip6)) return 0; @@ -245,7 +236,10 @@ int record_tuple(struct bpf_sock_ops *skops) break; if (bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG) != 0) BPF_LOG(ERR, SOCKOPS, "set sockops cb failed!\n"); - enable_encoding_metadata(skops); + __u64 *current_sk = (__u64 *)skops->sk; + struct bpf_sock_tuple *dst = bpf_map_lookup_elem(&map_of_dst_info, ¤t_sk); + if (dst != NULL) + enable_encoding_metadata(skops); break; case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: if (!is_managed_by_kmesh(skops->local_ip4)) // local ip4 is server ip diff --git a/build.sh b/build.sh index f1b134ed6..5cd1d3227 100755 --- a/build.sh +++ b/build.sh @@ -4,11 +4,11 @@ ROOT_DIR=$(dirname $(readlink -f ${BASH_SOURCE[0]})) # adjust the range of BPF code compillation based on the kernel is enhanced function bpf_compile_range_adjust() { if [ "$ENHANCED_KERNEL" == "enhanced" ]; then - sed -i '/tracepoint\.c/s/\(.*\)generate/\/\/go:generate/' bpf/kmesh/bpf2go/bpf2go.go - sed -i '/sockops\.c/s/\(.*\)generate/\/\/go:generate/' bpf/kmesh/bpf2go/bpf2go.go + sed -i '/ads\/tracepoint\.c/s/\(.*\)generate/\/\/go:generate/' bpf/kmesh/bpf2go/bpf2go.go + sed -i '/ads\/sockops\.c/s/\(.*\)generate/\/\/go:generate/' bpf/kmesh/bpf2go/bpf2go.go else - sed -i '/tracepoint\.c/s/\(.*\)generate/\/\/not go:generate/' bpf/kmesh/bpf2go/bpf2go.go - sed -i '/sockops\.c/s/\(.*\)generate/\/\/not 
go:generate/' bpf/kmesh/bpf2go/bpf2go.go + sed -i '/ads\/tracepoint\.c/s/\(.*\)generate/\/\/not go:generate/' bpf/kmesh/bpf2go/bpf2go.go + sed -i '/ads\/sockops\.c/s/\(.*\)generate/\/\/not go:generate/' bpf/kmesh/bpf2go/bpf2go.go fi } diff --git a/build/docker/kmesh.dockerfile b/build/docker/kmesh.dockerfile index c59eaddca..f5ba950a1 100644 --- a/build/docker/kmesh.dockerfile +++ b/build/docker/kmesh.dockerfile @@ -11,9 +11,9 @@ RUN \ --mount=type=cache,target=/var/cache/dnf \ yum install -y kmod util-linux iptables -ADD out/$arch/*so* /usr/lib64/ -ADD out/$arch/kmesh-daemon /usr/bin/ -ADD out/$arch/kmesh-cni /usr/bin/ -ADD out/$arch/mdacore /usr/bin/ -ADD build/docker/start_kmesh.sh /kmesh -ADD out/$arch/ko /kmesh +COPY out/$arch/*so* /usr/lib64/ +COPY out/$arch/kmesh-daemon /usr/bin/ +COPY out/$arch/kmesh-cni /usr/bin/ +COPY out/$arch/mdacore /usr/bin/ +COPY build/docker/start_kmesh.sh /kmesh +COPY out/$arch/ko /kmesh diff --git a/deploy/README.md b/deploy/README.md index 2ca6f1cfa..dc01c8b47 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -6,4 +6,12 @@ We provide a Helm Chart to deploy Kmesh in Kubernets Cluster. ```bash helm install kmesh ./deploy/helm -n kmesh-system --create-namespace +``` + +## Yaml + +We also support deploying using yaml files. 
+ +```bash +kubectl apply -f ./deploy/yaml/ ``` \ No newline at end of file diff --git a/docs/pics/fortio_performance_test.png b/docs/pics/fortio_performance_test.png new file mode 100644 index 000000000..893e4af7c Binary files /dev/null and b/docs/pics/fortio_performance_test.png differ diff --git a/docs/proposal/kmesh_support_workload.md b/docs/proposal/kmesh_support_workload.md index feafd41bc..aa1ae543d 100644 --- a/docs/proposal/kmesh_support_workload.md +++ b/docs/proposal/kmesh_support_workload.md @@ -76,12 +76,11 @@ In the subsequent traffic management of kmesh, based on the IP and Port accessed typedef struct { __be32 ipv4; // service ip - __be16 service_port; } __attribute__((packed)) frontend_key; typedef struct { - __u32 service_id; // service id, through / string convert to uint32 variable + __u32 upstream_id; // service id for Service access or backend uid for Pod access. } __attribute__((packed)) frontend_value; // service map @@ -92,8 +91,13 @@ typedef struct typedef struct { - __u32 endpoint_count; // the endpoint count of the service - __u32 lb_policy; // current only support random lb policy + __u32 endpoint_count; // endpoint count of current service + __u32 lb_policy; // load balancing algorithm + __u32 service_port[MAX_PORT_COUNT]; // service_port[i] and target_port[i] are a pair, i starts from 0 and max value + // is MAX_PORT_COUNT-1 + __u32 target_port[MAX_PORT_COUNT]; + __u32 waypoint_addr; + __u32 waypoint_port; } __attribute__((packed)) service_value; // endpoint map @@ -116,26 +120,24 @@ typedef struct typedef struct { - __be32 ipv4; // backend ip - __u32 port_count; - __u32 service_port[MAX_COUNT]; // MAX_ COUNT fixed at 10, currently - __u32 target_port[MAX_COUNT]; + __be32 ipv4; // backend ip + __u32 service[MAX_SERVICE_COUNT]; + __u32 waypoint_addr; + __u32 waypoint_port; } __attribute__((packed)) backend_value; ``` -## Subscription data processing flow -![new_workload.svg](./pics/new_workload.svg) - 
-![delete_workload.svg](./pics/delete_workload.svg) - -![update_workload.svg](./pics/update_workload.svg) - ## Traffic governance process -![traffic_governance](./pics/traffic_governance.svg) +

+ +

-* Client Access Service: Search the serviceinfo map based on the IP and Port accessed by the client, find the corresponding service_id, and then search the service map based on the service_id to find the endpoint_count of the backend Pod in the service. Then, search the endpoint map based on the service_id and the random backend_index generated based on the count to find the corresponding backend_uid. Finally, use the backenduid to find the IP and Port of the backend. -* Client access Pod: Access directly through Pod's IP and Port. +**traffic governance process** +- Search the frontend map based on the IP accessed by the client, find the corresponding upstream_id, and then use this upstream_id to search for the service map and backend map: + - If the corresponding service is found in service map, and get the endpoint_count of the backend Pod in the service. Then, search the endpoint map based on the service_id and the random backend_index generated based on the count to find the corresponding backend_uid. Finally, use the backenduid to find the IP and Port of the backend. + - If the corresponding backend is found in backend map, it is a directly pod access. +- Additionally, if the service or backend contains a waypoint, it will redirect to the waypoint. diff --git a/docs/proposal/layer4_authorization.md b/docs/proposal/layer4_authorization.md new file mode 100644 index 000000000..bca7e8be6 --- /dev/null +++ b/docs/proposal/layer4_authorization.md @@ -0,0 +1,60 @@ +--- +title: support layer 4 authorization in kmesh workload mod +authors: +- "@supercharge-xsy" +reviewers: +- "@hzxuzhonghu" +- "@nlwcy" +approvers: +- "@robot" +- TBD + +creation-date: 2024-05-28 + + +--- + +## Support L4 authorization in workload mode + +### Summary + +This article aims to explain how Kmesh achieves layer 4 authorization functionality in workload mode. 
For an introduction to the authentication features, please refer to:[Kmesh TCP Authorization](https://kmesh.net/en/docs/userguide/tcp_authorization/) + +### Design details + + +![l4_authz](pics/kmesh_l4_authorization.svg#pic_center) + +#### Map definition + +```.c +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct bpf_sock_tuple); + __type(value, __u32); // init, deny, allow + __uint(max_entries, MAP_SIZE_OF_AUTH); +} map_of_auth SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, RINGBUF_SIZE); +} map_of_tuple SEC(".maps"); + + +``` + +#### Processing logic + +1. **sock-bpf:** During the connection establishment process, on the server side, sock bpf logic is triggered at the established phase. If the server is managed by Kmesh: + - 1.1: Tuple information is recorded into `tuple_map`, which is a ringbuffer type map, readily accessible for real-time reading by the Kmesh-daemon. + - 1.2: An `auth_map` entry is initialized with its value set to `init`, indicating that migration authentication is underway. +2. **kmesh-daemon:** The kmesh-daemon is responsible for subscribing to authorization rules and matching these rules for authorization checks. + - 2.1: It reads the tuple records from `tuple_map`. Once read, the record in the ringbuffer map is automatically cleared by the system. + - 2.2: Based on the read tuple information, it matches the authorization rules. If it's an `allow`, the `init` record in the table is cleared; if it's a `deny`, the value is refreshed from `init` to `deny`. +3. **xdp-bpf**: When the client sends a message and the server receives it, passing through the xdp bpf program: + - 3.1: It matches data in `auth_map` using the five-tuple information. If a match is found with the value set to `init`, indicating the migration authentication is not yet complete, the message is temporarily discarded. 
+ - 3.2: If the matched record shows `value=deny`, it alters the message flag, sends an RST message to the server, clears the corresponding `auth_map` record. If no record is matched, implying authorization is allowed, the message is passed through. +4. **client retry**: The client attempts to send another message, but because the server has closed the connection, the client receives a "reset by peer" signal and subsequently closes its own channel. + + + diff --git a/docs/proposal/pics/delete_workload.svg b/docs/proposal/pics/delete_workload.svg deleted file mode 100644 index 0fe976f4c..000000000 --- a/docs/proposal/pics/delete_workload.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - key: ip service_portval: service_idfrontendkey: service_idval: endpoint_countservicekey: service_id backend_indexval: backend_uidendpointdelete processdel workload info note:fix hollow scenesdel service1.update backend without port info2.del endpoint、service、frontendkey: backend_uidval: ip count service_port[MAX] target_port[MAX]backend \ No newline at end of file diff --git a/docs/proposal/pics/kmesh_l4_authorization.svg b/docs/proposal/pics/kmesh_l4_authorization.svg new file mode 100644 index 000000000..54c119800 --- /dev/null +++ b/docs/proposal/pics/kmesh_l4_authorization.svg @@ -0,0 +1,21 @@ + + + + + + + + clientserverkmesh-deamonSYNsockbpfxdpbpfPASSIVE_ESTABILISHEDtuple_mapauth_map1.11.2 mark init staterbac manageristiodauthpolicy2.1 tuple2.2 update deny or clean init statesend3.1 match initdropsend3.2 match denyrst server4. 
retry sendrstclientclean denyconnectsend \ No newline at end of file diff --git a/docs/proposal/pics/new_workload.svg b/docs/proposal/pics/new_workload.svg deleted file mode 100644 index f4e41ee9a..000000000 --- a/docs/proposal/pics/new_workload.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - key: ip service_portval: service_idfrontendkey: service_idval: endpoint_countservicekey: service_id backend_indexval: backend_uidendpointservice_cacheendpointservicenew workload info1. have service info a. service exist b. service not exist cache service and endpoint2. not have servicenew processnew service1.associate ip、port with service_id2.process service_cachekey: backend_uidval: ip count service_port[MAX] target_port[MAX]backend \ No newline at end of file diff --git a/docs/proposal/pics/tls_cert_design.svg b/docs/proposal/pics/tls_cert_design.svg new file mode 100644 index 000000000..1760164bb --- /dev/null +++ b/docs/proposal/pics/tls_cert_design.svg @@ -0,0 +1,4 @@ + + + +
add new workload
add new workload
update
update
add
add
addOrUpdate
addOrUpdate
refCnt++
refCnt++
fetchCert
fetchCert
storeCert
storeCert
store cert
add Item to RotateQueue
store cert...
check cert map 
cert is exists?
check cert map...
handleCertRequests
handleCertRequests
certRequestChan
certRequestChan
delete old workload
delete old workload
rotateCert
rotateCert
refCnt--
refCnt--
Yes
Yes
refCnt=0?
refCnt=0?
delete map
delete Queue
delete map...
cert events
cert events
Yes
Yes
check cert exist?
check cert exist?
fetchCert
fetchCert
storeCert
storeCert
store cert
add Item to RotateQueue
store cert...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/proposal/pics/tls_cert_scenario1.svg b/docs/proposal/pics/tls_cert_scenario1.svg new file mode 100644 index 000000000..c3c9c195b --- /dev/null +++ b/docs/proposal/pics/tls_cert_scenario1.svg @@ -0,0 +1,4 @@ + + + +
serviceaccount 1









serviceaccount 1...
pod 1
pod 1
pod 2
pod 2
pod 3
pod 3


kmesh
kmesh...
KmeshCaClient
KmeshCaClient
SecretManager
SecretManager
SecretManager








SecretManager...
Priority Queue:exp-time
Priority Queue:exp-t...
map:
key:identity
value:certCache
map:...
istiod
istiod
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/proposal/pics/tls_cert_scenario2.svg b/docs/proposal/pics/tls_cert_scenario2.svg new file mode 100644 index 000000000..dcaf68fc4 --- /dev/null +++ b/docs/proposal/pics/tls_cert_scenario2.svg @@ -0,0 +1,4 @@ + + + +
serviceaccount 1









serviceaccount 1...
pod 1
pod 1
pod 2
pod 2
pod 3
pod 3


kmesh
kmesh...
KmeshCaClient
KmeshCaClient
SecretManager
SecretManager
SecretManager








SecretManager...
Priority Queue:exp-time
Priority Queue:exp-t...
map:
key:identity
value:certCache
map:...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/proposal/pics/tls_cert_scenario3.svg b/docs/proposal/pics/tls_cert_scenario3.svg new file mode 100644 index 000000000..aeb47d345 --- /dev/null +++ b/docs/proposal/pics/tls_cert_scenario3.svg @@ -0,0 +1,4 @@ + + + +
serviceaccount 1









serviceaccount 1...
pod 1
pod 1
pod 2
pod 2
pod 3
pod 3


kmesh
kmesh...
KmeshCaClient
KmeshCaClient
SecretManager
SecretManager
SecretManager








SecretManager...
Priority Queue:exp-time
Priority Queue:exp-t...
map:
key:identity
value:certCache
map:...
istiod
istiod
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/proposal/pics/traffic_governance.svg b/docs/proposal/pics/traffic_governance.svg index 528200144..9dcfdf316 100644 --- a/docs/proposal/pics/traffic_governance.svg +++ b/docs/proposal/pics/traffic_governance.svg @@ -1,4 +1,4 @@ - + @@ -18,4 +18,4 @@ - key: backend_uidval: ip count service_port[MAX_COUNT] target_port[MAX_COUNT]key: service_idval: endpoint_countkey: ip portval: service_idkey: service_id backend_indexval: backend_uidbackendendpointservicefrontendclientService accessPod access \ No newline at end of file + clientfrontendserviceendpointbackendhave waypoint?service orbackend?backendyesnoyesservicednatnokey: backend_uidval: ip service_count services[MAX_COUNT] waypoint_addr waypoint_portbackendkey: service_id backend_indexval: backend_uidendpointkey: ipval: upstream_idfrontendService accessPod accessWaypoint accesskey: service_idval: endpoint_count lb_policy service_port[MAX_COUNT] target_port[MAX_COUNT] waypoint_addr waypoint_portservicebackendnoreturnhave waypoint? \ No newline at end of file diff --git a/docs/proposal/pics/update_workload.svg b/docs/proposal/pics/update_workload.svg deleted file mode 100644 index a2a5f8759..000000000 --- a/docs/proposal/pics/update_workload.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - key: ip service_portval: service_idfrontendkey: service_idval: endpoint_countservicekey: service_id backend_indexval: backend_uidendpointupdate processupdate workload 1.update backend 2.update serviceupdate service such as:update service_portkey: backend_uidval: ip count service_port[MAX] target_port[MAX]backend \ No newline at end of file diff --git a/docs/proposal/tls_cert.md b/docs/proposal/tls_cert.md new file mode 100644 index 000000000..b72aa4e04 --- /dev/null +++ b/docs/proposal/tls_cert.md @@ -0,0 +1,168 @@ +## Workload Certificate Management + +### Overview + +Kmesh requires certificates issued by Istiod to support TLS capabilities. 
Therefore, a certificate request and management module is needed to apply for certificates from Istiod and manage the lifecycle of the certificates. + +### Motivation + +Kmesh needs to provide TLS capabilities for managed workloads and needs to be able to conveniently apply for and manage certificates, adding, deleting, and refreshing certificates at the appropriate time. + +#### Goals + +1. Apply for certificates for the service account (sa) where the managed workload is located. +2. Automatically refresh the certificate when it expires. + +#### Non-Goals + +1. In ambient mode, ztunnel and Kmesh each have their own certificate management system, which do not interfere with each other. There may be situations where both have applied for certificates for a certain sa. In this case, whoever takes over the traffic will use their set of certificates. +2. In the event of an abnormal restart of Kmesh, all old certificate records are discarded and all certificates are re-applied. Retaining previous certificates is not considered. + +### Proposal + +Implement a certificate application module and a certificate management module, where: + +Certificate Application Module: Establish an encrypted gRPC connection with Istiod when Kmesh starts, construct a CSR request and corresponding private key for the sa (service account) where the managed workload is located, and interact with Istiod using the CSR request. Istiod returns the certificate after signing. + +Certificate Management Module: + +- Manage the timing of operations on certificates: 1. Add workload 2. Delete workload 3. Automatically refresh the certificate when it expires. +- Manage the storage and management method of the certificate. +- Trigger the corresponding certificate refresh task when the certificate is about to expire. 
+ +### Limitations + +Currently, if you need to use Kmesh tls capabilities, you need to modify the deployment when Istio starts, and add `kmesh-system/kmesh` after the `CA_TRUSTED_NODE_ACCOUNTS` environment variable. + +## Design Details + +### Certificate Application Module + +Create a caclient client when Kmesh starts, and establish an encrypted gRPC connection with Istiod. + +Use the information in the workload to construct a CSR request and private key, send the CSR request to Istiod through caclient, and Istiod returns the certificate after signing. + +### Certificate Lifecycle Management + +Use a channel, queue, and map to record and manage, where the queue and map both have locks to ensure concurrency safety. + +
+ +![tls_cert_design](pics/tls_cert_design.svg) + +
+ +**Channel**: Manage certificate events, handle certificate tasks according to Operation, create tasks in order from the channel, which can prevent some concurrent scheduling problems. + +```go +chan: used to receive all certificate-related events +type certRequest struct { + Identity string + Operation int +} +``` + +Trigger timing: + +- When adding a workload +- When deleting a workload +- When the certificate expires, take out the certificate refresh task from the queue + +**Queue**: Check the certificate that is about to expire, refresh the certificate 1 hour in advance; + +```go +Queue element content: +type certExp struct { + identity string //The certificate name constructed using sa + exp time.Time //Certificate expiration time +} +``` + +Update timing: Add a certificate: insert a new record Refresh the certificate: delete the old record, add a new record; Delete the certificate: traverse and delete the old certificate record + +**Map**: Record certificate information and certificate status + +```go +map: record the number of pods using this certificate + key: Identity //The certificate name constructed using sa + value: certItem + +type certItem struct { + cert istiosecurity.SecretItem //Certificate information + refcnt int32 //Record the number of pods using this certificate +} +``` + +Update timing: When a pod is managed by Kmesh for the first time under a certain sa, a new certificate is added; a new record is created and added When all pods managed by Kmesh under this sa are deleted (refCnt=0), delete the certificate; delete a record + + When the certificate expires automatically refresh, update the value content; refresh the cert in the existing record + + When a pod is managed by Kmesh under a certain sa, the corresponding refcnt+1; When a pod managed by Kmesh under a certain sa is deleted, the corresponding refcnt-1; + +Lifecycle: The time when the certificate of the entire sa exists; created at the time of sa certificate application, deleted at 
the time of sa certificate deletion + +#### Scenario One: Add Certificate + +
+ +![tls_cert_scenario1](pics/tls_cert_scenario1.svg) + +
+ +1. Kmesh manages pod1, subscribes to the added workload, SecretManager looks for the corresponding sa certificate: if it already exists, count +1; if it does not exist, apply for a certificate +2. Construct and send a CSR request for sa1 +3. Istiod issues a certificate +4. Store the certificate: + - Store the certificate + - In the status information + - Record count, count for this sa, record the number of pods using this certificate; + - Add a record of the expiration time to the queue + +#### Scenario Two: Delete Certificate + +
+ +![tls_cert_scenario2](pics/tls_cert_scenario2.svg) + +
+ +1. Delete pod1, delete the corresponding workload + +2. The count of this sa is reduced by one; + + + + If the count of this sa is 0 at this time, delete the certificate: + + - Traverse and find the queue, delete the corresponding record + - Delete the certificate corresponding to sa + +#### Scenario Three: Certificate Expires Automatically Update + +
+ +![tls_cert_scenario3](pics/tls_cert_scenario3.svg) + +
+ +1. The certificate with the nearest expiration date in the queue expires, pop up this record, trigger the certificate refresh action +2. Construct and send a CSR request for the sa of this certificate +3. Istiod issues a certificate +4. Store the certificate, + - Refresh the certificate in the map; refcnt remains unchanged + +- Add this record to the queue + +#### Special Design: + +The map and queue both use locks to ensure concurrency safety, all operations involving the map and queue use the defined interface to avoid deadlock and other problems + +Since applying for a certificate requires interaction with Istiod through a grpc connection, which may take a long time, and the change of certificate status information has added a lock for concurrency safety, so when you need to add or refresh a certificate, you need to separate the change of certificate status information and the process of applying for a certificate: + +For example: in the function flow of adding a certificate, if it is judged that a new certificate needs to be applied for, it will first create the corresponding status information record and write it into the map, so that other threads will not apply for the certificate repeatedly when executing, and then write into this record after the certificate is refreshed, if the application fails, delete this record; + +### Remaining Issues + +1. The current implementation of the queue is a priority queue. It needs to be modified to a normal queue. In the current scenario, the certificate events are obtained from the channel in order, and the certificate applied by Kmesh for the workload has a consistent validity period, so there is no need to sort in the queue again +2. Managed pod judgment, the current Kmesh related certificate processing process cannot judge whether the workload is managed, to be implemented later +3. 
There is only one pod under a certain sa, the pod restarts, causing the workload to be quickly deleted and added, and the certificate will be added and deleted repeatedly, bringing unnecessary overhead, this scenario needs special treatment \ No newline at end of file diff --git a/docs/proposal/tls_cert_zh.md b/docs/proposal/tls_cert_zh.md new file mode 100644 index 000000000..572a747b2 --- /dev/null +++ b/docs/proposal/tls_cert_zh.md @@ -0,0 +1,182 @@ +## 工作负载证书管理 + +### 概要 + +Kmesh支持TLS能力需要使用由istiod签发的证书,所以需要一套证书申请与管理模块,用于向istiod申请证书并管理证书的生命周期。 + +### 动机 + +Kmesh需要为纳管workload提供TLS能力,需要能够便捷的申请与管理证书,在合适的时机新增、删除、刷新证书 + +#### 目标 + +1. 为纳管workload所在的sa(service account)申请证书 +2. 证书有效期到期自动刷新 + +#### 非目标 + +1. 在ambient模式下,ztunnel与Kmesh各自拥有着一套证书管理体系,两者互不干扰,可能存在两者均为某sa申请了证书的情况,这种情况下流量被谁接管就使用谁的一套证书 +2. Kmesh异常重启情况下,旧的证书记录全部废弃,证书全部重新申请,不考虑保留之前的证书 + +### 提议 + +实现一个证书申请模块和证书管理模块,其中 + +证书申请模块:与istiod建立一个加密grpc连接,为纳管workload所在的sa(service account)构造出CSR请求和对应的私钥,并使用CSR请求与istiod进行交互,由istiod进行签名后返回证书 + +证书管理模块: + +- 管理需要对证书进行操作的时机入口:1、新增workload 2、删除workload 3、证书有效期到期自动刷新 + +- 管理证书的存放与管理方式 +- 根据证书的有效期,在临近到期时触发对应证书的刷新任务 + +### 限制 + +当前如果需要使用Kmesh tls能力,需要在istio启动时,修改deployment,在`CA_TRUSTED_NODE_ACCOUNTS`环境变量后边添加`kmesh-system/kmesh ` + +## 设计细节 + +### 证书申请模块 + +随Kmesh启动创建一个caclient客户端,与istiod建立加密的grpc连接 + +使用workload中的信息,构造出CSR请求和私钥,将CSR请求通过caclient发送给istiod,istiod进行签名并返回证书 + +### 证书生命周期管理 + +使用一个通道、队列和map来记录和管理,其中队列和map均有锁来保证并发安全性 + +
+ +![tls_cert_design](pics/tls_cert_design.svg) + +
+ +**通道**:管理证书事件,根据Operation去处理证书任务,从通道中按序创建任务,可以防止一些并发调度问题 + +```go +chan :用于接受所有证书相关事件 +type certRequest struct { + Identity string + Operation int +} +``` + +触发时机: + +- 新增workload时 +- 删除workload +- 证书到期,从队列中取出需要刷新的证书任务 + +**队列**:检查最近到期的证书,提前1小时刷新证书; + +```go +队列元素内容: +type certExp struct { + identity string //使用sa构造的证书名 + exp time.Time //证书到期时间 +} +``` + +更新时机: + 新增证书:插入一条新的记录 + 刷新证书:删除旧记录,添加新记录; + 删除证书:遍历并删除旧证书的记录 + +**map**:记录证书信息和证书状态 + +```go +map:记录使用该证书的pod 数量 +​ key:Identity //使用sa构造的证书名 +​ value:certItem + +type certItem struct { + cert istiosecurity.SecretItem //证书信息 + refcnt int32 //记录使用该证书的pod数 +} +``` + +更新时机: + 在某sa下第一次有pod被Kmesh纳管时新增证书;新建并添加一条记录 + 在该sa下所有被Kmesh纳管pod都被删除时(refCnt=0)删除证书;删除一条记录 + +​ 在证书到期自动刷新时更新value内容;刷新已有记录中的cert + +​ 在某sa下有pod被Kmesh纳管时,对应refcnt+1; +​ 在某sa下有被Kmesh纳管的pod被删除时,对应refcnt-1; + +生命周期:整个sa的证书存在的时间;创建于sa证书申请时,删除于sa证书删除时 + +#### 场景一:新增证书 + +
+ +![tls_cert_scenario1](pics/tls_cert_scenario1.svg) + +
+ +1. Kmesh纳管pod1,订阅到新增的workload,SecretManager查找对应sa的证书:若已存在则计数加1;若不存在则进行证书申请 + +2. 为sa1 构造并发送CSR请求 + +3. istiod签发证书 + +4. 存储证书: + + - 存储证书 + + - 在状态信息中 + - 记录 count,为此sa进行计数,记录使用该证书的pod数量; + + - 往队列中添加一条到期时间的记录 + + +#### 场景二:删除证书 + +
+ +![tls_cert_scenario2](pics/tls_cert_scenario2.svg) + +
+ +1. 删除pod1,删除对应workload + +2. 该sa计数减一; + + 若此时sa计数为0,则删除证书: + + - 遍历查找队列,删除对应的记录 + - 删除sa对应的证书 + + +#### 场景三:证书到期自动更新 + +
+ +![tls_cert_scenario3](pics/tls_cert_scenario3.svg) + +
 + +1. 队列中有效期最近的证书到期,弹出该条记录,触发证书刷新动作 +2. 为该证书的sa构造并发送CSR请求 +3. istiod签发证书 +4. 存储证书, + + - 刷新map中的证书;refcnt保持不变 + +- 在队列中添加该条记录 + +#### 特别设计: + +map与队列均使用了锁来保证并发安全性,所有涉及map和队列的操作均使用定义的接口去进行操作,避免出现死锁等问题 + +由于申请证书需要通过grpc连接和istiod进行交互,耗时可能较大,而证书状态信息的变更为了并发安全性加了锁,所以在需要新增或刷新证书时,需要把证书状态信息的变更和申请证书的流程分开: + +例如:在新增证书的函数流程中,如果判断需要新申请证书,会先创建对应的状态信息记录并写入map,这样其他线程执行的时候就不会重复申请证书,随后等证书刷新下来之后再写入该条记录内,申请失败则删除这条记录; + +### 遗留事项 + +1. 当前代码中的队列实现是优先队列,需要修改为普通队列,现有场景下按序从通道中获取证书事件,且Kmesh为workload申请的证书有效期一致,无需在队列中再排序 +2. 纳管pod判断,目前Kmesh相关证书处理流程中无法判断workload是否被纳管,待后续实现 +3. 某sa下只存在一个pod,该pod重启,引起workload快速删除与新增,会重复增删证书,带来不必要的开销,该场景需特殊处理 + diff --git a/kmesh_compile.sh b/kmesh_compile.sh index d8874fbb8..37a196227 100755 --- a/kmesh_compile.sh +++ b/kmesh_compile.sh @@ -36,6 +36,7 @@ function get_arch() { function build_kmesh() { local container_id=$1 + docker exec $container_id git config --global --add safe.directory /kmesh docker exec $container_id sh /kmesh/build.sh docker exec $container_id sh /kmesh/build.sh -i docker exec $container_id sh -c "$(declare -f copy_to_host); copy_to_host" diff --git a/oncn-mda/ebpf_src/sock_ops.c b/oncn-mda/ebpf_src/sock_ops.c index 92fbfd4b9..68086b0a4 100644 --- a/oncn-mda/ebpf_src/sock_ops.c +++ b/oncn-mda/ebpf_src/sock_ops.c @@ -234,14 +234,10 @@ static void extract_key4_from_ops(struct bpf_sock_ops *const ops, struct sock_ke * optimized to think that only 16-bit data needs to be read here, but most kernels do not support this, * causing the BPF validator to fail.
*/ - key->sport = (bpf_htonl(ops->local_port) >> FORMAT_IP_LENGTH); + key->sport = bpf_htons(GET_SKOPS_LOCAL_PORT(ops)); key->dip4 = ops->remote_ip4; + key->dport = GET_SKOPS_REMOTE_PORT(ops); -#if !OE_23_03 - key->dport = (force_read(ops->remote_port) >> FORMAT_IP_LENGTH); -#else - key->dport = (force_read(ops->remote_port)); -#endif bpf_log(DEBUG, "sip:%u, sport:%u\n", key->sip4, key->sport); bpf_log(DEBUG, "dip:%u, dport:%u\n", key->dip4, key->dport); @@ -389,7 +385,7 @@ static void clean_ops_map(struct bpf_sock_ops *const skops) reverse_key = bpf_map_lookup_elem(&SOCK_OPS_PROXY_MAP_NAME, &key); ret = bpf_map_delete_elem(&SOCK_OPS_PROXY_MAP_NAME, &key); if (ret && ret != -ENOENT) - bpf_log(INFO, "bpf map delete proxy elem key failed! ret:%d\n", ret); + bpf_log(INFO, "bpf map delete prox elem key failed! ret:%d\n", ret); if (reverse_key == NULL) return; diff --git a/oncn-mda/include/mesh_accelerate.h b/oncn-mda/include/mesh_accelerate.h index 307bc905e..e48c76d42 100644 --- a/oncn-mda/include/mesh_accelerate.h +++ b/oncn-mda/include/mesh_accelerate.h @@ -37,6 +37,14 @@ enum bpf_loglevel { #define BPF_LOGLEVEL BPF_LOG_ERROR +#if OE_23_03 +#define GET_SKOPS_REMOTE_PORT(sk_ops) (__u16)((sk_ops)->remote_port) +#else +#define GET_SKOPS_REMOTE_PORT(sk_ops) (__u16)((sk_ops)->remote_port >> 16) +#endif + +#define GET_SKOPS_LOCAL_PORT(sk_ops) (__u16)((sk_ops)->local_port) + #ifndef bpf_printk #define bpf_printk(fmt, ...) 
\ ({ \ diff --git a/pkg/auth/rbac.go b/pkg/auth/rbac.go index 6960d7585..3bf182e72 100644 --- a/pkg/auth/rbac.go +++ b/pkg/auth/rbac.go @@ -49,8 +49,9 @@ var ( ) type Rbac struct { - policyStore *policyStore - bpfWorkload *bpf.BpfKmeshWorkload + policyStore *policyStore + workloadCache cache.WorkloadCache + bpfWorkload *bpf.BpfKmeshWorkload } type Identity struct { @@ -67,10 +68,11 @@ type rbacConnection struct { dstPort uint32 } -func NewRbac(workloadObj *bpf.BpfKmeshWorkload) *Rbac { +func NewRbac(workloadObj *bpf.BpfKmeshWorkload, workloadCache cache.WorkloadCache) *Rbac { return &Rbac{ - policyStore: newPolicystore(), - bpfWorkload: workloadObj, + policyStore: newPolicystore(), + workloadCache: workloadCache, + bpfWorkload: workloadObj, } } @@ -160,7 +162,7 @@ func (r *Rbac) RemovePolicy(policyKey string) { func (r *Rbac) doRbac(conn *rbacConnection) bool { var dstWorkload *workloadapi.Workload if len(conn.dstIp) > 0 { - dstWorkload = cache.WorkloadCache.GetWorkloadByAddr(cache.NetworkAddress{ + dstWorkload = r.workloadCache.GetWorkloadByAddr(cache.NetworkAddress{ Network: conn.dstNetwork, Address: nets.ConvertIpByteToUint32(conn.dstIp), }) diff --git a/pkg/auth/rbac_test.go b/pkg/auth/rbac_test.go index 880f3138c..2b051fd2a 100644 --- a/pkg/auth/rbac_test.go +++ b/pkg/auth/rbac_test.go @@ -22,6 +22,7 @@ import ( "istio.io/istio/pkg/util/sets" "kmesh.net/kmesh/api/v2/workloadapi/security" + "kmesh.net/kmesh/pkg/controller/workload/cache" ) const ( @@ -1855,7 +1856,8 @@ func TestRbac_doRbac(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { rbac := &Rbac{ - policyStore: tt.fields.policyStore, + policyStore: tt.fields.policyStore, + workloadCache: cache.NewWorkloadCache(), } if got := rbac.doRbac(tt.args.conn); got != tt.want { t.Errorf("Rbac.DoRbac() = %v, want %v", got, tt.want) diff --git a/pkg/bpf/bpf_kmesh_workload.go b/pkg/bpf/bpf_kmesh_workload.go index 08b3da16f..d204153d5 100644 --- a/pkg/bpf/bpf_kmesh_workload.go +++ 
b/pkg/bpf/bpf_kmesh_workload.go @@ -222,7 +222,7 @@ func (so *BpfSockOpsWorkload) LoadSockOps() error { return err } - prog := spec.Programs["record_tuple"] + prog := spec.Programs["sockops_prog"] so.Info.Type = prog.Type so.Info.AttachType = prog.AttachType @@ -233,7 +233,7 @@ func (so *BpfSockOpsWorkload) Attach() error { cgopt := link.CgroupOptions{ Path: so.Info.Cgroup2Path, Attach: so.Info.AttachType, - Program: so.KmeshSockopsWorkloadObjects.RecordTuple, + Program: so.KmeshSockopsWorkloadObjects.SockopsProg, } lk, err := link.AttachCgroup(cgopt) @@ -277,7 +277,7 @@ func (so *BpfSockOpsWorkload) Detach() error { } func (so *BpfSockOpsWorkload) GetSockMapFD() int { - return so.KmeshSockopsWorkloadObjects.KmeshSockopsWorkloadMaps.MapOfKmeshHashmap.FD() + return so.KmeshSockopsWorkloadObjects.KmeshSockopsWorkloadMaps.MapOfKmeshSocket.FD() } type BpfSendMsgWorkload struct { diff --git a/pkg/controller/client.go b/pkg/controller/client.go index d69dce74d..6990af32c 100644 --- a/pkg/controller/client.go +++ b/pkg/controller/client.go @@ -57,8 +57,9 @@ func NewXdsClient(mode string, bpfWorkloadObj *bpf.BpfKmeshWorkload) *XdsClient } if mode == constants.WorkloadMode { - client.rbac = auth.NewRbac(bpfWorkloadObj) client.workloadController = workload.NewController(bpfWorkloadObj.SockConn.KmeshCgroupSockWorkloadObjects.KmeshCgroupSockWorkloadMaps) + + client.rbac = auth.NewRbac(bpfWorkloadObj, client.workloadController.Processor.WorkloadCache) } else if mode == constants.AdsMode { client.AdsController = ads.NewController() } diff --git a/pkg/controller/security/manager.go b/pkg/controller/security/manager.go index b6128bfca..502fb3d1c 100644 --- a/pkg/controller/security/manager.go +++ b/pkg/controller/security/manager.go @@ -274,14 +274,14 @@ func (s *SecretManager) deleteCert(identity string) { } func (s *SecretManager) rotateCert(identity string) { - s.certsCache.mu.Lock() + s.certsCache.mu.RLock() certificate := s.certsCache.certs[identity] if certificate == nil { - 
s.certsCache.mu.Unlock() + s.certsCache.mu.RUnlock() log.Debugf("identity: %v cert has been deleted", identity) return } - s.certsCache.mu.Unlock() + s.certsCache.mu.RUnlock() s.addCert(identity) } diff --git a/pkg/controller/workload/cache/workload_cache.go b/pkg/controller/workload/cache/workload_cache.go index 087ee351b..3b38e0954 100644 --- a/pkg/controller/workload/cache/workload_cache.go +++ b/pkg/controller/workload/cache/workload_cache.go @@ -25,35 +25,38 @@ import ( "kmesh.net/kmesh/pkg/nets" ) -var ( - WorkloadCache = newWorkloadStore() -) +type WorkloadCache interface { + GetWorkloadByUid(uid string) *workloadapi.Workload + GetWorkloadByAddr(networkAddress NetworkAddress) *workloadapi.Workload + AddWorkload(workload *workloadapi.Workload) + DeleteWorkload(uid string) +} type NetworkAddress struct { Network string Address uint32 } -type workloadStore struct { +type cache struct { byUid map[string]*workloadapi.Workload byAddr map[NetworkAddress]*workloadapi.Workload mutex sync.RWMutex } -func newWorkloadStore() *workloadStore { - return &workloadStore{ +func NewWorkloadCache() *cache { + return &cache{ byUid: make(map[string]*workloadapi.Workload), byAddr: make(map[NetworkAddress]*workloadapi.Workload), } } -func (w *workloadStore) GetWorkloadByUid(uid string) *workloadapi.Workload { +func (w *cache) GetWorkloadByUid(uid string) *workloadapi.Workload { w.mutex.RLock() defer w.mutex.RUnlock() return w.byUid[uid] } -func (w *workloadStore) GetWorkloadByAddr(networkAddress NetworkAddress) *workloadapi.Workload { +func (w *cache) GetWorkloadByAddr(networkAddress NetworkAddress) *workloadapi.Workload { w.mutex.RLock() defer w.mutex.RUnlock() return w.byAddr[networkAddress] @@ -68,7 +71,7 @@ func composeNetworkAddress(network string, addr uint32) NetworkAddress { return networkAddress } -func (w *workloadStore) AddWorkload(workload *workloadapi.Workload) { +func (w *cache) AddWorkload(workload *workloadapi.Workload) { uid := workload.Uid w.mutex.Lock() @@ -95,7 
+98,7 @@ func (w *workloadStore) AddWorkload(workload *workloadapi.Workload) { } } -func (w *workloadStore) DeleteWorkload(uid string) { +func (w *cache) DeleteWorkload(uid string) { w.mutex.Lock() defer w.mutex.Unlock() diff --git a/pkg/controller/workload/cache/workload_cache_test.go b/pkg/controller/workload/cache/workload_cache_test.go index cb8e9ad66..1332809db 100644 --- a/pkg/controller/workload/cache/workload_cache_test.go +++ b/pkg/controller/workload/cache/workload_cache_test.go @@ -27,7 +27,7 @@ import ( func TestAddWorkload(t *testing.T) { t.Run("adding a workload when none exists", func(t *testing.T) { - w := newWorkloadStore() + w := NewWorkloadCache() workload := &workloadapi.Workload{ Name: "ut-workload", Uid: "123456", @@ -46,7 +46,7 @@ func TestAddWorkload(t *testing.T) { }) t.Run("modify addresses in workload", func(t *testing.T) { - w := newWorkloadStore() + w := NewWorkloadCache() workload := &workloadapi.Workload{ Name: "ut-workload", Uid: "123456", @@ -82,7 +82,7 @@ func TestAddWorkload(t *testing.T) { }) t.Run("add addresses to the same workload", func(t *testing.T) { - w := newWorkloadStore() + w := NewWorkloadCache() workload := &workloadapi.Workload{ Name: "ut-workload", Uid: "123456", @@ -116,7 +116,7 @@ func TestAddWorkload(t *testing.T) { func TestDeleteWorkload(t *testing.T) { t.Run("normal function test", func(t *testing.T) { - w := newWorkloadStore() + w := NewWorkloadCache() workload := &workloadapi.Workload{ Name: "ut-workload", Uid: "123456", diff --git a/pkg/controller/workload/workload_processor.go b/pkg/controller/workload/workload_processor.go index d696d74ec..868e8bf3d 100644 --- a/pkg/controller/workload/workload_processor.go +++ b/pkg/controller/workload/workload_processor.go @@ -19,6 +19,7 @@ package workload import ( "encoding/binary" "fmt" + "os" "strings" service_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" @@ -52,6 +53,8 @@ type Processor struct { endpointsByService 
map[string]map[string]Endpoint bpf *bpf.Cache Sm *kmeshsecurity.SecretManager + nodeName string + WorkloadCache cache.WorkloadCache } type Endpoint struct { @@ -63,13 +66,11 @@ type Endpoint struct { func newProcessor(workloadMap bpf2go.KmeshCgroupSockWorkloadMaps) *Processor { return &Processor{ - ack: nil, - req: nil, - hashName: NewHashName(), endpointsByService: make(map[string]map[string]Endpoint), bpf: bpf.NewCache(workloadMap), - Sm: nil, + nodeName: os.Getenv("NODE_NAME"), + WorkloadCache: cache.NewWorkloadCache(), } } @@ -93,8 +94,8 @@ func newAckRequest(rsp *service_discovery_v3.DeltaDiscoveryResponse) *service_di } } -func GetIdentityByUid(workloadUid string) string { - workload := cache.WorkloadCache.GetWorkloadByUid(workloadUid) +func (p *Processor) getIdentityByUid(workloadUid string) string { + workload := p.WorkloadCache.GetWorkloadByUid(workloadUid) if workload == nil { log.Errorf("workload %v not found", workloadUid) return "" @@ -107,10 +108,11 @@ func GetIdentityByUid(workloadUid string) string { }.String() } -func isManagedWorkload(workloadUid string) bool { - // TODO: Currently, there is no good way to accurately judge whether a workload - // is managed by kmesh when new ones are added or deleted. - // In the future, we plan to add a cache to implement this functionality. 
+func (p *Processor) isManagedWorkload(workload *workloadapi.Workload) bool { + // TODO: check the workload is managed by namespace and pod label + if workload.Node == p.nodeName { + return true + } return true } @@ -168,7 +170,7 @@ func (p *Processor) storePodFrontendData(uid uint32, ip []byte) error { return nil } -func (p *Processor) removeWorkloadResource(removed_resources []string) error { +func (p *Processor) removeWorkloadResource(removedResources []string) error { var ( err error skUpdate = bpf.ServiceKey{} @@ -178,14 +180,15 @@ func (p *Processor) removeWorkloadResource(removed_resources []string) error { bkDelete = bpf.BackendKey{} ) - for _, workloadUid := range removed_resources { - if isManagedWorkload(workloadUid) { - Identity := GetIdentityByUid(workloadUid) + for _, uid := range removedResources { + exist := p.WorkloadCache.GetWorkloadByUid(uid) + if exist != nil && p.isManagedWorkload(exist) { + Identity := p.getIdentityByUid(uid) p.Sm.SendCertRequest(Identity, kmeshsecurity.DELETE) } - cache.WorkloadCache.DeleteWorkload(workloadUid) + p.WorkloadCache.DeleteWorkload(uid) - backendUid := p.hashName.StrToNum(workloadUid) + backendUid := p.hashName.StrToNum(uid) // for Pod to Pod access, Pod info stored in frontend map, when Pod offline, we need delete the related records if err = p.deletePodFrontendData(backendUid); err != nil { log.Errorf("deletePodFrontendData failed: %s", err) @@ -241,7 +244,7 @@ func (p *Processor) removeWorkloadResource(removed_resources []string) error { log.Errorf("BackendDelete failed: %s", err) goto failed } - p.hashName.Delete(workloadUid) + p.hashName.Delete(uid) } failed: @@ -459,16 +462,22 @@ func (p *Processor) handleDataWithoutService(workload *workloadapi.Workload) err } func (p *Processor) handleWorkload(workload *workloadapi.Workload) error { - log.Debugf("workload uid: %s", workload.Uid) - exist := cache.WorkloadCache.GetWorkloadByUid(workload.Uid) - cache.WorkloadCache.AddWorkload(workload) - - // The grpc 
connection is disconnected every half hour and the workload will be resubscribed. - // We need to determine whether it already exists in the cache. - if exist == nil && isManagedWorkload(workload.Uid) { - Identity := GetIdentityByUid(workload.Uid) - p.Sm.SendCertRequest(Identity, kmeshsecurity.ADD) + log.Debugf("handle workload: %s", workload.Uid) + if p.isManagedWorkload(workload) { + oldIdentity := p.getIdentityByUid(workload.Uid) + if oldIdentity == "" { + newIdentity := spiffe.Identity{ + TrustDomain: workload.TrustDomain, + Namespace: workload.Namespace, + ServiceAccount: workload.ServiceAccount, + }.String() + // This is the case workload added first time + p.Sm.SendCertRequest(newIdentity, kmeshsecurity.ADD) + } } + + p.WorkloadCache.AddWorkload(workload) + // if have the service name, the workload belongs to a service if workload.GetServices() != nil { if err := p.handleDataWithService(workload); err != nil { diff --git a/pkg/status/status_server.go b/pkg/status/status_server.go index b246e0589..fea964416 100644 --- a/pkg/status/status_server.go +++ b/pkg/status/status_server.go @@ -19,6 +19,7 @@ package status import ( "fmt" "net/http" + "net/http/pprof" "time" // nolint @@ -70,6 +71,13 @@ func NewServer(c *controller.Controller, configs *options.BootstrapConfigs) *Ser s.mux.HandleFunc(patternBpfAdsMaps, s.bpfAdsMaps) s.mux.HandleFunc(patternConfigDump, s.configDump) s.mux.HandleFunc(patternReadyProbe, s.readyProbe) + + // support pprof + s.mux.HandleFunc("/debug/pprof/", pprof.Index) + s.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + s.mux.HandleFunc("/debug/pprof/profile", pprof.Profile) + s.mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + s.mux.HandleFunc("/debug/pprof/trace", pprof.Trace) return s }