diff --git a/Makefile b/Makefile index 6cf74560b..d7c61b35b 100644 --- a/Makefile +++ b/Makefile @@ -151,6 +151,10 @@ checkfmt: exit 1; \ fi +.PHONY: clang-tidy +clang-tidy: + cd bpf && $(CLANG_TIDY) *.c *.h + .PHONY: lint-dashboard lint-dashboard: prereqs @echo "### Linting dashboard"; @@ -379,10 +383,6 @@ clean-testoutput: check-ebpf-integrity: docker-generate git diff --name-status --exit-code || (echo "Run make docker-generate locally and commit the code changes" && false) -.PHONY: clang-tidy -clang-tidy: - cd bpf && $(CLANG_TIDY) *.c *.h - .PHONY: protoc-gen protoc-gen: docker run --rm -v $(PWD):/work -w /work $(PROTOC_IMAGE) protoc --go_out=pkg/kubecache --go-grpc_out=pkg/kubecache proto/informer.proto diff --git a/bpf/go_nethttp.h b/bpf/go_nethttp.h index 3ad437db2..a798b1aab 100644 --- a/bpf/go_nethttp.h +++ b/bpf/go_nethttp.h @@ -538,6 +538,25 @@ int uprobe_writeSubset(struct pt_regs *ctx) { (void *)(io_writer_addr + go_offset_of(ot, (go_offset){.v = _io_writer_n_pos})), &len, sizeof(len)); + + // For Go we support two types of HTTP context propagation for now. + // 1. The one that this code does, which uses the locked down bpf_probe_write_user. + // 2. By using a sock_msg program that will extend the packet. + // If this code ran, we should ensure that the second part doesn't run, therefore + // we remove the metadata setup in uprobe_persistConnRoundTrip(struct pt_regs *ctx), so + // that approach 2. skips this packet. + connection_info_t *info = bpf_map_lookup_elem(&ongoing_client_connections, &g_key); + if (info) { + egress_key_t e_key = { + .d_port = info->d_port, + .s_port = info->s_port, + }; + bpf_map_delete_elem(&outgoing_trace_map, &e_key); + bpf_dbg_printk( + "wrote traceparent using bpf_probe_write_user, removing outgoing trace map %d:%d", + e_key.s_port, + e_key.d_port); + } } done: diff --git a/bpf/http_ssl_defs.h b/bpf/http_ssl_defs.h index ddd2453d6..aaa181cec 100644 --- a/bpf/http_ssl_defs.h +++ b/bpf/http_ssl_defs.h @@ -215,4 +215,27 @@ handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, int bytes_len, u8 direction) } } +static __always_inline void *is_ssl_connection(u64 id) { + void *ssl = 0; + // Checks if it's sandwitched between active SSL handshake, read or write uprobe/uretprobe + void **s = bpf_map_lookup_elem(&active_ssl_handshakes, &id); + if (s) { + ssl = *s; + } else { + ssl_args_t *ssl_args = bpf_map_lookup_elem(&active_ssl_read_args, &id); + if (!ssl_args) { + ssl_args = bpf_map_lookup_elem(&active_ssl_write_args, &id); + } + if (ssl_args) { + ssl = (void *)ssl_args->ssl; + } + } + + return ssl; +} + +static __always_inline void *is_active_ssl(pid_connection_info_t *conn) { + return bpf_map_lookup_elem(&active_ssl_connections, conn); +} + #endif diff --git a/bpf/http_types.h b/bpf/http_types.h index 58a2ecdd3..360ed3980 100644 --- a/bpf/http_types.h +++ b/bpf/http_types.h @@ -50,8 +50,14 @@ // h = high word, l = low word // used as hashmap key, must be 4 byte aligned? typedef struct http_connection_info { - u8 s_addr[IP_V6_ADDR_LEN]; - u8 d_addr[IP_V6_ADDR_LEN]; + union { + u8 s_addr[IP_V6_ADDR_LEN]; + u32 s_ip[IP_V6_ADDR_LEN_WORDS]; + }; + union { + u8 d_addr[IP_V6_ADDR_LEN]; + u32 d_ip[IP_V6_ADDR_LEN_WORDS]; + }; u16 s_port; u16 d_port; } connection_info_t; @@ -187,6 +193,14 @@ typedef struct http2_grpc_request { tp_info_t tp; } http2_grpc_request_t; +// When sock_msg is installed it disables the kprobes attached to tcp_sendmsg. +// We use this data structure to provide the buffer to the tcp_sendmsg logic, +// because we can't read the bvec physical pages. +typedef struct msg_buffer { + u8 buf[KPROBES_HTTP2_BUF_SIZE]; + u16 pos; +} msg_buffer_t; + // Force emitting struct http_request_trace into the ELF for automatic creation of Golang struct const http_info_t *unused __attribute__((unused)); const http2_grpc_request_t *unused_http2 __attribute__((unused)); @@ -244,6 +258,19 @@ static __always_inline void sort_connection_info(connection_info_t *info) { } } +// Equivalent to sort_connection_info, but works only with the ports key (egress_key_t), +// which we use for egress connection tracking +static __always_inline void sort_egress_key(egress_key_t *info) { + if (likely_ephemeral_port(info->s_port) && !likely_ephemeral_port(info->d_port)) { + return; + } + + if ((likely_ephemeral_port(info->d_port) && !likely_ephemeral_port(info->s_port)) || + (info->d_port > info->s_port)) { + __SWAP(u16, info->s_port, info->d_port); + } +} + static __always_inline bool client_call(connection_info_t *info) { return likely_ephemeral_port(info->s_port) && !likely_ephemeral_port(info->d_port); } diff --git a/bpf/k_tracer.h b/bpf/k_tracer.h index ec809aeba..dc5cb8490 100644 --- a/bpf/k_tracer.h +++ b/bpf/k_tracer.h @@ -38,11 +38,6 @@ struct { __type(value, recv_args_t); } active_recv_args SEC(".maps"); -typedef struct send_args { - pid_connection_info_t p_conn; - u64 size; -} send_args_t; - struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, MAX_CONCURRENT_REQUESTS); @@ -256,29 +251,6 @@ int BPF_KRETPROBE(kretprobe_sys_connect, int fd) { // Main HTTP read and write operations are handled with tcp_sendmsg and tcp_recvmsg -static __always_inline void *is_ssl_connection(u64 id) { - void *ssl = 0; - // Checks if it's sandwitched between active SSL handshake, read or write uprobe/uretprobe - void **s = bpf_map_lookup_elem(&active_ssl_handshakes, &id); - if (s) { - ssl = *s; - } else { - ssl_args_t *ssl_args = bpf_map_lookup_elem(&active_ssl_read_args, &id); - if (!ssl_args) { - ssl_args = bpf_map_lookup_elem(&active_ssl_write_args, &id); - } - if (ssl_args) { - ssl = (void *)ssl_args->ssl; - } - } - - return ssl; -} - -static __always_inline void *is_active_ssl(pid_connection_info_t *conn) { - return bpf_map_lookup_elem(&active_ssl_connections, conn); -} - // The size argument here will be always the total response size. // However, the return value of tcp_sendmsg tells us how much it sent. When the // response is large it will get chunked, so we have to use a kretprobe to @@ -302,6 +274,11 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s u16 orig_dport = s_args.p_conn.conn.d_port; dbg_print_http_connection_info( &s_args.p_conn.conn); // commented out since GitHub CI doesn't like this call + // Create the egress key before we sort the connection info. + egress_key_t e_key = { + .d_port = s_args.p_conn.conn.d_port, + .s_port = s_args.p_conn.conn.s_port, + }; sort_connection_info(&s_args.p_conn.conn); s_args.p_conn.pid = pid_from_pid_tgid(id); @@ -313,6 +290,33 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s u8 *buf = iovec_memory(); if (buf) { size = read_msghdr_buf(msg, buf, size); + // If a sock_msg program is installed, this kprobe will fail to + // read anything, because the data is in bvec physical pages. However, + // the sock_msg will setup a buffer for us if this is the case. We + // look up this buffer and use it instead of what we'd get from + // calling read_msghdr_buf. + if (!size) { + msg_buffer_t *m_buf = bpf_map_lookup_elem(&msg_buffers, &e_key); + bpf_dbg_printk("No size, m_buf[%llx]", m_buf); + if (m_buf) { + buf = m_buf->buf; + // The buffer setup for us by a sock_msg program is always the + // full buffer, but when we extend a packet to be able to inject + // a Traceparent field, it will actually be split in 3 chunks: + // [before the injected header],[70 bytes for 'Traceparent...'],[the rest]. + // We don't want the handle_buf_with_connection logic to run more than + // once on the same data, so if we find a buf we send all of it to the + // handle_buf_with_connection logic and then mark it as seen by making + // m_buf->pos be the size of the buffer. + if (!m_buf->pos) { + size = sizeof(m_buf->buf); + m_buf->pos = size; + bpf_dbg_printk("msg_buffer: size %d, buf[%s]", size, buf); + } else { + size = 0; + } + } + } if (size) { u64 sock_p = (u64)sk; bpf_map_update_elem(&active_send_args, &id, &s_args, BPF_ANY); diff --git a/bpf/k_tracer_defs.h b/bpf/k_tracer_defs.h index 1ba76da30..30025aa66 100644 --- a/bpf/k_tracer_defs.h +++ b/bpf/k_tracer_defs.h @@ -11,6 +11,12 @@ #include "protocol_http.h" #include "protocol_http2.h" #include "protocol_tcp.h" +#include "tc_common.h" + +typedef struct send_args { + pid_connection_info_t p_conn; + u64 size; +} send_args_t; struct bpf_map_def SEC("maps") jump_table = { .type = BPF_MAP_TYPE_PROG_ARRAY, @@ -19,13 +25,19 @@ struct bpf_map_def SEC("maps") jump_table = { .max_entries = 8, }; +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, egress_key_t); + __type(value, msg_buffer_t); + __uint(max_entries, 1000); + __uint(pinning, BEYLA_PIN_INTERNAL); +} msg_buffers SEC(".maps"); + #define TAIL_PROTOCOL_HTTP 0 #define TAIL_PROTOCOL_HTTP2 1 #define TAIL_PROTOCOL_TCP 2 static __always_inline void handle_buf_with_args(void *ctx, call_protocol_args_t *args) { - bpf_probe_read(args->small_buf, MIN_HTTP2_SIZE, (void *)args->u_buf); - bpf_dbg_printk( "buf=[%s], pid=%d, len=%d", args->small_buf, args->pid_conn.pid, args->bytes_len); @@ -42,8 +54,48 @@ static __always_inline void handle_buf_with_args(void *ctx, call_protocol_args_t } else { // large request tracking http_info_t *info = bpf_map_lookup_elem(&ongoing_http, &args->pid_conn); - if (info && still_responding(info)) { - info->end_monotime_ns = bpf_ktime_get_ns(); + if (info) { + // Still reading checks if we are processing buffers of a HTTP request + // that has started, but we haven't seen a response yet. + if (still_reading(info)) { + // Packets are split into chunks if Beyla injected the Traceparent + // Make sure you look for split packets containing the real Traceparent. + // Essentially, when a packet is extended by our sock_msg program and + // passed down another service, the receiving side may reassemble the + // packets into one buffer or not. If they are reassembled, then the + // call to bpf_tail_call(ctx, &jump_table, TAIL_PROTOCOL_HTTP); will + // scan for the incoming 'Traceparent' header. If they are not reassembled + // we'll see something like this: + // [before the injected header],[70 bytes for 'Traceparent...'],[the rest]. + if (is_traceparent(args->small_buf)) { + unsigned char *buf = tp_char_buf(); + if (buf) { + bpf_probe_read(buf, EXTEND_SIZE, (u8 *)args->u_buf); + bpf_dbg_printk("Found traceparent %s", buf); + unsigned char *t_id = extract_trace_id(buf); + unsigned char *s_id = extract_span_id(buf); + unsigned char *f_id = extract_flags(buf); + + decode_hex(info->tp.trace_id, t_id, TRACE_ID_CHAR_LEN); + decode_hex((unsigned char *)&info->tp.flags, f_id, FLAGS_CHAR_LEN); + decode_hex(info->tp.parent_id, s_id, SPAN_ID_CHAR_LEN); + + trace_key_t t_key = {0}; + task_tid(&t_key.p_key); + t_key.extra_id = extra_runtime_id(); + + tp_info_pid_t *existing = bpf_map_lookup_elem(&server_traces, &t_key); + if (existing) { + __builtin_memcpy(&existing->tp, &info->tp, sizeof(tp_info_t)); + set_trace_info_for_connection(&args->pid_conn.conn, existing); + } else { + bpf_dbg_printk("Didn't find existing trace, this might be a bug!"); + } + } + } + } else if (still_responding(info)) { + info->end_monotime_ns = bpf_ktime_get_ns(); + } } else if (!info) { // SSL requests will see both TCP traffic and text traffic, ignore the TCP if // we are processing SSL request. HTTP2 is already checked in handle_buf_with_connection. @@ -87,7 +139,7 @@ static __always_inline void handle_buf_with_connection(void *ctx, } __builtin_memcpy(&args->pid_conn, pid_conn, sizeof(pid_connection_info_t)); - + bpf_probe_read(args->small_buf, MIN_HTTP2_SIZE, (void *)args->u_buf); handle_buf_with_args(ctx, args); } diff --git a/bpf/pid.h b/bpf/pid.h index 0dceb74e9..2f3c50191 100644 --- a/bpf/pid.h +++ b/bpf/pid.h @@ -34,9 +34,8 @@ static __always_inline u8 pid_matches(pid_key_t *p) { u64 k = (((u64)p->ns) << 32) | p->pid; // combine the namespace id and the pid into one single u64 - u32 h = - (u32)(k % - PRIME_HASH); // divide with prime number lower than max pids * 64, modulo with primes gives good hash functions + // divide with prime number lower than max pids * 64, modulo with primes gives good hash functions + u32 h = (u32)(k % PRIME_HASH); u32 segment = h / 64; // divide by the segment size (8 bytes) to find the segment u32 bit = h & 63; // lowest 64 bits gives us the placement inside the segment diff --git a/bpf/protocol_defs.h b/bpf/protocol_defs.h index e0292ef72..e9d8c03ff 100644 --- a/bpf/protocol_defs.h +++ b/bpf/protocol_defs.h @@ -6,6 +6,7 @@ #define AF_INET6 10 /* IP version 6 */ #define IP_V6_ADDR_LEN 16 +#define IP_V6_ADDR_LEN_WORDS 4 // Most Linux distros use 32768 to 61000 for the ephemeral ports, so we look up from 32768 // IANA suggests that the range should be 49152-65535, which is what Windows uses diff --git a/bpf/protocol_http2.h b/bpf/protocol_http2.h index 0fa4afea8..57969cd53 100644 --- a/bpf/protocol_http2.h +++ b/bpf/protocol_http2.h @@ -110,6 +110,7 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid int saved_buf_pos = 0; u8 found_data_frame = 0; http2_conn_stream_t stream = {0}; + stream.pid_conn = *pid_conn; unsigned char frame_buf[FRAME_HEADER_LEN]; frame_header_t frame = {0}; @@ -124,7 +125,6 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid //bpf_dbg_printk("http2 frame type = %d, len = %d, stream_id = %d, flags = %d", frame.type, frame.length, frame.stream_id, frame.flags); if (is_headers_frame(&frame)) { - stream.pid_conn = *pid_conn; stream.stream_id = frame.stream_id; if (!prev_info) { prev_info = bpf_map_lookup_elem(&ongoing_http2_grpc, &stream); diff --git a/bpf/tc_common.h b/bpf/tc_common.h new file mode 100644 index 000000000..a94b3269f --- /dev/null +++ b/bpf/tc_common.h @@ -0,0 +1,112 @@ +#ifndef TC_COMMON_H +#define TC_COMMON_H + +#include "vmlinux.h" +#include "bpf_helpers.h" + +enum { MAX_INLINE_LEN = 0x3ff }; + +const char TP[] = "Traceparent: 00-00000000000000000000000000000000-0000000000000000-01\r\n"; +const u32 EXTEND_SIZE = sizeof(TP) - 1; +const char TP_PREFIX[] = "Traceparent: "; +const u32 TP_PREFIX_SIZE = sizeof(TP_PREFIX) - 1; + +static __always_inline unsigned char * +memchar(unsigned char *haystack, char needle, const unsigned char *end, u32 size) { + for (u32 i = 0; i < size; ++i) { + if (&haystack[i] >= end) { + break; + } + + if (haystack[i] == needle) { + return &haystack[i]; + } + } + + return 0; +} + +static __always_inline unsigned char * +find_first_of(unsigned char *begin, unsigned char *end, char ch) { + return memchar(begin, ch, end, MAX_INLINE_LEN); +} + +static __always_inline int +memchar_pos(unsigned const char *haystack, char needle, const unsigned char *end, u32 size) { + for (u32 i = 0; i < size; ++i) { + if (&haystack[i] >= end) { + break; + } + + if (haystack[i] == needle) { + return i; + } + } + + return -1; +} + +static __always_inline int find_first_pos_of(unsigned char *begin, unsigned char *end, char ch) { + return memchar_pos(begin, ch, end, MAX_INLINE_LEN); +} + +static __always_inline void *ctx_data(struct __sk_buff *ctx) { + void *data; + + asm("%[res] = *(u32 *)(%[base] + %[offset])" + : [res] "=r"(data) + : [base] "r"(ctx), [offset] "i"(offsetof(struct __sk_buff, data)), "m"(*ctx)); + + return data; +} + +static __always_inline void *ctx_data_end(struct __sk_buff *ctx) { + void *data_end; + + asm("%[res] = *(u32 *)(%[base] + %[offset])" + : [res] "=r"(data_end) + : [base] "r"(ctx), [offset] "i"(offsetof(struct __sk_buff, data_end)), "m"(*ctx)); + + return data_end; +} + +static __always_inline void +sk_msg_read_remote_ip6(struct sk_msg_md *ctx, u32 *res) { //NOLINT(readability-non-const-parameter) + asm("%[res0] = *(u32 *)(%[base] + %[offset] + 0)\n" + "%[res1] = *(u32 *)(%[base] + %[offset] + 4)\n" + "%[res2] = *(u32 *)(%[base] + %[offset] + 8)\n" + "%[res3] = *(u32 *)(%[base] + %[offset] + 12)\n" + : [res0] "=r"(res[0]), [res1] "=r"(res[1]), [res2] "=r"(res[2]), [res3] "=r"(res[3]) + : [base] "r"(ctx), [offset] "i"(offsetof(struct sk_msg_md, remote_ip6)), "m"(*ctx)); +} + +static __always_inline void +sk_msg_read_local_ip6(struct sk_msg_md *ctx, u32 *res) { //NOLINT(readability-non-const-parameter) + asm("%[res0] = *(u32 *)(%[base] + %[offset] + 0)\n" + "%[res1] = *(u32 *)(%[base] + %[offset] + 4)\n" + "%[res2] = *(u32 *)(%[base] + %[offset] + 8)\n" + "%[res3] = *(u32 *)(%[base] + %[offset] + 12)\n" + : [res0] "=r"(res[0]), [res1] "=r"(res[1]), [res2] "=r"(res[2]), [res3] "=r"(res[3]) + : [base] "r"(ctx), [offset] "i"(offsetof(struct sk_msg_md, local_ip6)), "m"(*ctx)); +} + +static __always_inline u32 sk_msg_remote_port(struct sk_msg_md *ctx) { + u32 data; + + asm("%[res] = *(u32 *)(%[base] + %[offset])" + : [res] "=r"(data) + : [base] "r"(ctx), [offset] "i"(offsetof(struct sk_msg_md, remote_port)), "m"(*ctx)); + + return data; +} + +static __always_inline u32 sk_msg_local_port(struct sk_msg_md *ctx) { + u32 data; + + asm("%[res] = *(u32 *)(%[base] + %[offset])" + : [res] "=r"(data) + : [base] "r"(ctx), [offset] "i"(offsetof(struct sk_msg_md, local_port)), "m"(*ctx)); + + return data; +} +#endif diff --git a/bpf/tc_http_tp.c b/bpf/tc_http_tp.c index 625f4fda5..9e6f51d28 100644 --- a/bpf/tc_http_tp.c +++ b/bpf/tc_http_tp.c @@ -7,12 +7,12 @@ #include "http_types.h" #include "tcp_info.h" #include "tracing.h" +#include "tc_common.h" char __license[] SEC("license") = "Dual MIT/GPL"; enum { TC_ACT_OK = 0, TC_ACT_RECLASSIFY = 1, TC_ACT_SHOT = 2 }; enum { MAX_IP_PACKET_SIZE = 0x7fff }; -enum { MAX_INLINE_LEN = 0x3ff }; enum connection_state { ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSING, CLOSE_WAIT, LAST_ACK }; @@ -50,9 +50,6 @@ struct tc_http_ctx_map { __uint(max_entries, 10240); } tc_http_ctx_map SEC(".maps"); -const char TP[] = "Traceparent: 00-0123456789ABCDEFGHIJKLMNOPQRSTUV-0123456789ABCDEF-XX\r\n"; -const u32 EXTEND_SIZE = sizeof(TP) - 1; - struct datasum_loop_ctx { const unsigned char *b; const unsigned char *e; @@ -241,26 +238,6 @@ make_tp_string_skb(unsigned char *buf, const tp_info_t *tp, const unsigned char *buf++ = '\n'; } -static __always_inline void *ctx_data(struct __sk_buff *ctx) { - void *data; - - asm("%[res] = *(u32 *)(%[base] + %[offset])" - : [res] "=r"(data) - : [base] "r"(ctx), [offset] "i"(offsetof(struct __sk_buff, data)), "m"(*ctx)); - - return data; -} - -static __always_inline void *ctx_data_end(struct __sk_buff *ctx) { - void *data_end; - - asm("%[res] = *(u32 *)(%[base] + %[offset])" - : [res] "=r"(data_end) - : [base] "r"(ctx), [offset] "i"(offsetof(struct __sk_buff, data_end)), "m"(*ctx)); - - return data_end; -} - __attribute__((unused)) static __always_inline struct ethhdr *eth_header(struct __sk_buff *ctx) { void *data = ctx_data(ctx); @@ -385,21 +362,6 @@ static __always_inline int is_http_request(struct __sk_buff *ctx) { return is_http_request_buf(req_buf); } -static __always_inline unsigned char * -memchar(unsigned char *haystack, char needle, const unsigned char *end, u32 size) { - for (u32 i = 0; i < size; ++i) { - if (&haystack[i] >= end) { - break; - } - - if (haystack[i] == needle) { - return &haystack[i]; - } - } - - return NULL; -} - struct memmove_loop_ctx { unsigned char *dst; unsigned char *src; @@ -444,11 +406,6 @@ move_data(unsigned char *dst, unsigned char *src, const unsigned char *end, u32 bpf_loop(size, memmove_loop, &memmove_loop_ctx, 0); } -static __always_inline unsigned char * -find_first_of(unsigned char *begin, unsigned char *end, char ch) { - return memchar(begin, ch, end, MAX_INLINE_LEN); -} - // TAIL_PROTOCOL_HTTP SEC("tc/http") void extend_skb(struct __sk_buff *ctx) { diff --git a/bpf/tc_ip.h b/bpf/tc_ip.h index c43289638..12b17c8de 100644 --- a/bpf/tc_ip.h +++ b/bpf/tc_ip.h @@ -9,11 +9,10 @@ enum { MIN_IP_LEN = 20, MAX_TC_TP_LEN = 20, TC_TP_ID = 0x1488, MAX_IPV6_OPTS_LEN = 24 }; -static __always_inline void populate_span_id_from_tcp_info(tp_info_pid_t *new_tp, - protocol_info_t *tcp) { +static __always_inline void populate_span_id_from_tcp_info(tp_info_t *tp, protocol_info_t *tcp) { // We use a combination of the TCP sequence + TCP ack as a SpanID - *((u32 *)(&new_tp->tp.span_id[0])) = tcp->seq; - *((u32 *)(&new_tp->tp.span_id[4])) = tcp->ack; + *((u32 *)(&tp->span_id[0])) = tcp->seq; + *((u32 *)(&tp->span_id[4])) = tcp->ack; } static __always_inline void print_tp(tp_info_pid_t *new_tp) { @@ -41,7 +40,7 @@ parse_ip_options_ipv4(struct __sk_buff *skb, connection_info_t *conn, protocol_i if (!existing_tp) { bpf_dbg_printk("Found tp context in opts! ihl = %d", tcp->ip_len); tp_info_pid_t new_tp = {.pid = 0, .valid = 1}; - populate_span_id_from_tcp_info(&new_tp, tcp); + populate_span_id_from_tcp_info(&new_tp.tp, tcp); // We load the TraceID from the IP options field. We skip two bytes for the key 0x88 + len (2 bytes) bpf_skb_load_bytes( @@ -65,7 +64,7 @@ parse_ip_options_ipv6(struct __sk_buff *skb, connection_info_t *conn, protocol_i tp_info_pid_t *existing_tp = (tp_info_pid_t *)bpf_map_lookup_elem(&incoming_trace_map, conn); if (!existing_tp) { tp_info_pid_t new_tp = {.pid = 0, .valid = 1}; - populate_span_id_from_tcp_info(&new_tp, tcp); + populate_span_id_from_tcp_info(&new_tp.tp, tcp); // Skip the first 4 bytes (next header, len, dest option, dest len) int ip_off = tcp->ip_len + 4; diff --git a/bpf/tc_sock.h b/bpf/tc_sock.h new file mode 100644 index 000000000..b90a9050f --- /dev/null +++ b/bpf/tc_sock.h @@ -0,0 +1,289 @@ +#ifndef TC_SOCK_H +#define TC_SOCK_H + +#include "vmlinux.h" +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "http_types.h" +#include "tc_common.h" +#include "bpf_dbg.h" +#include "tracing.h" +#include "http_ssl_defs.h" +#include "k_tracer_defs.h" + +#define SOCKOPS_MAP_SIZE 65535 + +// A map of sockets which we track with sock_ops. The sock_msg +// program subscribes to this map and runs for each new socket +// activity +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, SOCKOPS_MAP_SIZE); + __uint(key_size, sizeof(connection_info_t)); + __uint(value_size, sizeof(uint32_t)); +} sock_dir SEC(".maps"); + +// When we split a packet with the sock_msg program to inject +// the Traceparent field, we need to keep track of what's +// written by the Traffic Control probes. +typedef struct tc_http_ctx { + u32 offset; // where inside the original packet we saw '\n` + u32 seen; // how many bytes we've seen before the offset + u32 written; // how many of the Traceparent field we've written +} __attribute__((packed)) tc_http_ctx_t; + +// A map that keeps all the HTTP packets we've extended with +// the sock_msg program and that Traffic Control needs to write to. +struct tc_http_ctx_map { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u32); + __type(value, struct tc_http_ctx); + __uint(max_entries, 10240); +} tc_http_ctx_map SEC(".maps"); + +// Memory buffer and a map bellow as temporary storage for +// the sock_msg buffer which we use to look for the first '\n' +// in the request header +typedef struct msg_data { + u8 buf[1024]; +} msg_data_t; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, msg_data_t); + __uint(max_entries, 1); +} buf_mem SEC(".maps"); + +// Extracts what we need for connection_info_t from bpf_sock_ops if the +// communication is IPv4 +static __always_inline void sk_ops_extract_key_ip4(struct bpf_sock_ops *ops, + connection_info_t *conn) { + __builtin_memcpy(conn->s_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); + conn->s_ip[3] = ops->local_ip4; + __builtin_memcpy(conn->d_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); + conn->d_ip[3] = ops->remote_ip4; + + conn->s_port = ops->local_port; + conn->d_port = bpf_ntohl(ops->remote_port); +} + +// Extracts what we need for connection_info_t from bpf_sock_ops if the +// communication is IPv6 +// The order of copying the data from bpf_sock_ops matters and must match how +// the struct is laid in vmlinux.h, otherwise the verifier thinks we are modifying +// the context twice. +static __always_inline void sk_ops_extract_key_ip6(struct bpf_sock_ops *ops, + connection_info_t *conn) { + conn->d_ip[0] = ops->remote_ip6[0]; + conn->d_ip[1] = ops->remote_ip6[1]; + conn->d_ip[2] = ops->remote_ip6[2]; + conn->d_ip[3] = ops->remote_ip6[3]; + conn->s_ip[0] = ops->local_ip6[0]; + conn->s_ip[1] = ops->local_ip6[1]; + conn->s_ip[2] = ops->local_ip6[2]; + conn->s_ip[3] = ops->local_ip6[3]; + + conn->d_port = bpf_ntohl(ops->remote_port); + conn->s_port = ops->local_port; +} + +// Extracts what we need for connection_info_t from sk_msg_md if the +// communication is IPv4 +static __always_inline void sk_msg_extract_key_ip4(struct sk_msg_md *msg, connection_info_t *conn) { + __builtin_memcpy(conn->s_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); + conn->s_ip[3] = msg->local_ip4; + __builtin_memcpy(conn->d_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); + conn->d_ip[3] = msg->remote_ip4; + + conn->s_port = msg->local_port; + conn->d_port = bpf_ntohl(msg->remote_port); +} + +// Extracts what we need for connection_info_t from sk_msg_md if the +// communication is IPv6 +// The order of copying the data from bpf_sock_ops matters and must match how +// the struct is laid in vmlinux.h, otherwise the verifier thinks we are modifying +// the context twice. +__attribute__((__unused__)) static __always_inline void +sk_msg_extract_key_ip6(struct sk_msg_md *msg, connection_info_t *conn) { + sk_msg_read_remote_ip6(msg, conn->d_ip); + sk_msg_read_local_ip6(msg, conn->s_ip); + + conn->d_port = bpf_ntohl(sk_msg_remote_port(msg)); + conn->s_port = sk_msg_local_port(msg); +} + +// Helper that writes in the sock map for a sock_ops program +static __always_inline void bpf_sock_ops_establish_cb(struct bpf_sock_ops *skops) { + connection_info_t conn = {}; + + if (skops->family == AF_INET6) { + sk_ops_extract_key_ip6(skops, &conn); + } else { + sk_ops_extract_key_ip4(skops, &conn); + } + + bpf_printk("SET %llx:%d -> %llx:%d", conn.s_ip[3], conn.s_port, conn.d_ip[3], conn.d_port); + bpf_sock_hash_update(skops, &sock_dir, &conn, BPF_ANY); +} + +// Tracks all outgoing sockets (BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) +// We don't track incoming, those would be BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB +SEC("sockops") +int sockmap_tracker(struct bpf_sock_ops *skops) { + u32 op = skops->op; + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + bpf_sock_ops_establish_cb(skops); + break; + default: + break; + } + return 0; +} + +// Just a buffer +static __always_inline msg_data_t *buffer() { + int zero = 0; + return (msg_data_t *)bpf_map_lookup_elem(&buf_mem, &zero); +} + +// This is setup here for Go tracking. Essentially, when the Go userspace +// probes activate for an outgoing HTTP request they setup this +// outgoing_trace_map for us. We then know this is a connection we should +// be injecting the Traceparent in. Another place which sets up this map is +// the kprobe on tcp_sendmsg, however that happens after the sock_msg runs, +// so we have a different detection for that - protocol_detector. +static __always_inline u8 is_tracked(connection_info_t *conn) { + egress_key_t e_key = { + .d_port = conn->d_port, + .s_port = conn->s_port, + }; + + sort_egress_key(&e_key); + + tp_info_pid_t *tp = bpf_map_lookup_elem(&outgoing_trace_map, &e_key); + return tp != 0; +} + +// This code is copied from the kprobe on tcp_sendmsg and it's called from +// the sock_msg program, which does the packet extension for injecting the +// Traceparent. Since the sock_msg runs before the kprobe on tcp_sendmsg, we +// need to extend the packet before we'll have the opportunity to setup the +// outgoing_trace_map metadata. We can directly perhaps run the same code that +// the kprobe on tcp_sendmsg does, but it's complicated, no tail calls from +// sock_msg programs and inlining will eventually hit us with the instruction +// limit when we eventually add HTTP2/gRPC support. +static __always_inline u8 protocol_detector(struct sk_msg_md *msg, + u64 id, + connection_info_t *conn) { + bpf_dbg_printk("=== [protocol detector] %d size %d===", id, msg->size); + + egress_key_t e_key = { + .d_port = conn->d_port, + .s_port = conn->s_port, + }; + + send_args_t s_args = {.size = msg->size}; + __builtin_memcpy(&s_args.p_conn.conn, conn, sizeof(connection_info_t)); + + dbg_print_http_connection_info(&s_args.p_conn.conn); + sort_connection_info(&s_args.p_conn.conn); + s_args.p_conn.pid = pid_from_pid_tgid(id); + + void *ssl = is_ssl_connection(id); + if (s_args.size > 0) { + if (!ssl) { + void *active_ssl = is_active_ssl(&s_args.p_conn); + if (!active_ssl) { + msg_buffer_t msg_buf = { + .pos = 0, + }; + bpf_probe_read_kernel(msg_buf.buf, FULL_BUF_SIZE, msg->data); + // We setup any call that looks like HTTP request to be extended. + // This must match exactly to what the decision will be for + // the kprobe program on tcp_sendmsg, which sets up the + // outgoing_trace_map data used by Traffic Control to write the + // actual 'Traceparent:...' string. + if (is_http_request_buf((const unsigned char *)msg_buf.buf)) { + bpf_dbg_printk("Setting up request to be extended"); + bpf_map_update_elem(&msg_buffers, &e_key, &msg_buf, BPF_ANY); + + return 1; + } + } + } + } + + return 0; +} + +// Sock_msg program which detects packets where it should add space for +// the 'Traceparent' string. It doesn't write the value, only spaces the packet +// for Traffic Control to do the writing. +SEC("sk_msg") +int packet_extender(struct sk_msg_md *msg) { + u64 id = bpf_get_current_pid_tgid(); + connection_info_t conn = {}; + + if (msg->family == AF_INET6) { + sk_msg_extract_key_ip6(msg, &conn); + } else { + sk_msg_extract_key_ip4(msg, &conn); + } + u8 tracked = is_tracked(&conn); + + // We need two types of checks here. Valid PID only works for kprobes since + // Go programs don't add their PIDs to the PID map (we instrument the + // binaries). Tracked means that we have metadata setup by the Go uprobes + // telling us we should extend this packet. + if (!valid_pid(id) && !tracked) { + return SK_PASS; + } + + bpf_dbg_printk("MSG %llx:%d ->", conn.s_ip[3], conn.s_port); + bpf_dbg_printk("MSG TO %llx:%d", conn.d_ip[3], conn.d_port); + + msg_data_t *msg_data = buffer(); + if (!msg_data) { + return SK_PASS; + } + bpf_msg_pull_data(msg, 0, 1024, 0); + + if (!tracked) { + // If we didn't have metadata (sock_msg runs before the kprobe), + // we ensure to mark it for any packet we want to extend. + tracked = protocol_detector(msg, id, &conn); + } + + u64 len = (u64)msg->data_end - (u64)msg->data; + if (tracked && len > MIN_HTTP_SIZE) { + bpf_probe_read_kernel(msg_data->buf, 1024, msg->data); + bpf_dbg_printk("len %d, s_port %d, buf: %s", len, msg->local_port, msg_data->buf); + + int newline_pos = find_first_pos_of(msg_data->buf, &msg_data->buf[1023], '\n'); + + if (newline_pos >= 0) { + newline_pos++; + // Push extends the packet with empty space and sets up the + // metadata for Traffic Control to finish the writing + if (!bpf_msg_push_data(msg, newline_pos, EXTEND_SIZE, 0)) { + tc_http_ctx_t ctx = { + .offset = newline_pos, + .seen = 0, + .written = 0, + }; + u32 port = msg->local_port; + + bpf_map_update_elem(&tc_http_ctx_map, &port, &ctx, BPF_ANY); + } + bpf_dbg_printk("offset to split %d", newline_pos); + } + } + + return SK_PASS; +} + +#endif diff --git a/bpf/tc_tracer.c b/bpf/tc_tracer.c index e921f11fa..c146d1ac2 100644 --- a/bpf/tc_tracer.c +++ b/bpf/tc_tracer.c @@ -7,6 +7,8 @@ #include "go_shared.h" #include "tc_ip.h" #include "tcp_info.h" +#include "tc_sock.h" +#include "tc_tracer_l7.h" char __license[] SEC("license") = "Dual MIT/GPL"; @@ -34,15 +36,14 @@ int app_ingress(struct __sk_buff *skb) { return 0; } -static __always_inline void update_outgoing_request_span_id(connection_info_t *conn, +static __always_inline void update_outgoing_request_span_id(pid_connection_info_t *p_conn, protocol_info_t *tcp, tp_info_pid_t *tp, egress_key_t *e_key) { - http_info_t *h_info = bpf_map_lookup_elem(&ongoing_http, e_key); + http_info_t *h_info = bpf_map_lookup_elem(&ongoing_http, p_conn); if (h_info && tp->valid) { bpf_dbg_printk("Found HTTP info, resetting the span id to %x%x", tcp->seq, tcp->ack); - *((u32 *)(&h_info->tp.span_id[0])) = tcp->seq; - *((u32 *)(&h_info->tp.span_id[4])) = tcp->ack; + populate_span_id_from_tcp_info(&h_info->tp, tcp); } go_addr_key_t *g_key = bpf_map_lookup_elem(&ongoing_go_http, e_key); @@ -53,9 +54,7 @@ static __always_inline void update_outgoing_request_span_id(connection_info_t *c if (invocation) { bpf_dbg_printk( "Found Go HTTP invocation, resetting the span id to %x%x", tcp->seq, tcp->ack); - - *((u32 *)(&invocation->tp.span_id[0])) = tcp->seq; - *((u32 *)(&invocation->tp.span_id[4])) = tcp->ack; + populate_span_id_from_tcp_info(&invocation->tp, tcp); } } } @@ -70,34 +69,30 @@ static __always_inline void encode_data_in_ip_options(struct __sk_buff *skb, if (tcp->h_proto == ETH_P_IP && tcp->ip_len == MIN_IP_LEN) { bpf_dbg_printk("Adding the trace_id in the IP Options"); - if (inject_tc_ip_options_ipv4(skb, conn, tcp, tp)) { - update_outgoing_request_span_id(conn, tcp, tp, e_key); - } - - bpf_map_delete_elem(&outgoing_trace_map, e_key); + inject_tc_ip_options_ipv4(skb, conn, tcp, tp); + tp->valid = 0; } else if (tcp->h_proto == ETH_P_IPV6 && tcp->l4_proto == IPPROTO_TCP) { // Handling IPv6 bpf_dbg_printk("Found IPv6 header"); - if (inject_tc_ip_options_ipv6(skb, conn, tcp, tp)) { - update_outgoing_request_span_id(conn, tcp, tp, e_key); - } - - bpf_map_delete_elem(&outgoing_trace_map, e_key); + inject_tc_ip_options_ipv6(skb, conn, tcp, tp); + tp->valid = 0; } } SEC("tc_egress") int app_egress(struct __sk_buff *skb) { //bpf_printk("egress"); - protocol_info_t tcp = {}; connection_info_t conn = {}; + pid_connection_info_t p_conn = {}; if (!read_sk_buff(skb, &tcp, &conn)) { return 0; } - sort_connection_info(&conn); + __builtin_memcpy(&p_conn.conn, &conn, sizeof(connection_info_t)); + sort_connection_info(&p_conn.conn); + egress_key_t e_key = { .d_port = conn.d_port, .s_port = conn.s_port, @@ -105,11 +100,39 @@ int app_egress(struct __sk_buff *skb) { tp_info_pid_t *tp = bpf_map_lookup_elem(&outgoing_trace_map, &e_key); + // We look up metadata setup by the Go uprobes or the kprobes on + // a transaction we consider outgoing HTTP request. We will extend this in + // the future for other protocols, e.g. gRPC/HTTP2. + // The metadata always comes setup with the state field valid = 1, which + // means we haven't seen this request yet. if (tp) { - bpf_dbg_printk("egress flags %x, sequence %x", tcp.flags, tcp.seq); + p_conn.pid = tp->pid; + bpf_dbg_printk("egress flags %x, sequence %x, valid %d", tcp.flags, tcp.seq, tp->valid); dbg_print_http_connection_info(&conn); - encode_data_in_ip_options(skb, &conn, &tcp, tp, &e_key); + // If it's the fist packet of an request: + // We set the span information to match our TCP information. This + // is done for L4 context propagation, where we use the SEQ/ACK + // numbers for the Span ID. Since this is the first time we see + // these SEQ,ACK ids, we update the random Span ID the metadata has + // to match what we send over the wire. + if (tp->valid == 1) { + populate_span_id_from_tcp_info(&tp->tp, &tcp); + update_outgoing_request_span_id(&p_conn, &tcp, tp, &e_key); + // We set valid to 2, so we only run this once, the later packets + // will have different SEQ/ACK. + tp->valid = 2; + } + // L7 app egress runs on every packet, the packets will be split so + // it needs to do work across all packets that go out for this request + l7_app_egress(skb, tp, &conn, &tcp, &e_key); + + // The following code sets up the context information in L4 and it + // does it only once. If it successfully injected the information it + // will set valid to 0 so that we only run the L7 part from now on. + if (tp->valid) { + encode_data_in_ip_options(skb, &conn, &tcp, tp, &e_key); + } } return 0; diff --git a/bpf/tc_tracer_l7.h b/bpf/tc_tracer_l7.h new file mode 100644 index 000000000..3222e09a1 --- /dev/null +++ b/bpf/tc_tracer_l7.h @@ -0,0 +1,295 @@ +#ifndef _TC_TRACER_L7 +#define _TC_TRACER_L7 + +#include "utils.h" +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "http_types.h" +#include "tc_common.h" +#include "tc_sock.h" +#include "bpf_dbg.h" +#include "tracing.h" + +#define BPF_F_CURRENT_NETNS (-1) + +// Temporary memory we'll use to make the 'Traceparent: ...' value. +typedef struct tp_buf_data { + u8 buf[256]; +} tp_buf_data_t; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, tp_buf_data_t); + __uint(max_entries, 1); +} tp_buf_memory SEC(".maps"); + +// We use this helper to read in the connection tuple information in the +// bpf_sock_tuple format. We use this struct to add sockets which are +// established before we launched Beyla, since we'll not see them in the +// sock_ops program which tracks them. +static __always_inline struct bpf_sock_tuple * +get_tuple(void *data, __u64 nh_off, void *data_end, __u16 eth_proto, bool *ipv4) { + struct bpf_sock_tuple *result; + __u64 ihl_len = 0; + __u8 proto = 0; + + if (eth_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + nh_off); + + if ((void *)(iph + 1) > data_end) { + return 0; + } + + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off); + + if ((void *)(ip6h + 1) > data_end) { + return 0; + } + + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } + + if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP) { + return 0; + } + + return result; +} + +// A version of __builtin_memcpy which works with a variable size +static __always_inline int buf_memcpy(char *dest, char *src, s32 size, void *end) { + u32 rem = size; + // Copy 8 bytes at a time while you can + for (int i = 0; (i < ((EXTEND_SIZE / 8) + 1)) && (rem >= 8); i++) { + if ((void *)(dest + 8) <= end) { + *(u64 *)(dest) = *(u64 *)(src); + } else { + break; + } + rem -= 8; + dest += 8; + src += 8; + } + + // Finish the remainder one by one + for (int i = 0; (i < 8) && (rem > 0); i++) { + if ((void *)(dest + 1) <= end) { + *dest = *src; + } else { + break; + } + rem--; + dest++; + src++; + } + + return 0; +} + +static __always_inline unsigned char *tp_buf_mem(tp_info_t *tp) { + int zero = 0; + tp_buf_data_t *val = (tp_buf_data_t *)bpf_map_lookup_elem(&tp_buf_memory, &zero); + + if (!val) { + return 0; + } + + __builtin_memcpy(val->buf, TP, EXTEND_SIZE); + make_tp_string(val->buf + TP_PREFIX_SIZE, tp); + + return val->buf; +} + +static __always_inline void l7_app_ctx_cleanup(egress_key_t *e_key) { + bpf_map_delete_elem(&tc_http_ctx_map, &e_key->s_port); + bpf_map_delete_elem(&outgoing_trace_map, e_key); +} + +static __always_inline struct bpf_sock *lookup_sock_from_tuple(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, + bool ipv4, + void *data_end) { + if (ipv4 && (u64)((u8 *)tuple + sizeof(tuple->ipv4)) < (u64)data_end) { + // Lookup to see if you can find a socket for this tuple in the + // kernel socket tracking. We look up in all namespaces (-1). + return bpf_sk_lookup_tcp(skb, tuple, sizeof(tuple->ipv4), BPF_F_CURRENT_NETNS, 0); + } else if (!ipv4 && (u64)((u8 *)tuple + sizeof(tuple->ipv6)) < (u64)data_end) { + return bpf_sk_lookup_tcp(skb, tuple, sizeof(tuple->ipv6), BPF_F_CURRENT_NETNS, 0); + } + + return 0; +} + +// This function does two things: +// 1. It adds sockets in the socket hash map which have already been +// established and we see them for the first time in Traffic Control, i.e +// we are using them, but they weren't seen by the sock_ops. +// 2. It writes the 'Traceparent: ...' value setup as space for us by +// the sock_msg program. +static __always_inline int l7_app_egress(struct __sk_buff *skb, + tp_info_pid_t *tp, + connection_info_t *conn, + protocol_info_t *tcp, + egress_key_t *e_key) { + bpf_skb_pull_data(skb, skb->len); + + void *data_end = ctx_data_end(skb); + void *data = ctx_data(skb); + + u32 s_port = e_key->s_port; + tc_http_ctx_t *ctx = (tc_http_ctx_t *)bpf_map_lookup_elem(&tc_http_ctx_map, &s_port); + + if (!ctx) { + struct ethhdr *eth = (struct ethhdr *)(data); + bool ipv4; + + if ((void *)(eth + 1) > data_end) { + bpf_dbg_printk("bad size"); + return 0; + } + + // Get the bpf_sock_tuple value so we can look up and see if we don't have + // this socket yet in our map. + struct bpf_sock_tuple *tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4); + //bpf_printk("tuple %llx, next %llx, data end %llx", tuple, (void *)((u8 *)tuple + sizeof(*tuple)), data_end); + + if (!tuple) { + bpf_dbg_printk("bad tuple %llx, next %llx, data end %llx", + tuple, + (void *)(tuple + sizeof(struct bpf_sock_tuple)), + data_end); + } else { + struct bpf_sock *sk = lookup_sock_from_tuple(skb, tuple, ipv4, data_end); + bpf_dbg_printk("sk=%d\n", sk ? 1 : 0); + if (sk) { + bpf_dbg_printk("LOOKUP %llx:%d ->", conn->s_ip[3], conn->s_port); + bpf_dbg_printk("LOOKUP TO %llx:%d", conn->d_ip[3], conn->d_port); + + // Query the socket map to see if have added this socket. + struct bpf_sock *sk1 = (struct bpf_sock *)bpf_map_lookup_elem(&sock_dir, conn); + + // We found the socket, all good it was caught by the sock_ops, + // just release it. + if (sk1) { + bpf_dbg_printk("Found sk1 %llx", sk1); + bpf_sk_release(sk1); + } else { + // First time we see a socket, add it to the map, it will + // get tracked on the next request + bpf_map_update_elem(&sock_dir, conn, sk, BPF_NOEXIST); + } + + // We must release the reference to the original socket we looked up. + bpf_sk_release(sk); + } + } + } + + u32 tot_len = (u64)ctx_data_end(skb) - (u64)ctx_data(skb); + bpf_dbg_printk( + "egress, tot_len %d, s_port %d, data_start %d", tot_len, conn->s_port, tcp->hdr_len); + + unsigned char *tp_buf = tp_buf_mem(&tp->tp); + + if (!tp_buf) { + tp_buf = (unsigned char *)TP; + } + + // This is where the writing of the 'Traceparent: ...' field happens at L7. + // Our packets are split by sock_msg like this: + // [before the injected header],[70 bytes for 'Traceparent...'],[the rest] + // This how it always looks when I tested, but I'm not sure if it's always + // the case, seems plausible, but then the code below tries to handle any + // split. The 'fast path' handles the exact split as above. + if (ctx) { + u32 packet_size = tot_len - tcp->hdr_len; + bpf_dbg_printk("Found it! packet_size %d, offset %d", packet_size, ctx->offset); + bpf_dbg_printk("seen %d, written %d", ctx->seen, ctx->written); + + if (packet_size > 0) { + u32 len = 0; + u32 off = tcp->hdr_len; + // picked a value large enough to support TCP headers + bpf_clamp_umax(off, 128); + void *start = ctx_data(skb) + off; + + // We haven't seen enough bytes coming through from the start + // until we did the split (at ctx->offset is where we injected) + // the empty space. + if (ctx->seen < ctx->offset) { + // Diff = How much more before we cross offset + u32 diff = ctx->offset - ctx->seen; + // We received less or equal bytes to what we want to + // reach ctx->offset, i.e the split point. + if (diff <= packet_size) { + ctx->seen += packet_size; + return 0; + } else { + // We went over the split point, calculate how much can we + // write, but cap it to the max size = 70 bytes. + len = packet_size - diff; + bpf_clamp_umax(len, EXTEND_SIZE); + } + } else { + // Fast path. We are exactly at the offset, we've written + // nothing of the 'Traceparent: ...' text yet and the packet + // is exactly 70 bytes. + if (ctx->written == 0 && packet_size == EXTEND_SIZE) { + if ((start + EXTEND_SIZE) <= ctx_data_end(skb)) { + __builtin_memcpy(start, tp_buf, EXTEND_SIZE); + bpf_dbg_printk("Set the string fast_path!"); + l7_app_ctx_cleanup(e_key); + return 0; + } + } + + // Nope, we've written some bytes in another packet and we + // are not done writing yet. + if (ctx->written < EXTEND_SIZE) { + len = EXTEND_SIZE - ctx->written; + bpf_clamp_umax(len, EXTEND_SIZE); + + if (len > packet_size) { + len = packet_size; + } + } else { + // We've written everything already, just clean up + l7_app_ctx_cleanup(e_key); + return 0; + } + } + + if (len > 0) { + u32 tp_off = ctx->written; + // Keeps verifier happy + bpf_clamp_umax(tp_off, EXTEND_SIZE); + bpf_clamp_umax(len, EXTEND_SIZE); + + if ((start + len) <= ctx_data_end(skb)) { + buf_memcpy((char *)start, (char *)tp_buf + tp_off, len, ctx_data_end(skb)); + bpf_dbg_printk("Set the string off = %d, len = %d!", tp_off, len); + } + + ctx->written += len; + // If we've written the full string this time around + // cleanup the metadata. + if (ctx->written >= EXTEND_SIZE) { + l7_app_ctx_cleanup(e_key); + } + } + } + } + + return 0; +} + +#endif // _TC_TRACER_L7 \ No newline at end of file diff --git a/bpf/tcp_info.h b/bpf/tcp_info.h index 6400eadf5..cdb335ecc 100644 --- a/bpf/tcp_info.h +++ b/bpf/tcp_info.h @@ -96,7 +96,6 @@ read_sk_buff(struct __sk_buff *skb, protocol_info_t *tcp, connection_info_t *con u8 hdr_len = 0; bpf_skb_load_bytes(skb, tcp->ip_len + 1, &hdr_len, sizeof(hdr_len)); tcp->hdr_len += (hdr_len + 1) * 8; - } else { } } diff --git a/go.mod b/go.mod index c88092029..87e1d2f99 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/AlessandroPomponio/go-gibberish v0.0.0-20191004143433-a2d4156f0396 github.com/caarlos0/env/v9 v9.0.0 github.com/cilium/ebpf v0.16.0 + github.com/containers/common v0.61.0 github.com/gavv/monotime v0.0.0-20190418164738-30dba4353424 github.com/gin-gonic/gin v1.9.1 github.com/go-logr/logr v1.4.2 @@ -25,7 +26,7 @@ require ( github.com/prometheus/procfs v0.15.1 github.com/shirou/gopsutil/v3 v3.24.4 github.com/stretchr/testify v1.9.0 - github.com/vishvananda/netlink v1.1.0 + github.com/vishvananda/netlink v1.3.0 github.com/vladimirvivien/gexe v0.3.0 github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 github.com/yl2chen/cidranger v1.0.2 @@ -57,9 +58,9 @@ require ( go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/zap v1.27.0 golang.org/x/arch v0.7.0 - golang.org/x/mod v0.20.0 + golang.org/x/mod v0.21.0 golang.org/x/net v0.30.0 - golang.org/x/sys v0.26.0 + golang.org/x/sys v0.27.0 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.35.1 gopkg.in/yaml.v3 v3.0.1 @@ -89,11 +90,11 @@ require ( github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.1 // indirect github.com/evanphx/json-patch/v5 v5.6.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -131,7 +132,7 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/rs/cors v1.11.1 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect @@ -140,7 +141,7 @@ require ( github.com/tklauser/numcpus v0.7.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect - github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect + github.com/vishvananda/netns v0.0.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/collector v0.112.0 // indirect go.opentelemetry.io/collector/client v1.18.0 // indirect @@ -163,11 +164,11 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/term v0.25.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/term v0.26.0 // indirect + golang.org/x/text v0.20.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 // indirect diff --git a/go.sum b/go.sum index b562798ad..06e9ff0f5 100644 --- a/go.sum +++ b/go.sum @@ -24,9 +24,12 @@ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhD github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= +github.com/containers/common v0.61.0 h1:j/84PTqZIKKYy42OEJsZmjZ4g4Kq2ERuC3tqp2yWdh4= +github.com/containers/common v0.61.0/go.mod h1:NGRISq2vTFPSbhNqj6MLwyes4tWSlCnqbJg7R77B8xc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= @@ -35,8 +38,8 @@ github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJ github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gavv/monotime v0.0.0-20190418164738-30dba4353424 h1:Vh7rylVZRZCj6W41lRlP17xPk4Nq260H4Xo/DDYmEZk= @@ -96,8 +99,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= -github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= @@ -190,10 +193,10 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= -github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= -github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= -github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= +github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= +github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= @@ -201,8 +204,9 @@ github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFu github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.20.1 h1:IMJXHOD6eARkQpxo8KkhgEVFlBNm+nkrFUyGlIu7Na8= @@ -251,10 +255,10 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= -github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk= +github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/vladimirvivien/gexe v0.3.0 h1:4xwiOwGrDob5OMR6E92B9olDXYDglXdHhzR1ggYtWJM= github.com/vladimirvivien/gexe v0.3.0/go.mod h1:fp7cy60ON1xjhtEI/+bfSEIXX35qgmI+iRYlGOqbBFM= github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 h1:zzrxE1FKn5ryBNl9eKOeqQ58Y/Qpo3Q9QNxKHX5uzzQ= @@ -371,14 +375,14 @@ golang.org/x/arch v0.7.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -390,35 +394,36 @@ golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/internal/ebpf/common/bpf_arm64_bpfel.go b/pkg/internal/ebpf/common/bpf_arm64_bpfel.go index 17f827402..2b6b59ada 100644 --- a/pkg/internal/ebpf/common/bpf_arm64_bpfel.go +++ b/pkg/internal/ebpf/common/bpf_arm64_bpfel.go @@ -21,14 +21,13 @@ type bpfConnectionInfoT struct { type bpfHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -50,9 +49,8 @@ type bpfHttp2GrpcRequestT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -193,9 +191,8 @@ type bpfSqlRequestTrace struct { type bpfTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 diff --git a/pkg/internal/ebpf/common/bpf_arm64_bpfel.o b/pkg/internal/ebpf/common/bpf_arm64_bpfel.o index 5ad5e9c95..16563d093 100644 --- a/pkg/internal/ebpf/common/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/common/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34d534081f766ad8d8e1ad3aa023b0160c094ad8169e0749992b08521734b822 -size 4432 +oid sha256:3559bba801748d1127c4733c5a7cdde63e458f28a10744d07cee0306a7ee53e5 +size 4536 diff --git a/pkg/internal/ebpf/common/bpf_x86_bpfel.go b/pkg/internal/ebpf/common/bpf_x86_bpfel.go index 12b31bd99..91c9a24b2 100644 --- a/pkg/internal/ebpf/common/bpf_x86_bpfel.go +++ b/pkg/internal/ebpf/common/bpf_x86_bpfel.go @@ -21,14 +21,13 @@ type bpfConnectionInfoT struct { type bpfHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -50,9 +49,8 @@ type bpfHttp2GrpcRequestT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -193,9 +191,8 @@ type bpfSqlRequestTrace struct { type bpfTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 diff --git a/pkg/internal/ebpf/common/bpf_x86_bpfel.o b/pkg/internal/ebpf/common/bpf_x86_bpfel.o index 5ad5e9c95..16563d093 100644 --- a/pkg/internal/ebpf/common/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/common/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:34d534081f766ad8d8e1ad3aa023b0160c094ad8169e0749992b08521734b822 -size 4432 +oid sha256:3559bba801748d1127c4733c5a7cdde63e458f28a10744d07cee0306a7ee53e5 +size 4536 diff --git a/pkg/internal/ebpf/common/common.go b/pkg/internal/ebpf/common/common.go index 3060a24cd..29fa15b03 100644 --- a/pkg/internal/ebpf/common/common.go +++ b/pkg/internal/ebpf/common/common.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/link" "github.com/cilium/ebpf/ringbuf" "github.com/grafana/beyla/pkg/internal/goexec" @@ -62,6 +63,20 @@ type Filter struct { Fd int } +type SockOps struct { + io.Closer + Program *ebpf.Program + AttachAs ebpf.AttachType + SockopsCgroup link.Link +} + +type SockMsg struct { + io.Closer + Program *ebpf.Program + MapFD int + AttachAs ebpf.AttachType +} + type MisclassifiedEvent struct { EventType int TCPInfo *TCPRequestInfo diff --git a/pkg/internal/ebpf/common/common_linux.go b/pkg/internal/ebpf/common/common_linux.go index 8d1aefbdc..18d38c0fe 100644 --- a/pkg/internal/ebpf/common/common_linux.go +++ b/pkg/internal/ebpf/common/common_linux.go @@ -3,6 +3,7 @@ package ebpfcommon import ( "syscall" + "github.com/cilium/ebpf/link" "golang.org/x/sys/unix" "github.com/grafana/beyla/pkg/internal/helpers" @@ -12,6 +13,18 @@ func (f *Filter) Close() error { return syscall.SetsockoptInt(f.Fd, unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0) } +func (s *SockMsg) Close() error { + return link.RawDetachProgram(link.RawDetachProgramOptions{ + Target: s.MapFD, + Program: s.Program, + Attach: s.AttachAs, + }) +} + +func (s *SockOps) Close() error { + return s.SockopsCgroup.Close() +} + // Copied from https://github.com/golang/go/blob/go1.21.3/src/internal/syscall/unix/kernel_version_linux.go func KernelVersion() (major, minor int) { var uname syscall.Utsname diff --git a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.go index 3c904c55d..18f32a95f 100644 --- a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.go @@ -43,14 +43,13 @@ type bpfHttp2ConnStreamT struct { type bpfHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpfHttpConnectionMetadataT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpfHttpInfoT struct { _ [4]byte } +type bpfMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpfPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpfSslPidConnectionInfoT struct { type bpfTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -299,6 +301,7 @@ type bpfMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -359,6 +362,7 @@ type bpfMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -402,6 +406,7 @@ func (m *bpfMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o index 0e866b83b..61cd5fa59 100644 --- a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42ae803457a32667c58151079c5e52dffaca9399ee278d92049b9e48f068b8d7 -size 462544 +oid sha256:49c014eb9f22392eef3eefcfe4bef3ea44c9d06c7b8de0faf3652ec07a44bc87 +size 522112 diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.go index e075726a2..9bba3be31 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.go @@ -43,14 +43,13 @@ type bpf_debugHttp2ConnStreamT struct { type bpf_debugHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_debugHttpConnectionMetadataT struct { type bpf_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_debugHttpInfoT struct { _ [4]byte } +type bpf_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_debugPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_debugSslPidConnectionInfoT struct { type bpf_debugTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -300,6 +302,7 @@ type bpf_debugMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -361,6 +364,7 @@ type bpf_debugMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -405,6 +409,7 @@ func (m *bpf_debugMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o index 573e45e6f..57aa50a6e 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2fe32652f06f776cf5dfd0eeb906bfd64473972a897ff6b0fa8edc98b9dfb264 -size 769376 +oid sha256:a80c335b7f38aa042a8b62518aade4f1efc5661734d5fb5e99b6caabc5384212 +size 852896 diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.go index 001dc0134..d4dd3e499 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.go @@ -43,14 +43,13 @@ type bpf_debugHttp2ConnStreamT struct { type bpf_debugHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_debugHttpConnectionMetadataT struct { type bpf_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_debugHttpInfoT struct { _ [4]byte } +type bpf_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_debugPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_debugSslPidConnectionInfoT struct { type bpf_debugTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -300,6 +302,7 @@ type bpf_debugMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -361,6 +364,7 @@ type bpf_debugMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -405,6 +409,7 @@ func (m *bpf_debugMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o index 4702b0898..52edaf693 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:74d9cff43f30918bd407f2be7d53f32d4c856b36a6626c7e99485d9f18573f22 -size 768888 +oid sha256:f6e6e311bd7e132fd7a94ca3078acdfeb0bff6bb23fe26ed40ecf8c108136f85 +size 852408 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.go index d6c497f63..21a6aa111 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.go @@ -43,14 +43,13 @@ type bpf_tpHttp2ConnStreamT struct { type bpf_tpHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_tpHttpConnectionMetadataT struct { type bpf_tpHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_tpHttpInfoT struct { _ [4]byte } +type bpf_tpMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_tpPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_tpSslPidConnectionInfoT struct { type bpf_tpTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -299,6 +301,7 @@ type bpf_tpMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -359,6 +362,7 @@ type bpf_tpMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -402,6 +406,7 @@ func (m *bpf_tpMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o index eec45258f..f615871b5 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12e0c64938beeb0414a867b05d8882ff7adccc10037565105528358ee4202db4 -size 477232 +oid sha256:1eb6417fe6707d1d47b01e349bb0bcabb7e822c2d85e15b78a8945b9ce06cbeb +size 536024 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.go index 758b11c6e..c6fabfe1f 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.go @@ -43,14 +43,13 @@ type bpf_tp_debugHttp2ConnStreamT struct { type bpf_tp_debugHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_tp_debugHttpConnectionMetadataT struct { type bpf_tp_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_tp_debugHttpInfoT struct { _ [4]byte } +type bpf_tp_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_tp_debugPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_tp_debugSslPidConnectionInfoT struct { type bpf_tp_debugTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -300,6 +302,7 @@ type bpf_tp_debugMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -361,6 +364,7 @@ type bpf_tp_debugMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -405,6 +409,7 @@ func (m *bpf_tp_debugMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o index dfe6e719e..5abea7184 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02ab01c95abf546beba3baf61961420647d90347789202c841bf5ac5fa3f7771 -size 787632 +oid sha256:49db60962870127eeeb26932d07429c57b86a4ca4433cb4aaf1fee36bd605847 +size 870336 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.go index 797e32a78..1344bb046 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.go @@ -43,14 +43,13 @@ type bpf_tp_debugHttp2ConnStreamT struct { type bpf_tp_debugHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_tp_debugHttpConnectionMetadataT struct { type bpf_tp_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_tp_debugHttpInfoT struct { _ [4]byte } +type bpf_tp_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_tp_debugPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_tp_debugSslPidConnectionInfoT struct { type bpf_tp_debugTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tp_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -300,6 +302,7 @@ type bpf_tp_debugMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -361,6 +364,7 @@ type bpf_tp_debugMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -405,6 +409,7 @@ func (m *bpf_tp_debugMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o index 6f7b534a7..c0f27ef37 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d0d3ed640cfe8783ce63077ba1955b1a1246f22cfc913ce9d5808e7c3bf76ba4 -size 787152 +oid sha256:ed7d3ca477c03c1bfc0551645c785a2585f460398148e7c0c39f760f403916b7 +size 869856 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.go index 47409e871..7a40e7dd2 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.go @@ -43,14 +43,13 @@ type bpf_tpHttp2ConnStreamT struct { type bpf_tpHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpf_tpHttpConnectionMetadataT struct { type bpf_tpHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpf_tpHttpInfoT struct { _ [4]byte } +type bpf_tpMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpf_tpPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpf_tpSslPidConnectionInfoT struct { type bpf_tpTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_tpConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -299,6 +301,7 @@ type bpf_tpMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -359,6 +362,7 @@ type bpf_tpMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -402,6 +406,7 @@ func (m *bpf_tpMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o index 0f38c3c0e..76972de99 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:64cc2193dc7ecd3e954a0fb1c9b00ebb8d187a38c89749bc0e39351b68b52d7a -size 476632 +oid sha256:72b729c02cb965bb7a2cee247874dcaa177b2ca0f8d5cf931eaf95f6aa3aa5a8 +size 535424 diff --git a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.go b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.go index 65e35de6a..4dd6e40aa 100644 --- a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.go +++ b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.go @@ -43,14 +43,13 @@ type bpfHttp2ConnStreamT struct { type bpfHttp2GrpcRequestT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT Data [256]uint8 RetData [64]uint8 Type uint8 - _ [1]byte + _ [3]byte Len int32 - _ [4]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Pid struct { @@ -81,9 +80,8 @@ type bpfHttpConnectionMetadataT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -110,6 +108,11 @@ type bpfHttpInfoT struct { _ [4]byte } +type bpfMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + type bpfPartialConnectionInfoT struct { S_addr [16]uint8 S_port uint16 @@ -156,9 +159,8 @@ type bpfSslPidConnectionInfoT struct { type bpfTcpReqT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [256]uint8 @@ -299,6 +301,7 @@ type bpfMapSpecs struct { IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` @@ -359,6 +362,7 @@ type bpfMaps struct { IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` IovecMem *ebpf.Map `ebpf:"iovec_mem"` JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` @@ -402,6 +406,7 @@ func (m *bpfMaps) Close() error { m.IncomingTraceMap, m.IovecMem, m.JumpTable, + m.MsgBuffers, m.NodejsParentMap, m.OngoingHttp, m.OngoingHttp2Connections, diff --git a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o index ca950329c..5c7d77edd 100644 --- a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a364bb584d2ae638ec7afa90a74ef0b6ee6e09cec049f01c1eea8fae0477cdea -size 461952 +oid sha256:e9795a47a221ab3988b4602f78d422256c265f2dc82c62e83dde77a2ce280b72 +size 521512 diff --git a/pkg/internal/ebpf/generictracer/generictracer.go b/pkg/internal/ebpf/generictracer/generictracer.go index 2e2568cfe..b90bd80eb 100644 --- a/pkg/internal/ebpf/generictracer/generictracer.go +++ b/pkg/internal/ebpf/generictracer/generictracer.go @@ -130,7 +130,7 @@ func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { loader = loadBpf_debug } - if p.cfg.EBPF.TrackRequestHeaders || p.cfg.EBPF.UseTCForL7CP { + if p.cfg.EBPF.TrackRequestHeaders || p.cfg.EBPF.UseTCForL7CP || p.cfg.EBPF.UseTCForCP { if ebpfcommon.SupportsEBPFLoops() { p.log.Info("Found Linux kernel later than 5.17, enabling trace information parsing") loader = loadBpf_tp @@ -184,7 +184,7 @@ func (p *Tracer) Constants() map[string]any { m["filter_pids"] = int32(0) } - if p.cfg.EBPF.TrackRequestHeaders || p.cfg.EBPF.UseTCForL7CP { + if p.cfg.EBPF.TrackRequestHeaders || p.cfg.EBPF.UseTCForL7CP || p.cfg.EBPF.UseTCForCP { m["capture_header_buffer"] = int32(1) } else { m["capture_header_buffer"] = int32(0) @@ -258,7 +258,7 @@ func (p *Tracer) KProbes() map[string]ebpfcommon.FunctionPrograms { Required: true, Start: p.bpfObjects.KprobeTcpClose, }, - "tcp_sendmsg": { + "tcp_sendmsg_locked": { Required: true, Start: p.bpfObjects.KprobeTcpSendmsg, End: p.bpfObjects.KretprobeTcpSendmsg, @@ -353,6 +353,10 @@ func (p *Tracer) SocketFilters() []*ebpf.Program { return []*ebpf.Program{p.bpfObjects.SocketHttpFilter} } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } + +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } + func (p *Tracer) RecordInstrumentedLib(id uint64) { libsMux.Lock() defer libsMux.Unlock() diff --git a/pkg/internal/ebpf/generictracer/generictracer_notlinux.go b/pkg/internal/ebpf/generictracer/generictracer_notlinux.go index 6c15bf27d..cfd32128f 100644 --- a/pkg/internal/ebpf/generictracer/generictracer_notlinux.go +++ b/pkg/internal/ebpf/generictracer/generictracer_notlinux.go @@ -32,6 +32,8 @@ func (p *Tracer) KProbes() map[string]ebpfcommon.FunctionPrograms { r func (p *Tracer) UProbes() map[string]map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) Tracepoints() map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } func (p *Tracer) RecordInstrumentedLib(_ uint64) {} func (p *Tracer) UnlinkInstrumentedLib(_ uint64) {} func (p *Tracer) AlreadyInstrumentedLib(_ uint64) bool { return false } diff --git a/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.go index 3b29b4439..96b199dcc 100644 --- a/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.go @@ -51,7 +51,7 @@ type bpfGrpcSrvFuncInvocationT struct { type bpfGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpfConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.o index 8b0f9524c..4865dad0d 100644 --- a/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5867d68bb8ad50c575bb1aec00947a720efba13fe55ed68ccee5bc82d68108e2 -size 407528 +oid sha256:a3eee4ba65ec587c99b95c1d507845fb8791fcde1a5cbf818ef1406cdd0ec19a +size 406096 diff --git a/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.go index be7248e99..942f87f55 100644 --- a/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.go @@ -51,7 +51,7 @@ type bpf_debugGrpcSrvFuncInvocationT struct { type bpf_debugGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_debugConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.o index d5e0e5d76..389f29076 100644 --- a/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bdcc06f351d441779c826b8d3e484c36dd0342fee1bb9dffb2b73fb5b14862d -size 899920 +oid sha256:89f206e5846db6d8a576464e973b36ed0132126c7eaa502c074d7eddbb4167f2 +size 898512 diff --git a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.go index f87332fa7..076e3e73c 100644 --- a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.go @@ -51,7 +51,7 @@ type bpf_debugGrpcSrvFuncInvocationT struct { type bpf_debugGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_debugConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o index 94c670e7e..69934ccb3 100644 --- a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d6fe779702bd07cfeae83f447f141dde100d6d8886b58cef94a4e0abe303958 -size 901752 +oid sha256:cec51f70daadce5cbbd74b95d8b3b4a0a0734893dc4eb8e825469bd2e1d99ecf +size 900344 diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.go index 2f5dca811..2ad690f3a 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.go @@ -63,7 +63,7 @@ type bpf_tpGrpcSrvFuncInvocationT struct { type bpf_tpGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_tpConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.o index 45c6abe3f..c7682ee2b 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_tp_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:821f9adf0dc2e7e4eb9500e99f6ee3c9b3753308d2f90862a521ee8e64bfc464 -size 454152 +oid sha256:a4325fb501da3e08dbebbf9117013b5f0ee8434f2c6e6b9a668391201a6c7625 +size 453424 diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.go index ebd6ce529..210f6e100 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.go @@ -63,7 +63,7 @@ type bpf_tp_debugGrpcSrvFuncInvocationT struct { type bpf_tp_debugGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_tp_debugConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.o index e5a567cba..fe1537170 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_tp_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d3fe2321ea5a6ae0a2a32e9c34828efed363bd78a568dc2eeed5446d0a74fba -size 997832 +oid sha256:1c1b188dea080271979f747a2f2e3cb141ada724324d2f5a4f03b3bb7a8507f7 +size 998824 diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.go index b234c3605..a10e2b1c2 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.go @@ -63,7 +63,7 @@ type bpf_tp_debugGrpcSrvFuncInvocationT struct { type bpf_tp_debugGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_tp_debugConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.o index 1bfdb8816..dd6e3f6fc 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_tp_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77fa58ed8bdfd4555c3806163f68ae7472673d0319115740aad5d7c753da9993 -size 999656 +oid sha256:cf8ad759078f1307183e6081572e0368caf95f709eff45aeb6c408d3caccf1a8 +size 1000648 diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.go index 7a76ff187..f2b9911cd 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.go @@ -63,7 +63,7 @@ type bpf_tpGrpcSrvFuncInvocationT struct { type bpf_tpGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpf_tpConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.o index 64cb25bd7..1fae870dc 100644 --- a/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_tp_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44411f0e7f8a76cf0fd1977f9aec9778b7f8cbdf7acc0222d5d6512b90a9275e -size 455920 +oid sha256:5139581966f4a01968574f100ba80095f7ae63b41ef5e635398736024f77ef88 +size 455192 diff --git a/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.go b/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.go index 26513c474..31ee604d5 100644 --- a/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.go +++ b/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.go @@ -51,7 +51,7 @@ type bpfGrpcSrvFuncInvocationT struct { type bpfGrpcTransportsT struct { Type uint8 - _ [1]byte + _ [3]byte Conn bpfConnectionInfoT } diff --git a/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.o index 6a7437d14..505247061 100644 --- a/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:138e8984bd7eab126bae805788ef55475e59374c927c39c6cb722fd2281f416d -size 409360 +oid sha256:d2f0a62fd120fe1665c4a840fcf5a4fbbccc538930c2cde5dc69f91d4fb2fbf7 +size 407920 diff --git a/pkg/internal/ebpf/gotracer/gotracer.go b/pkg/internal/ebpf/gotracer/gotracer.go index 1b8426c4c..39da8b239 100644 --- a/pkg/internal/ebpf/gotracer/gotracer.go +++ b/pkg/internal/ebpf/gotracer/gotracer.go @@ -80,8 +80,7 @@ func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { loader = loadBpf_tp_debug } } else { - p.log.Info("Kernel in lockdown mode or missing CAP_SYS_ADMIN," + - " trace info propagation in HTTP headers is disabled.") + p.log.Info("Kernel in lockdown mode or missing CAP_SYS_ADMIN.") } return loader() } @@ -378,6 +377,10 @@ func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } + +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } + func (p *Tracer) RecordInstrumentedLib(_ uint64) {} func (p *Tracer) UnlinkInstrumentedLib(_ uint64) {} diff --git a/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o index 186ed744b..fcad8487b 100644 --- a/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d091613317e96ee7b18e36a7c6cad8e4c14d58bf06ae244179ca472e3efd7ce1 -size 42512 +oid sha256:0aed50f8a73c6152bc9a00d4d5108de3d9de07dde2cdba6bee4449cd6675b6cd +size 42872 diff --git a/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o index 97d515de4..5673378f2 100644 --- a/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3eb35f051ce41e980c49dd8e2ce62d373b184b79263cb3fd9cf0fa3aec7d016 -size 42752 +oid sha256:facde3bae441088f2dfd58147595392624d9c65cb619bf84fb83d5c28e533ca5 +size 43112 diff --git a/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o index 610e9b50f..f4eb69b82 100644 --- a/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70fc197c8ab1850376bde51e8c1f429a57286938c65a337262807ce409f6e406 -size 42640 +oid sha256:bc461947e3b312b962beb4aa8d92ea325a2f313d1268484866e8752e3b6def0b +size 43000 diff --git a/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o index b715b233c..e19c67211 100644 --- a/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9500aa3b559104eea0df776102a8c79f4d8eed5ec85017c14f52eb860be06670 -size 42400 +oid sha256:bbc5f7d559cda9471e514afd89a22555bfa453bc6981ae393364df38ee349a1d +size 42760 diff --git a/pkg/internal/ebpf/httptracer/httptracer.go b/pkg/internal/ebpf/httptracer/httptracer.go index 961772862..bd02851d8 100644 --- a/pkg/internal/ebpf/httptracer/httptracer.go +++ b/pkg/internal/ebpf/httptracer/httptracer.go @@ -107,6 +107,10 @@ func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } + +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } + func (p *Tracer) RecordInstrumentedLib(uint64) {} func (p *Tracer) UnlinkInstrumentedLib(uint64) {} diff --git a/pkg/internal/ebpf/httptracer/httptracer_notlinux.go b/pkg/internal/ebpf/httptracer/httptracer_notlinux.go index ddc46a26f..dd9d9e6a5 100644 --- a/pkg/internal/ebpf/httptracer/httptracer_notlinux.go +++ b/pkg/internal/ebpf/httptracer/httptracer_notlinux.go @@ -31,6 +31,8 @@ func (p *Tracer) KProbes() map[string]ebpfcommon.FunctionPrograms { r func (p *Tracer) UProbes() map[string]map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) Tracepoints() map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } func (p *Tracer) RecordInstrumentedLib(_ uint64) {} func (p *Tracer) UnlinkInstrumentedLib(_ uint64) {} func (p *Tracer) AlreadyInstrumentedLib(_ uint64) bool { return false } diff --git a/pkg/internal/ebpf/instrumenter.go b/pkg/internal/ebpf/instrumenter.go index 3f0f94096..d4c47a088 100644 --- a/pkg/internal/ebpf/instrumenter.go +++ b/pkg/internal/ebpf/instrumenter.go @@ -7,12 +7,14 @@ import ( "fmt" "log/slog" "os" + "path/filepath" "strings" "syscall" "unsafe" "github.com/cilium/ebpf" "github.com/cilium/ebpf/link" + v2 "github.com/containers/common/pkg/cgroupv2" "github.com/prometheus/procfs" "golang.org/x/sys/unix" @@ -291,6 +293,51 @@ func attachSocketFilter(filter *ebpf.Program) (int, error) { return -1, err } +func (i *instrumenter) sockmsgs(p Tracer) error { + for _, sockmsg := range p.SockMsgs() { + slog.Info("Attaching sock msgs") + err := link.RawAttachProgram(link.RawAttachProgramOptions{ + Target: sockmsg.MapFD, + Program: sockmsg.Program, + Attach: sockmsg.AttachAs, + }) + + if err != nil { + return fmt.Errorf("attaching sock_msg program: %w", err) + } + + p.AddCloser(&sockmsg) + } + + return nil +} + +func (i *instrumenter) sockops(p Tracer) error { + for _, sockops := range p.SockOps() { + cgroupPath, err := getCgroupPath() + + if err != nil { + return fmt.Errorf("error getting cgroup path for sockops: %w", err) + } + + slog.Info("Attaching sock ops", "path", cgroupPath) + + sockops.SockopsCgroup, err = link.AttachCgroup(link.CgroupOptions{ + Path: cgroupPath, + Attach: sockops.AttachAs, + Program: sockops.Program, + }) + + if err != nil { + return fmt.Errorf("attaching sockops program: %w", err) + } + + p.AddCloser(&sockops) + } + + return nil +} + func (i *instrumenter) tracepoints(p KprobesTracer) error { for sfunc, sprobes := range p.Tracepoints() { slog.Debug("going to add syscall", "function", sfunc, "probes", sprobes) @@ -349,3 +396,13 @@ func htons(a uint16) uint16 { func processMaps(pid int32) ([]*procfs.ProcMap, error) { return exec.FindLibMaps(pid) } + +func getCgroupPath() (string, error) { + cgroupPath := "/sys/fs/cgroup" + + enabled, err := v2.Enabled() + if !enabled { + cgroupPath = filepath.Join(cgroupPath, "unified") + } + return cgroupPath, err +} diff --git a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.go b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.go index 3a99bdfec..b98794077 100644 --- a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.go +++ b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.go @@ -12,6 +12,18 @@ import ( "github.com/cilium/ebpf" ) +type bpfCallProtocolArgsT struct { + PidConn bpfPidConnectionInfoT + SmallBuf [24]uint8 + U_buf uint64 + BytesLen int32 + Ssl uint8 + Direction uint8 + OrigDport uint16 + PacketType uint8 + _ [7]byte +} + type bpfConnectionInfoT struct { S_addr [16]uint8 D_addr [16]uint8 @@ -29,6 +41,48 @@ type bpfGoAddrKeyT struct { Addr uint64 } +type bpfHttp2ConnStreamT struct { + PidConn bpfPidConnectionInfoT + StreamId uint32 +} + +type bpfHttp2GrpcRequestT struct { + Flags uint8 + _ [3]byte + ConnInfo bpfConnectionInfoT + Data [256]uint8 + RetData [64]uint8 + Type uint8 + _ [3]byte + Len int32 + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Ssl uint8 + _ [3]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpfHttpConnectionMetadataT struct { + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Type uint8 +} + type bpfHttpFuncInvocationT struct { StartMonotimeNs uint64 Tp struct { @@ -43,9 +97,8 @@ type bpfHttpFuncInvocationT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -72,11 +125,71 @@ type bpfHttpInfoT struct { _ [4]byte } +type bpfMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + +type bpfMsgDataT struct{ Buf [1024]uint8 } + type bpfPidConnectionInfoT struct { Conn bpfConnectionInfoT Pid uint32 } +type bpfPidKeyT struct { + Pid uint32 + Ns uint32 +} + +type bpfSslArgsT struct { + Ssl uint64 + Buf uint64 + LenPtr uint64 +} + +type bpfSslPidConnectionInfoT struct { + P_conn bpfPidConnectionInfoT + OrigDport uint16 + _ [2]byte +} + +type bpfTcHttpCtx struct { + Offset uint32 + Seen uint32 + Written uint32 +} + +type bpfTcpReqT struct { + Flags uint8 + _ [3]byte + ConnInfo bpfConnectionInfoT + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Buf [256]uint8 + Rbuf [128]uint8 + Len uint32 + RespLen uint32 + Ssl uint8 + Direction uint8 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + _ [2]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpfTpBufDataT struct{ Buf [256]uint8 } + type bpfTpInfoPidT struct { Tp struct { TraceId [16]uint8 @@ -91,6 +204,11 @@ type bpfTpInfoPidT struct { _ [3]byte } +type bpfTraceKeyT struct { + P_key bpfPidKeyT + ExtraId uint64 +} + // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) @@ -132,21 +250,61 @@ type bpfSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { - AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` - AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` + AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AsyncReset *ebpf.ProgramSpec `ebpf:"async_reset"` + EmitAsyncInit *ebpf.ProgramSpec `ebpf:"emit_async_init"` + PacketExtender *ebpf.ProgramSpec `ebpf:"packet_extender"` + ProtocolHttp *ebpf.ProgramSpec `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.ProgramSpec `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.ProgramSpec `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.ProgramSpec `ebpf:"sockmap_tracker"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { + ActiveNodejsIds *ebpf.MapSpec `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.MapSpec `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.MapSpec `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.MapSpec `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.MapSpec `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.MapSpec `ebpf:"async_reset_args"` + BufMem *ebpf.MapSpec `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.MapSpec `ebpf:"client_connect_info"` + CloneMap *ebpf.MapSpec `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.MapSpec `ebpf:"connection_meta_mem"` + Events *ebpf.MapSpec `ebpf:"events"` + Http2InfoMem *ebpf.MapSpec `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.MapSpec `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` + IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` + JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.MapSpec `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.MapSpec `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.MapSpec `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.MapSpec `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.MapSpec `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.MapSpec `ebpf:"outgoing_trace_map"` + PidCache *ebpf.MapSpec `ebpf:"pid_cache"` + PidTidToConn *ebpf.MapSpec `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.MapSpec `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.MapSpec `ebpf:"server_traces"` + SockDir *ebpf.MapSpec `ebpf:"sock_dir"` + SslToConn *ebpf.MapSpec `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.MapSpec `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.MapSpec `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.MapSpec `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.MapSpec `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.MapSpec `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.MapSpec `ebpf:"tp_info_mem"` TraceMap *ebpf.MapSpec `ebpf:"trace_map"` + ValidPids *ebpf.MapSpec `ebpf:"valid_pids"` } // bpfObjects contains all objects after they have been loaded into the kernel. @@ -168,24 +326,90 @@ func (o *bpfObjects) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { + ActiveNodejsIds *ebpf.Map `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.Map `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.Map `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.Map `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.Map `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.Map `ebpf:"async_reset_args"` + BufMem *ebpf.Map `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.Map `ebpf:"client_connect_info"` + CloneMap *ebpf.Map `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.Map `ebpf:"connection_meta_mem"` + Events *ebpf.Map `ebpf:"events"` + Http2InfoMem *ebpf.Map `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.Map `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` + IovecMem *ebpf.Map `ebpf:"iovec_mem"` + JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.Map `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.Map `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.Map `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.Map `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.Map `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.Map `ebpf:"outgoing_trace_map"` + PidCache *ebpf.Map `ebpf:"pid_cache"` + PidTidToConn *ebpf.Map `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.Map `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.Map `ebpf:"server_traces"` + SockDir *ebpf.Map `ebpf:"sock_dir"` + SslToConn *ebpf.Map `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.Map `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.Map `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.Map `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.Map `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.Map `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.Map `ebpf:"tp_info_mem"` TraceMap *ebpf.Map `ebpf:"trace_map"` + ValidPids *ebpf.Map `ebpf:"valid_pids"` } func (m *bpfMaps) Close() error { return _BpfClose( + m.ActiveNodejsIds, + m.ActiveSslConnections, + m.ActiveSslHandshakes, + m.ActiveSslReadArgs, + m.ActiveSslWriteArgs, + m.AsyncResetArgs, + m.BufMem, + m.ClientConnectInfo, + m.CloneMap, + m.ConnectionMetaMem, + m.Events, + m.Http2InfoMem, + m.HttpInfoMem, m.IncomingTraceMap, + m.IovecMem, + m.JumpTable, + m.MsgBuffers, + m.NodejsParentMap, m.OngoingGoHttp, m.OngoingHttp, + m.OngoingHttp2Connections, + m.OngoingHttp2Grpc, m.OngoingHttpClientRequests, m.OngoingHttpFallback, + m.OngoingTcpReq, m.OutgoingTraceMap, + m.PidCache, + m.PidTidToConn, + m.ProtocolArgsMem, + m.ServerTraces, + m.SockDir, + m.SslToConn, + m.SslToPidTid, + m.TcHttpCtxMap, + m.TcpReqMem, + m.TpBufMemory, + m.TpCharBufMem, + m.TpInfoMem, m.TraceMap, + m.ValidPids, ) } @@ -193,14 +417,28 @@ func (m *bpfMaps) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { - AppEgress *ebpf.Program `ebpf:"app_egress"` - AppIngress *ebpf.Program `ebpf:"app_ingress"` + AppEgress *ebpf.Program `ebpf:"app_egress"` + AppIngress *ebpf.Program `ebpf:"app_ingress"` + AsyncReset *ebpf.Program `ebpf:"async_reset"` + EmitAsyncInit *ebpf.Program `ebpf:"emit_async_init"` + PacketExtender *ebpf.Program `ebpf:"packet_extender"` + ProtocolHttp *ebpf.Program `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.Program `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.Program `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.Program `ebpf:"sockmap_tracker"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.AppEgress, p.AppIngress, + p.AsyncReset, + p.EmitAsyncInit, + p.PacketExtender, + p.ProtocolHttp, + p.ProtocolHttp2, + p.ProtocolTcp, + p.SockmapTracker, ) } diff --git a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o index 6a6a239ad..be184fcb8 100644 --- a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:021f8eea8cdbc5820ec1fc157da66359145394ea96fcad95484885dab73bd84a -size 36952 +oid sha256:f5747dab4356d695284625813fa95d969713644187e84f5f99d8bcb925fcca24 +size 196072 diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.go b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.go index cc2f2a7fc..a2900ffca 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.go +++ b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.go @@ -12,6 +12,18 @@ import ( "github.com/cilium/ebpf" ) +type bpf_debugCallProtocolArgsT struct { + PidConn bpf_debugPidConnectionInfoT + SmallBuf [24]uint8 + U_buf uint64 + BytesLen int32 + Ssl uint8 + Direction uint8 + OrigDport uint16 + PacketType uint8 + _ [7]byte +} + type bpf_debugConnectionInfoT struct { S_addr [16]uint8 D_addr [16]uint8 @@ -29,6 +41,48 @@ type bpf_debugGoAddrKeyT struct { Addr uint64 } +type bpf_debugHttp2ConnStreamT struct { + PidConn bpf_debugPidConnectionInfoT + StreamId uint32 +} + +type bpf_debugHttp2GrpcRequestT struct { + Flags uint8 + _ [3]byte + ConnInfo bpf_debugConnectionInfoT + Data [256]uint8 + RetData [64]uint8 + Type uint8 + _ [3]byte + Len int32 + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Ssl uint8 + _ [3]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpf_debugHttpConnectionMetadataT struct { + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Type uint8 +} + type bpf_debugHttpFuncInvocationT struct { StartMonotimeNs uint64 Tp struct { @@ -43,9 +97,8 @@ type bpf_debugHttpFuncInvocationT struct { type bpf_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -72,11 +125,71 @@ type bpf_debugHttpInfoT struct { _ [4]byte } +type bpf_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + +type bpf_debugMsgDataT struct{ Buf [1024]uint8 } + type bpf_debugPidConnectionInfoT struct { Conn bpf_debugConnectionInfoT Pid uint32 } +type bpf_debugPidKeyT struct { + Pid uint32 + Ns uint32 +} + +type bpf_debugSslArgsT struct { + Ssl uint64 + Buf uint64 + LenPtr uint64 +} + +type bpf_debugSslPidConnectionInfoT struct { + P_conn bpf_debugPidConnectionInfoT + OrigDport uint16 + _ [2]byte +} + +type bpf_debugTcHttpCtx struct { + Offset uint32 + Seen uint32 + Written uint32 +} + +type bpf_debugTcpReqT struct { + Flags uint8 + _ [3]byte + ConnInfo bpf_debugConnectionInfoT + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Buf [256]uint8 + Rbuf [128]uint8 + Len uint32 + RespLen uint32 + Ssl uint8 + Direction uint8 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + _ [2]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpf_debugTpBufDataT struct{ Buf [256]uint8 } + type bpf_debugTpInfoPidT struct { Tp struct { TraceId [16]uint8 @@ -91,6 +204,11 @@ type bpf_debugTpInfoPidT struct { _ [3]byte } +type bpf_debugTraceKeyT struct { + P_key bpf_debugPidKeyT + ExtraId uint64 +} + // loadBpf_debug returns the embedded CollectionSpec for bpf_debug. func loadBpf_debug() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_Bpf_debugBytes) @@ -132,22 +250,62 @@ type bpf_debugSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpf_debugProgramSpecs struct { - AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` - AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` + AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AsyncReset *ebpf.ProgramSpec `ebpf:"async_reset"` + EmitAsyncInit *ebpf.ProgramSpec `ebpf:"emit_async_init"` + PacketExtender *ebpf.ProgramSpec `ebpf:"packet_extender"` + ProtocolHttp *ebpf.ProgramSpec `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.ProgramSpec `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.ProgramSpec `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.ProgramSpec `ebpf:"sockmap_tracker"` } // bpf_debugMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpf_debugMapSpecs struct { + ActiveNodejsIds *ebpf.MapSpec `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.MapSpec `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.MapSpec `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.MapSpec `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.MapSpec `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.MapSpec `ebpf:"async_reset_args"` + BufMem *ebpf.MapSpec `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.MapSpec `ebpf:"client_connect_info"` + CloneMap *ebpf.MapSpec `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.MapSpec `ebpf:"connection_meta_mem"` DebugEvents *ebpf.MapSpec `ebpf:"debug_events"` + Events *ebpf.MapSpec `ebpf:"events"` + Http2InfoMem *ebpf.MapSpec `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.MapSpec `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` + IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` + JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.MapSpec `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.MapSpec `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.MapSpec `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.MapSpec `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.MapSpec `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.MapSpec `ebpf:"outgoing_trace_map"` + PidCache *ebpf.MapSpec `ebpf:"pid_cache"` + PidTidToConn *ebpf.MapSpec `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.MapSpec `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.MapSpec `ebpf:"server_traces"` + SockDir *ebpf.MapSpec `ebpf:"sock_dir"` + SslToConn *ebpf.MapSpec `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.MapSpec `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.MapSpec `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.MapSpec `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.MapSpec `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.MapSpec `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.MapSpec `ebpf:"tp_info_mem"` TraceMap *ebpf.MapSpec `ebpf:"trace_map"` + ValidPids *ebpf.MapSpec `ebpf:"valid_pids"` } // bpf_debugObjects contains all objects after they have been loaded into the kernel. @@ -169,26 +327,92 @@ func (o *bpf_debugObjects) Close() error { // // It can be passed to loadBpf_debugObjects or ebpf.CollectionSpec.LoadAndAssign. type bpf_debugMaps struct { + ActiveNodejsIds *ebpf.Map `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.Map `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.Map `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.Map `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.Map `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.Map `ebpf:"async_reset_args"` + BufMem *ebpf.Map `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.Map `ebpf:"client_connect_info"` + CloneMap *ebpf.Map `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.Map `ebpf:"connection_meta_mem"` DebugEvents *ebpf.Map `ebpf:"debug_events"` + Events *ebpf.Map `ebpf:"events"` + Http2InfoMem *ebpf.Map `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.Map `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` + IovecMem *ebpf.Map `ebpf:"iovec_mem"` + JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.Map `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.Map `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.Map `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.Map `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.Map `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.Map `ebpf:"outgoing_trace_map"` + PidCache *ebpf.Map `ebpf:"pid_cache"` + PidTidToConn *ebpf.Map `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.Map `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.Map `ebpf:"server_traces"` + SockDir *ebpf.Map `ebpf:"sock_dir"` + SslToConn *ebpf.Map `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.Map `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.Map `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.Map `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.Map `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.Map `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.Map `ebpf:"tp_info_mem"` TraceMap *ebpf.Map `ebpf:"trace_map"` + ValidPids *ebpf.Map `ebpf:"valid_pids"` } func (m *bpf_debugMaps) Close() error { return _Bpf_debugClose( + m.ActiveNodejsIds, + m.ActiveSslConnections, + m.ActiveSslHandshakes, + m.ActiveSslReadArgs, + m.ActiveSslWriteArgs, + m.AsyncResetArgs, + m.BufMem, + m.ClientConnectInfo, + m.CloneMap, + m.ConnectionMetaMem, m.DebugEvents, + m.Events, + m.Http2InfoMem, + m.HttpInfoMem, m.IncomingTraceMap, + m.IovecMem, + m.JumpTable, + m.MsgBuffers, + m.NodejsParentMap, m.OngoingGoHttp, m.OngoingHttp, + m.OngoingHttp2Connections, + m.OngoingHttp2Grpc, m.OngoingHttpClientRequests, m.OngoingHttpFallback, + m.OngoingTcpReq, m.OutgoingTraceMap, + m.PidCache, + m.PidTidToConn, + m.ProtocolArgsMem, + m.ServerTraces, + m.SockDir, + m.SslToConn, + m.SslToPidTid, + m.TcHttpCtxMap, + m.TcpReqMem, + m.TpBufMemory, + m.TpCharBufMem, + m.TpInfoMem, m.TraceMap, + m.ValidPids, ) } @@ -196,14 +420,28 @@ func (m *bpf_debugMaps) Close() error { // // It can be passed to loadBpf_debugObjects or ebpf.CollectionSpec.LoadAndAssign. type bpf_debugPrograms struct { - AppEgress *ebpf.Program `ebpf:"app_egress"` - AppIngress *ebpf.Program `ebpf:"app_ingress"` + AppEgress *ebpf.Program `ebpf:"app_egress"` + AppIngress *ebpf.Program `ebpf:"app_ingress"` + AsyncReset *ebpf.Program `ebpf:"async_reset"` + EmitAsyncInit *ebpf.Program `ebpf:"emit_async_init"` + PacketExtender *ebpf.Program `ebpf:"packet_extender"` + ProtocolHttp *ebpf.Program `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.Program `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.Program `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.Program `ebpf:"sockmap_tracker"` } func (p *bpf_debugPrograms) Close() error { return _Bpf_debugClose( p.AppEgress, p.AppIngress, + p.AsyncReset, + p.EmitAsyncInit, + p.PacketExtender, + p.ProtocolHttp, + p.ProtocolHttp2, + p.ProtocolTcp, + p.SockmapTracker, ) } diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o index 6a28492bf..6ae0efd64 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97f7b0a0a3ee1a22e21840fee91dcfa2d3ae9664c0cb33922fa8930f08890cff -size 78760 +oid sha256:997d89ed40e3c3524e84a1ed8e45086e3d2252828838b0ce80fdf48f0021958f +size 339384 diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.go b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.go index 3c389e356..9ebc167f3 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.go +++ b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.go @@ -12,6 +12,18 @@ import ( "github.com/cilium/ebpf" ) +type bpf_debugCallProtocolArgsT struct { + PidConn bpf_debugPidConnectionInfoT + SmallBuf [24]uint8 + U_buf uint64 + BytesLen int32 + Ssl uint8 + Direction uint8 + OrigDport uint16 + PacketType uint8 + _ [7]byte +} + type bpf_debugConnectionInfoT struct { S_addr [16]uint8 D_addr [16]uint8 @@ -29,6 +41,48 @@ type bpf_debugGoAddrKeyT struct { Addr uint64 } +type bpf_debugHttp2ConnStreamT struct { + PidConn bpf_debugPidConnectionInfoT + StreamId uint32 +} + +type bpf_debugHttp2GrpcRequestT struct { + Flags uint8 + _ [3]byte + ConnInfo bpf_debugConnectionInfoT + Data [256]uint8 + RetData [64]uint8 + Type uint8 + _ [3]byte + Len int32 + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Ssl uint8 + _ [3]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpf_debugHttpConnectionMetadataT struct { + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Type uint8 +} + type bpf_debugHttpFuncInvocationT struct { StartMonotimeNs uint64 Tp struct { @@ -43,9 +97,8 @@ type bpf_debugHttpFuncInvocationT struct { type bpf_debugHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpf_debugConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -72,11 +125,71 @@ type bpf_debugHttpInfoT struct { _ [4]byte } +type bpf_debugMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + +type bpf_debugMsgDataT struct{ Buf [1024]uint8 } + type bpf_debugPidConnectionInfoT struct { Conn bpf_debugConnectionInfoT Pid uint32 } +type bpf_debugPidKeyT struct { + Pid uint32 + Ns uint32 +} + +type bpf_debugSslArgsT struct { + Ssl uint64 + Buf uint64 + LenPtr uint64 +} + +type bpf_debugSslPidConnectionInfoT struct { + P_conn bpf_debugPidConnectionInfoT + OrigDport uint16 + _ [2]byte +} + +type bpf_debugTcHttpCtx struct { + Offset uint32 + Seen uint32 + Written uint32 +} + +type bpf_debugTcpReqT struct { + Flags uint8 + _ [3]byte + ConnInfo bpf_debugConnectionInfoT + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Buf [256]uint8 + Rbuf [128]uint8 + Len uint32 + RespLen uint32 + Ssl uint8 + Direction uint8 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + _ [2]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpf_debugTpBufDataT struct{ Buf [256]uint8 } + type bpf_debugTpInfoPidT struct { Tp struct { TraceId [16]uint8 @@ -91,6 +204,11 @@ type bpf_debugTpInfoPidT struct { _ [3]byte } +type bpf_debugTraceKeyT struct { + P_key bpf_debugPidKeyT + ExtraId uint64 +} + // loadBpf_debug returns the embedded CollectionSpec for bpf_debug. func loadBpf_debug() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_Bpf_debugBytes) @@ -132,22 +250,62 @@ type bpf_debugSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpf_debugProgramSpecs struct { - AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` - AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` + AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AsyncReset *ebpf.ProgramSpec `ebpf:"async_reset"` + EmitAsyncInit *ebpf.ProgramSpec `ebpf:"emit_async_init"` + PacketExtender *ebpf.ProgramSpec `ebpf:"packet_extender"` + ProtocolHttp *ebpf.ProgramSpec `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.ProgramSpec `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.ProgramSpec `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.ProgramSpec `ebpf:"sockmap_tracker"` } // bpf_debugMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpf_debugMapSpecs struct { + ActiveNodejsIds *ebpf.MapSpec `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.MapSpec `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.MapSpec `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.MapSpec `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.MapSpec `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.MapSpec `ebpf:"async_reset_args"` + BufMem *ebpf.MapSpec `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.MapSpec `ebpf:"client_connect_info"` + CloneMap *ebpf.MapSpec `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.MapSpec `ebpf:"connection_meta_mem"` DebugEvents *ebpf.MapSpec `ebpf:"debug_events"` + Events *ebpf.MapSpec `ebpf:"events"` + Http2InfoMem *ebpf.MapSpec `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.MapSpec `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` + IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` + JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.MapSpec `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.MapSpec `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.MapSpec `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.MapSpec `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.MapSpec `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.MapSpec `ebpf:"outgoing_trace_map"` + PidCache *ebpf.MapSpec `ebpf:"pid_cache"` + PidTidToConn *ebpf.MapSpec `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.MapSpec `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.MapSpec `ebpf:"server_traces"` + SockDir *ebpf.MapSpec `ebpf:"sock_dir"` + SslToConn *ebpf.MapSpec `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.MapSpec `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.MapSpec `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.MapSpec `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.MapSpec `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.MapSpec `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.MapSpec `ebpf:"tp_info_mem"` TraceMap *ebpf.MapSpec `ebpf:"trace_map"` + ValidPids *ebpf.MapSpec `ebpf:"valid_pids"` } // bpf_debugObjects contains all objects after they have been loaded into the kernel. @@ -169,26 +327,92 @@ func (o *bpf_debugObjects) Close() error { // // It can be passed to loadBpf_debugObjects or ebpf.CollectionSpec.LoadAndAssign. type bpf_debugMaps struct { + ActiveNodejsIds *ebpf.Map `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.Map `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.Map `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.Map `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.Map `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.Map `ebpf:"async_reset_args"` + BufMem *ebpf.Map `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.Map `ebpf:"client_connect_info"` + CloneMap *ebpf.Map `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.Map `ebpf:"connection_meta_mem"` DebugEvents *ebpf.Map `ebpf:"debug_events"` + Events *ebpf.Map `ebpf:"events"` + Http2InfoMem *ebpf.Map `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.Map `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` + IovecMem *ebpf.Map `ebpf:"iovec_mem"` + JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.Map `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.Map `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.Map `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.Map `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.Map `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.Map `ebpf:"outgoing_trace_map"` + PidCache *ebpf.Map `ebpf:"pid_cache"` + PidTidToConn *ebpf.Map `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.Map `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.Map `ebpf:"server_traces"` + SockDir *ebpf.Map `ebpf:"sock_dir"` + SslToConn *ebpf.Map `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.Map `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.Map `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.Map `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.Map `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.Map `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.Map `ebpf:"tp_info_mem"` TraceMap *ebpf.Map `ebpf:"trace_map"` + ValidPids *ebpf.Map `ebpf:"valid_pids"` } func (m *bpf_debugMaps) Close() error { return _Bpf_debugClose( + m.ActiveNodejsIds, + m.ActiveSslConnections, + m.ActiveSslHandshakes, + m.ActiveSslReadArgs, + m.ActiveSslWriteArgs, + m.AsyncResetArgs, + m.BufMem, + m.ClientConnectInfo, + m.CloneMap, + m.ConnectionMetaMem, m.DebugEvents, + m.Events, + m.Http2InfoMem, + m.HttpInfoMem, m.IncomingTraceMap, + m.IovecMem, + m.JumpTable, + m.MsgBuffers, + m.NodejsParentMap, m.OngoingGoHttp, m.OngoingHttp, + m.OngoingHttp2Connections, + m.OngoingHttp2Grpc, m.OngoingHttpClientRequests, m.OngoingHttpFallback, + m.OngoingTcpReq, m.OutgoingTraceMap, + m.PidCache, + m.PidTidToConn, + m.ProtocolArgsMem, + m.ServerTraces, + m.SockDir, + m.SslToConn, + m.SslToPidTid, + m.TcHttpCtxMap, + m.TcpReqMem, + m.TpBufMemory, + m.TpCharBufMem, + m.TpInfoMem, m.TraceMap, + m.ValidPids, ) } @@ -196,14 +420,28 @@ func (m *bpf_debugMaps) Close() error { // // It can be passed to loadBpf_debugObjects or ebpf.CollectionSpec.LoadAndAssign. type bpf_debugPrograms struct { - AppEgress *ebpf.Program `ebpf:"app_egress"` - AppIngress *ebpf.Program `ebpf:"app_ingress"` + AppEgress *ebpf.Program `ebpf:"app_egress"` + AppIngress *ebpf.Program `ebpf:"app_ingress"` + AsyncReset *ebpf.Program `ebpf:"async_reset"` + EmitAsyncInit *ebpf.Program `ebpf:"emit_async_init"` + PacketExtender *ebpf.Program `ebpf:"packet_extender"` + ProtocolHttp *ebpf.Program `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.Program `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.Program `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.Program `ebpf:"sockmap_tracker"` } func (p *bpf_debugPrograms) Close() error { return _Bpf_debugClose( p.AppEgress, p.AppIngress, + p.AsyncReset, + p.EmitAsyncInit, + p.PacketExtender, + p.ProtocolHttp, + p.ProtocolHttp2, + p.ProtocolTcp, + p.SockmapTracker, ) } diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o index 87641db49..e498a88dd 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:16b80c465485dca1666e4d6fec3999e5c07329c07701587ef2d22145b399663e -size 78544 +oid sha256:d8fd88b462099505abf146e9c5a42c0ef446776c01ea37f0e6324162127188b0 +size 340688 diff --git a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.go b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.go index 45b813e1f..83cccce57 100644 --- a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.go +++ b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.go @@ -12,6 +12,18 @@ import ( "github.com/cilium/ebpf" ) +type bpfCallProtocolArgsT struct { + PidConn bpfPidConnectionInfoT + SmallBuf [24]uint8 + U_buf uint64 + BytesLen int32 + Ssl uint8 + Direction uint8 + OrigDport uint16 + PacketType uint8 + _ [7]byte +} + type bpfConnectionInfoT struct { S_addr [16]uint8 D_addr [16]uint8 @@ -29,6 +41,48 @@ type bpfGoAddrKeyT struct { Addr uint64 } +type bpfHttp2ConnStreamT struct { + PidConn bpfPidConnectionInfoT + StreamId uint32 +} + +type bpfHttp2GrpcRequestT struct { + Flags uint8 + _ [3]byte + ConnInfo bpfConnectionInfoT + Data [256]uint8 + RetData [64]uint8 + Type uint8 + _ [3]byte + Len int32 + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Ssl uint8 + _ [3]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpfHttpConnectionMetadataT struct { + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + Type uint8 +} + type bpfHttpFuncInvocationT struct { StartMonotimeNs uint64 Tp struct { @@ -43,9 +97,8 @@ type bpfHttpFuncInvocationT struct { type bpfHttpInfoT struct { Flags uint8 - _ [1]byte + _ [3]byte ConnInfo bpfConnectionInfoT - _ [2]byte StartMonotimeNs uint64 EndMonotimeNs uint64 Buf [192]uint8 @@ -72,11 +125,71 @@ type bpfHttpInfoT struct { _ [4]byte } +type bpfMsgBufferT struct { + Buf [256]uint8 + Pos uint16 +} + +type bpfMsgDataT struct{ Buf [1024]uint8 } + type bpfPidConnectionInfoT struct { Conn bpfConnectionInfoT Pid uint32 } +type bpfPidKeyT struct { + Pid uint32 + Ns uint32 +} + +type bpfSslArgsT struct { + Ssl uint64 + Buf uint64 + LenPtr uint64 +} + +type bpfSslPidConnectionInfoT struct { + P_conn bpfPidConnectionInfoT + OrigDport uint16 + _ [2]byte +} + +type bpfTcHttpCtx struct { + Offset uint32 + Seen uint32 + Written uint32 +} + +type bpfTcpReqT struct { + Flags uint8 + _ [3]byte + ConnInfo bpfConnectionInfoT + StartMonotimeNs uint64 + EndMonotimeNs uint64 + Buf [256]uint8 + Rbuf [128]uint8 + Len uint32 + RespLen uint32 + Ssl uint8 + Direction uint8 + Pid struct { + HostPid uint32 + UserPid uint32 + Ns uint32 + } + _ [2]byte + Tp struct { + TraceId [16]uint8 + SpanId [8]uint8 + ParentId [8]uint8 + Ts uint64 + Flags uint8 + _ [7]byte + } +} + +type bpfTpBufDataT struct{ Buf [256]uint8 } + type bpfTpInfoPidT struct { Tp struct { TraceId [16]uint8 @@ -91,6 +204,11 @@ type bpfTpInfoPidT struct { _ [3]byte } +type bpfTraceKeyT struct { + P_key bpfPidKeyT + ExtraId uint64 +} + // loadBpf returns the embedded CollectionSpec for bpf. func loadBpf() (*ebpf.CollectionSpec, error) { reader := bytes.NewReader(_BpfBytes) @@ -132,21 +250,61 @@ type bpfSpecs struct { // // It can be passed ebpf.CollectionSpec.Assign. type bpfProgramSpecs struct { - AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` - AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AppEgress *ebpf.ProgramSpec `ebpf:"app_egress"` + AppIngress *ebpf.ProgramSpec `ebpf:"app_ingress"` + AsyncReset *ebpf.ProgramSpec `ebpf:"async_reset"` + EmitAsyncInit *ebpf.ProgramSpec `ebpf:"emit_async_init"` + PacketExtender *ebpf.ProgramSpec `ebpf:"packet_extender"` + ProtocolHttp *ebpf.ProgramSpec `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.ProgramSpec `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.ProgramSpec `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.ProgramSpec `ebpf:"sockmap_tracker"` } // bpfMapSpecs contains maps before they are loaded into the kernel. // // It can be passed ebpf.CollectionSpec.Assign. type bpfMapSpecs struct { + ActiveNodejsIds *ebpf.MapSpec `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.MapSpec `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.MapSpec `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.MapSpec `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.MapSpec `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.MapSpec `ebpf:"async_reset_args"` + BufMem *ebpf.MapSpec `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.MapSpec `ebpf:"client_connect_info"` + CloneMap *ebpf.MapSpec `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.MapSpec `ebpf:"connection_meta_mem"` + Events *ebpf.MapSpec `ebpf:"events"` + Http2InfoMem *ebpf.MapSpec `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.MapSpec `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.MapSpec `ebpf:"incoming_trace_map"` + IovecMem *ebpf.MapSpec `ebpf:"iovec_mem"` + JumpTable *ebpf.MapSpec `ebpf:"jump_table"` + MsgBuffers *ebpf.MapSpec `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.MapSpec `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.MapSpec `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.MapSpec `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.MapSpec `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.MapSpec `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.MapSpec `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.MapSpec `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.MapSpec `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.MapSpec `ebpf:"outgoing_trace_map"` + PidCache *ebpf.MapSpec `ebpf:"pid_cache"` + PidTidToConn *ebpf.MapSpec `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.MapSpec `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.MapSpec `ebpf:"server_traces"` + SockDir *ebpf.MapSpec `ebpf:"sock_dir"` + SslToConn *ebpf.MapSpec `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.MapSpec `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.MapSpec `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.MapSpec `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.MapSpec `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.MapSpec `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.MapSpec `ebpf:"tp_info_mem"` TraceMap *ebpf.MapSpec `ebpf:"trace_map"` + ValidPids *ebpf.MapSpec `ebpf:"valid_pids"` } // bpfObjects contains all objects after they have been loaded into the kernel. @@ -168,24 +326,90 @@ func (o *bpfObjects) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfMaps struct { + ActiveNodejsIds *ebpf.Map `ebpf:"active_nodejs_ids"` + ActiveSslConnections *ebpf.Map `ebpf:"active_ssl_connections"` + ActiveSslHandshakes *ebpf.Map `ebpf:"active_ssl_handshakes"` + ActiveSslReadArgs *ebpf.Map `ebpf:"active_ssl_read_args"` + ActiveSslWriteArgs *ebpf.Map `ebpf:"active_ssl_write_args"` + AsyncResetArgs *ebpf.Map `ebpf:"async_reset_args"` + BufMem *ebpf.Map `ebpf:"buf_mem"` + ClientConnectInfo *ebpf.Map `ebpf:"client_connect_info"` + CloneMap *ebpf.Map `ebpf:"clone_map"` + ConnectionMetaMem *ebpf.Map `ebpf:"connection_meta_mem"` + Events *ebpf.Map `ebpf:"events"` + Http2InfoMem *ebpf.Map `ebpf:"http2_info_mem"` + HttpInfoMem *ebpf.Map `ebpf:"http_info_mem"` IncomingTraceMap *ebpf.Map `ebpf:"incoming_trace_map"` + IovecMem *ebpf.Map `ebpf:"iovec_mem"` + JumpTable *ebpf.Map `ebpf:"jump_table"` + MsgBuffers *ebpf.Map `ebpf:"msg_buffers"` + NodejsParentMap *ebpf.Map `ebpf:"nodejs_parent_map"` OngoingGoHttp *ebpf.Map `ebpf:"ongoing_go_http"` OngoingHttp *ebpf.Map `ebpf:"ongoing_http"` + OngoingHttp2Connections *ebpf.Map `ebpf:"ongoing_http2_connections"` + OngoingHttp2Grpc *ebpf.Map `ebpf:"ongoing_http2_grpc"` OngoingHttpClientRequests *ebpf.Map `ebpf:"ongoing_http_client_requests"` OngoingHttpFallback *ebpf.Map `ebpf:"ongoing_http_fallback"` + OngoingTcpReq *ebpf.Map `ebpf:"ongoing_tcp_req"` OutgoingTraceMap *ebpf.Map `ebpf:"outgoing_trace_map"` + PidCache *ebpf.Map `ebpf:"pid_cache"` + PidTidToConn *ebpf.Map `ebpf:"pid_tid_to_conn"` + ProtocolArgsMem *ebpf.Map `ebpf:"protocol_args_mem"` + ServerTraces *ebpf.Map `ebpf:"server_traces"` + SockDir *ebpf.Map `ebpf:"sock_dir"` + SslToConn *ebpf.Map `ebpf:"ssl_to_conn"` + SslToPidTid *ebpf.Map `ebpf:"ssl_to_pid_tid"` + TcHttpCtxMap *ebpf.Map `ebpf:"tc_http_ctx_map"` + TcpReqMem *ebpf.Map `ebpf:"tcp_req_mem"` + TpBufMemory *ebpf.Map `ebpf:"tp_buf_memory"` + TpCharBufMem *ebpf.Map `ebpf:"tp_char_buf_mem"` + TpInfoMem *ebpf.Map `ebpf:"tp_info_mem"` TraceMap *ebpf.Map `ebpf:"trace_map"` + ValidPids *ebpf.Map `ebpf:"valid_pids"` } func (m *bpfMaps) Close() error { return _BpfClose( + m.ActiveNodejsIds, + m.ActiveSslConnections, + m.ActiveSslHandshakes, + m.ActiveSslReadArgs, + m.ActiveSslWriteArgs, + m.AsyncResetArgs, + m.BufMem, + m.ClientConnectInfo, + m.CloneMap, + m.ConnectionMetaMem, + m.Events, + m.Http2InfoMem, + m.HttpInfoMem, m.IncomingTraceMap, + m.IovecMem, + m.JumpTable, + m.MsgBuffers, + m.NodejsParentMap, m.OngoingGoHttp, m.OngoingHttp, + m.OngoingHttp2Connections, + m.OngoingHttp2Grpc, m.OngoingHttpClientRequests, m.OngoingHttpFallback, + m.OngoingTcpReq, m.OutgoingTraceMap, + m.PidCache, + m.PidTidToConn, + m.ProtocolArgsMem, + m.ServerTraces, + m.SockDir, + m.SslToConn, + m.SslToPidTid, + m.TcHttpCtxMap, + m.TcpReqMem, + m.TpBufMemory, + m.TpCharBufMem, + m.TpInfoMem, m.TraceMap, + m.ValidPids, ) } @@ -193,14 +417,28 @@ func (m *bpfMaps) Close() error { // // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. type bpfPrograms struct { - AppEgress *ebpf.Program `ebpf:"app_egress"` - AppIngress *ebpf.Program `ebpf:"app_ingress"` + AppEgress *ebpf.Program `ebpf:"app_egress"` + AppIngress *ebpf.Program `ebpf:"app_ingress"` + AsyncReset *ebpf.Program `ebpf:"async_reset"` + EmitAsyncInit *ebpf.Program `ebpf:"emit_async_init"` + PacketExtender *ebpf.Program `ebpf:"packet_extender"` + ProtocolHttp *ebpf.Program `ebpf:"protocol_http"` + ProtocolHttp2 *ebpf.Program `ebpf:"protocol_http2"` + ProtocolTcp *ebpf.Program `ebpf:"protocol_tcp"` + SockmapTracker *ebpf.Program `ebpf:"sockmap_tracker"` } func (p *bpfPrograms) Close() error { return _BpfClose( p.AppEgress, p.AppIngress, + p.AsyncReset, + p.EmitAsyncInit, + p.PacketExtender, + p.ProtocolHttp, + p.ProtocolHttp2, + p.ProtocolTcp, + p.SockmapTracker, ) } diff --git a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o index 2253d66fb..948ad02ac 100644 --- a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e782c2881f64ff39008afa683bbb0fc6cf5c4b78fd9d6f88d27cb74b638d3007 -size 36736 +oid sha256:3a6291a056ec861690c2c54e35e14e78ddf5f27e32a0df51513d60d30b10a348 +size 197384 diff --git a/pkg/internal/ebpf/tctracer/tctracer.go b/pkg/internal/ebpf/tctracer/tctracer.go index 9bc552857..881173a51 100644 --- a/pkg/internal/ebpf/tctracer/tctracer.go +++ b/pkg/internal/ebpf/tctracer/tctracer.go @@ -6,6 +6,7 @@ import ( "context" "io" "log/slog" + "unsafe" "github.com/cilium/ebpf" "github.com/vishvananda/netlink" @@ -58,7 +59,21 @@ func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { func (p *Tracer) SetupTailCalls() {} func (p *Tracer) Constants() map[string]any { - return map[string]any{} + m := make(map[string]any, 2) + + m["wakeup_data_bytes"] = uint32(p.cfg.EBPF.WakeupLen) * uint32(unsafe.Sizeof(ebpfcommon.HTTPRequestTrace{})) + + // The eBPF side does some basic filtering of events that do not belong to + // processes which we monitor. We filter more accurately in the userspace, but + // for performance reasons we enable the PID based filtering in eBPF. + // This must match httpfltr.go, otherwise we get partial events in userspace. + if !p.cfg.Discovery.SystemWide && !p.cfg.Discovery.BPFPidFilterOff { + m["filter_pids"] = int32(1) + } else { + m["filter_pids"] = int32(0) + } + + return m } func (p *Tracer) RegisterOffsets(_ *exec.FileInfo, _ *goexec.Offsets) {} @@ -91,6 +106,25 @@ func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { + return []ebpfcommon.SockMsg{ + { + Program: p.bpfObjects.PacketExtender, + MapFD: p.bpfObjects.bpfMaps.SockDir.FD(), + AttachAs: ebpf.AttachSkMsgVerdict, + }, + } +} + +func (p *Tracer) SockOps() []ebpfcommon.SockOps { + return []ebpfcommon.SockOps{ + { + Program: p.bpfObjects.SockmapTracker, + AttachAs: ebpf.AttachCGroupSockOps, + }, + } +} + func (p *Tracer) RecordInstrumentedLib(uint64) {} func (p *Tracer) UnlinkInstrumentedLib(uint64) {} diff --git a/pkg/internal/ebpf/tctracer/tctracer_notlinux.go b/pkg/internal/ebpf/tctracer/tctracer_notlinux.go index f08f27e3e..4d1ca6789 100644 --- a/pkg/internal/ebpf/tctracer/tctracer_notlinux.go +++ b/pkg/internal/ebpf/tctracer/tctracer_notlinux.go @@ -31,6 +31,8 @@ func (p *Tracer) KProbes() map[string]ebpfcommon.FunctionPrograms { r func (p *Tracer) UProbes() map[string]map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) Tracepoints() map[string]ebpfcommon.FunctionPrograms { return nil } func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } +func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } +func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } func (p *Tracer) RecordInstrumentedLib(_ uint64) {} func (p *Tracer) UnlinkInstrumentedLib(_ uint64) {} func (p *Tracer) AlreadyInstrumentedLib(_ uint64) bool { return false } diff --git a/pkg/internal/ebpf/tracer.go b/pkg/internal/ebpf/tracer.go index 4d3495f3a..9facb31fd 100644 --- a/pkg/internal/ebpf/tracer.go +++ b/pkg/internal/ebpf/tracer.go @@ -78,6 +78,12 @@ type Tracer interface { // SocketFilters returns a list of programs that need to be loaded as a // generic eBPF socket filter SocketFilters() []*ebpf.Program + // SockMsgs returns a list of programs that need to be loaded as a + // BPF_PROG_TYPE_SK_MSG eBPF programs + SockMsgs() []ebpfcommon.SockMsg + // SockOps returns a list of programs that need to be loaded as a + // BPF_PROG_TYPE_SOCK_OPS eBPF programs + SockOps() []ebpfcommon.SockOps // Sets up Linux traffic control egress/ingress SetupTC() // Probes can potentially instrument a shared library among multiple executables diff --git a/pkg/internal/ebpf/tracer_linux.go b/pkg/internal/ebpf/tracer_linux.go index aaa7f7d2b..7243574ff 100644 --- a/pkg/internal/ebpf/tracer_linux.go +++ b/pkg/internal/ebpf/tracer_linux.go @@ -203,6 +203,18 @@ func (pt *ProcessTracer) loadTracer(p Tracer, log *slog.Logger) error { return err } + // Sock_msg support + if err := i.sockmsgs(p); err != nil { + printVerifierErrorInfo(err) + return err + } + + // Sockops support + if err := i.sockops(p); err != nil { + printVerifierErrorInfo(err) + return err + } + return nil } diff --git a/pkg/internal/ebpf/watcher/bpf_arm64_bpfel.o b/pkg/internal/ebpf/watcher/bpf_arm64_bpfel.o index cd6d838b9..2c971376b 100644 --- a/pkg/internal/ebpf/watcher/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/watcher/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e141ab06f68d4f191f0e3e505784bb6a61b78c30819966df7c7c301b34ed59c2 -size 6912 +oid sha256:c6a74e1816189d2c7557733f11ae019088e0a1bba4269e543e81ce558bc7418c +size 7016 diff --git a/pkg/internal/ebpf/watcher/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/watcher/bpf_debug_arm64_bpfel.o index 570d08a84..4ee42b6f2 100644 --- a/pkg/internal/ebpf/watcher/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/watcher/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c91beff8f0dca86a2b5156051ff0ed436390d582aec75f508ac38667daed8839 -size 8864 +oid sha256:850a4ee9ea5cb1dd7c49fb79093940fa958d21b643f858a16466a4acfc7073b4 +size 8976 diff --git a/pkg/internal/ebpf/watcher/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/watcher/bpf_debug_x86_bpfel.o index e5b7fd9c6..7239fb8cc 100644 --- a/pkg/internal/ebpf/watcher/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/watcher/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac5deb1999a2e133818760b3683d755e34ef0abc5a106dc904d4c3ed15f94f86 -size 8728 +oid sha256:ffb9ed51a621991434d07342beb262be86883439d2feaed35ad05dcb9717d3f0 +size 8832 diff --git a/pkg/internal/ebpf/watcher/bpf_x86_bpfel.o b/pkg/internal/ebpf/watcher/bpf_x86_bpfel.o index 826e57682..49fa97b9b 100644 --- a/pkg/internal/ebpf/watcher/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/watcher/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92eb613aa18fd1fb18a59297ede757f6f444f13014fadfb7262c2d2869e23e06 -size 6768 +oid sha256:6c15c6bf114566ff8ed4aec9234c1c1ac4cf2244429c4028d59df11658321f23 +size 6872 diff --git a/pkg/internal/netolly/ebpf/net_arm64_bpfel.o b/pkg/internal/netolly/ebpf/net_arm64_bpfel.o index 29755eeae..c26a2a9fd 100644 --- a/pkg/internal/netolly/ebpf/net_arm64_bpfel.o +++ b/pkg/internal/netolly/ebpf/net_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f1926ab4ebb8e41fa9d2d5f9bc0bfc351cec56b6d48ba22be01beb4335d2c3e +oid sha256:b2d5bbea40924b5d2e5954cb9ee69a8ec5008777747516586225d77ae3b5fa26 size 31232 diff --git a/test/integration/docker-compose-multiexec-host.yml b/test/integration/docker-compose-multiexec-host.yml index 8cc5872d1..73a7085ce 100644 --- a/test/integration/docker-compose-multiexec-host.yml +++ b/test/integration/docker-compose-multiexec-host.yml @@ -123,6 +123,7 @@ services: volumes: - ./configs/:/configs - ./system/sys/kernel/security:/sys/kernel/security + - /sys/fs/cgroup:/sys/fs/cgroup - ../../testoutput:/coverage - ../../testoutput/run-multi:/var/run/beyla image: hatest-autoinstrumenter diff --git a/test/integration/multiprocess_test.go b/test/integration/multiprocess_test.go index b82362708..557428f1d 100644 --- a/test/integration/multiprocess_test.go +++ b/test/integration/multiprocess_test.go @@ -106,11 +106,9 @@ func TestMultiProcess(t *testing.T) { } func TestMultiProcessAppTC(t *testing.T) { - t.Skip("doesn't work yet, we need a different approach") - - compose, err := docker.ComposeSuite("docker-compose-multiexec.yml", path.Join(pathOutput, "test-suite-multiexec-tc.log")) + compose, err := docker.ComposeSuite("docker-compose-multiexec-host.yml", path.Join(pathOutput, "test-suite-multiexec-tc.log")) // we are going to setup discovery directly in the configuration file - compose.Env = append(compose.Env, `BEYLA_BPF_DISABLE_BLACK_BOX_CP=1`, `BEYLA_BPF_TC_CP=1`) + compose.Env = append(compose.Env, `BEYLA_BPF_DISABLE_BLACK_BOX_CP=1`, `BEYLA_BPF_TC_CP=1`, `BEYLA_BPF_TRACK_REQUEST_HEADERS=1`) require.NoError(t, err) require.NoError(t, compose.Up()) @@ -118,10 +116,6 @@ func TestMultiProcessAppTC(t *testing.T) { t.Run("Nested traces with kprobes: rust -> java -> node -> go -> python -> rails", func(t *testing.T) { testNestedHTTPTracesKProbes(t) }) - - t.Run("Nested traces with kprobes: SSL node python rails", func(t *testing.T) { - testNestedHTTPSTracesKProbes(t) - }) } require.NoError(t, compose.Close()) } diff --git a/third_party_licenses.csv b/third_party_licenses.csv index f5b1c8037..142391081 100644 --- a/third_party_licenses.csv +++ b/third_party_licenses.csv @@ -8,6 +8,7 @@ github.com/caarlos0/env/v9,https://github.com/grafana/beyla/blob/HEAD/vendor/git github.com/cenkalti/backoff/v4,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/cenkalti/backoff/v4/LICENSE,MIT github.com/cespare/xxhash/v2,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/cespare/xxhash/v2/LICENSE.txt,MIT github.com/cilium/ebpf,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/cilium/ebpf/LICENSE,MIT +github.com/containers/common/pkg/cgroupv2,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/containers/common/LICENSE,Apache-2.0 github.com/davecgh/go-spew/spew,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/davecgh/go-spew/LICENSE,ISC github.com/emicklei/go-restful/v3,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/emicklei/go-restful/v3/LICENSE,MIT github.com/evanphx/json-patch/v5,https://github.com/grafana/beyla/blob/HEAD/vendor/github.com/evanphx/json-patch/v5/LICENSE,BSD-3-Clause diff --git a/vendor/github.com/containers/common/LICENSE b/vendor/github.com/containers/common/LICENSE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/vendor/github.com/containers/common/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_linux.go b/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_linux.go new file mode 100644 index 000000000..749c89932 --- /dev/null +++ b/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_linux.go @@ -0,0 +1,27 @@ +package cgroupv2 + +import ( + "sync" + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + isCgroupV2Once sync.Once + isCgroupV2 bool + isCgroupV2Err error +) + +// Enabled returns whether we are running on cgroup v2 +func Enabled() (bool, error) { + isCgroupV2Once.Do(func() { + var st syscall.Statfs_t + if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil { + isCgroupV2, isCgroupV2Err = false, err + } else { + isCgroupV2, isCgroupV2Err = st.Type == unix.CGROUP2_SUPER_MAGIC, nil + } + }) + return isCgroupV2, isCgroupV2Err +} diff --git a/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_unsupported.go b/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_unsupported.go new file mode 100644 index 000000000..56269aa42 --- /dev/null +++ b/vendor/github.com/containers/common/pkg/cgroupv2/cgroups_unsupported.go @@ -0,0 +1,8 @@ +//go:build !linux + +package cgroupv2 + +// Enabled returns whether we are running on cgroup v2 +func Enabled() (bool, error) { + return false, nil +} diff --git a/vendor/github.com/fsnotify/fsnotify/.cirrus.yml b/vendor/github.com/fsnotify/fsnotify/.cirrus.yml index ffc7b992b..f4e7dbf37 100644 --- a/vendor/github.com/fsnotify/fsnotify/.cirrus.yml +++ b/vendor/github.com/fsnotify/fsnotify/.cirrus.yml @@ -1,7 +1,7 @@ freebsd_task: name: 'FreeBSD' freebsd_instance: - image_family: freebsd-13-2 + image_family: freebsd-14-1 install_script: - pkg update -f - pkg install -y go @@ -9,5 +9,6 @@ freebsd_task: # run tests as user "cirrus" instead of root - pw useradd cirrus -m - chown -R cirrus:cirrus . - - FSNOTIFY_BUFFER=4096 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... - - sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... + - FSNOTIFY_BUFFER=4096 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... + - sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./... + - FSNOTIFY_DEBUG=1 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race -v ./... diff --git a/vendor/github.com/fsnotify/fsnotify/.editorconfig b/vendor/github.com/fsnotify/fsnotify/.editorconfig deleted file mode 100644 index fad895851..000000000 --- a/vendor/github.com/fsnotify/fsnotify/.editorconfig +++ /dev/null @@ -1,12 +0,0 @@ -root = true - -[*.go] -indent_style = tab -indent_size = 4 -insert_final_newline = true - -[*.{yml,yaml}] -indent_style = space -indent_size = 2 -insert_final_newline = true -trim_trailing_whitespace = true diff --git a/vendor/github.com/fsnotify/fsnotify/.gitattributes b/vendor/github.com/fsnotify/fsnotify/.gitattributes deleted file mode 100644 index 32f1001be..000000000 --- a/vendor/github.com/fsnotify/fsnotify/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -go.sum linguist-generated diff --git a/vendor/github.com/fsnotify/fsnotify/.gitignore b/vendor/github.com/fsnotify/fsnotify/.gitignore index 391cc076b..daea9dd6d 100644 --- a/vendor/github.com/fsnotify/fsnotify/.gitignore +++ b/vendor/github.com/fsnotify/fsnotify/.gitignore @@ -5,3 +5,6 @@ # Output of go build ./cmd/fsnotify /fsnotify /fsnotify.exe + +/test/kqueue +/test/a.out diff --git a/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md b/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md index e0e575754..fa854785d 100644 --- a/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md +++ b/vendor/github.com/fsnotify/fsnotify/CHANGELOG.md @@ -1,8 +1,36 @@ # Changelog -Unreleased ----------- -Nothing yet. +1.8.0 2023-10-31 +---------------- + +### Additions + +- all: add `FSNOTIFY_DEBUG` to print debug logs to stderr ([#619]) + +### Changes and fixes + +- windows: fix behaviour of `WatchList()` to be consistent with other platforms ([#610]) + +- kqueue: ignore events with Ident=0 ([#590]) + +- kqueue: set O_CLOEXEC to prevent passing file descriptors to children ([#617]) + +- kqueue: emit events as "/path/dir/file" instead of "path/link/file" when watching a symlink ([#625]) + +- inotify: don't send event for IN_DELETE_SELF when also watching the parent ([#620]) + +- inotify: fix panic when calling Remove() in a goroutine ([#650]) + +- fen: allow watching subdirectories of watched directories ([#621]) + +[#590]: https://github.com/fsnotify/fsnotify/pull/590 +[#610]: https://github.com/fsnotify/fsnotify/pull/610 +[#617]: https://github.com/fsnotify/fsnotify/pull/617 +[#619]: https://github.com/fsnotify/fsnotify/pull/619 +[#620]: https://github.com/fsnotify/fsnotify/pull/620 +[#621]: https://github.com/fsnotify/fsnotify/pull/621 +[#625]: https://github.com/fsnotify/fsnotify/pull/625 +[#650]: https://github.com/fsnotify/fsnotify/pull/650 1.7.0 - 2023-10-22 ------------------ diff --git a/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md b/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md index ea379759d..e4ac2a2ff 100644 --- a/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md +++ b/vendor/github.com/fsnotify/fsnotify/CONTRIBUTING.md @@ -1,7 +1,7 @@ Thank you for your interest in contributing to fsnotify! We try to review and merge PRs in a reasonable timeframe, but please be aware that: -- To avoid "wasted" work, please discus changes on the issue tracker first. You +- To avoid "wasted" work, please discuss changes on the issue tracker first. You can just send PRs, but they may end up being rejected for one reason or the other. @@ -20,6 +20,124 @@ platforms. Testing different platforms locally can be done with something like Use the `-short` flag to make the "stress test" run faster. +Writing new tests +----------------- +Scripts in the testdata directory allow creating test cases in a "shell-like" +syntax. The basic format is: + + script + + Output: + desired output + +For example: + + # Create a new empty file with some data. + watch / + echo data >/file + + Output: + create /file + write /file + +Just create a new file to add a new test; select which tests to run with +`-run TestScript/[path]`. + +script +------ +The script is a "shell-like" script: + + cmd arg arg + +Comments are supported with `#`: + + # Comment + cmd arg arg # Comment + +All operations are done in a temp directory; a path like "/foo" is rewritten to +"/tmp/TestFoo/foo". + +Arguments can be quoted with `"` or `'`; there are no escapes and they're +functionally identical right now, but this may change in the future, so best to +assume shell-like rules. + + touch "/file with spaces" + +End-of-line escapes with `\` are not supported. + +### Supported commands + + watch path [ops] # Watch the path, reporting events for it. Nothing is + # watched by default. Optionally a list of ops can be + # given, as with AddWith(path, WithOps(...)). + unwatch path # Stop watching the path. + watchlist n # Assert watchlist length. + + stop # Stop running the script; for debugging. + debug [yes/no] # Enable/disable FSNOTIFY_DEBUG (tests are run in + parallel by default, so -parallel=1 is probably a good + idea). + + touch path + mkdir [-p] dir + ln -s target link # Only ln -s supported. + mkfifo path + mknod dev path + mv src dst + rm [-r] path + chmod mode path # Octal only + sleep time-in-ms + + cat path # Read path (does nothing with the data; just reads it). + echo str >>path # Append "str" to "path". + echo str >path # Truncate "path" and write "str". + + require reason # Skip the test if "reason" is true; "skip" and + skip reason # "require" behave identical; it supports both for + # readability. Possible reasons are: + # + # always Always skip this test. + # symlink Symlinks are supported (requires admin + # permissions on Windows). + # mkfifo Platform doesn't support FIFO named sockets. + # mknod Platform doesn't support device nodes. + + +output +------ +After `Output:` the desired output is given; this is indented by convention, but +that's not required. + +The format of that is: + + # Comment + event path # Comment + + system: + event path + system2: + event path + +Every event is one line, and any whitespace between the event and path are +ignored. The path can optionally be surrounded in ". Anything after a "#" is +ignored. + +Platform-specific tests can be added after GOOS; for example: + + watch / + touch /file + + Output: + # Tested if nothing else matches + create /file + + # Windows-specific test. + windows: + write /file + +You can specify multiple platforms with a comma (e.g. "windows, linux:"). +"kqueue" is a shortcut for all kqueue systems (BSD, macOS). + [goon]: https://github.com/arp242/goon [Vagrant]: https://www.vagrantup.com/ diff --git a/vendor/github.com/fsnotify/fsnotify/backend_fen.go b/vendor/github.com/fsnotify/fsnotify/backend_fen.go index 28497f1dd..c349c326c 100644 --- a/vendor/github.com/fsnotify/fsnotify/backend_fen.go +++ b/vendor/github.com/fsnotify/fsnotify/backend_fen.go @@ -1,8 +1,8 @@ //go:build solaris -// +build solaris -// Note: the documentation on the Watcher type and methods is generated from -// mkdoc.zsh +// FEN backend for illumos (supported) and Solaris (untested, but should work). +// +// See port_create(3c) etc. for docs. https://www.illumos.org/man/3C/port_create package fsnotify @@ -12,150 +12,33 @@ import ( "os" "path/filepath" "sync" + "time" + "github.com/fsnotify/fsnotify/internal" "golang.org/x/sys/unix" ) -// Watcher watches a set of paths, delivering events on a channel. -// -// A watcher should not be copied (e.g. pass it by pointer, rather than by -// value). -// -// # Linux notes -// -// When a file is removed a Remove event won't be emitted until all file -// descriptors are closed, and deletes will always emit a Chmod. For example: -// -// fp := os.Open("file") -// os.Remove("file") // Triggers Chmod -// fp.Close() // Triggers Remove -// -// This is the event that inotify sends, so not much can be changed about this. -// -// The fs.inotify.max_user_watches sysctl variable specifies the upper limit -// for the number of watches per user, and fs.inotify.max_user_instances -// specifies the maximum number of inotify instances per user. Every Watcher you -// create is an "instance", and every path you add is a "watch". -// -// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and -// /proc/sys/fs/inotify/max_user_instances -// -// To increase them you can use sysctl or write the value to the /proc file: -// -// # Default values on Linux 5.18 -// sysctl fs.inotify.max_user_watches=124983 -// sysctl fs.inotify.max_user_instances=128 -// -// To make the changes persist on reboot edit /etc/sysctl.conf or -// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check -// your distro's documentation): -// -// fs.inotify.max_user_watches=124983 -// fs.inotify.max_user_instances=128 -// -// Reaching the limit will result in a "no space left on device" or "too many open -// files" error. -// -// # kqueue notes (macOS, BSD) -// -// kqueue requires opening a file descriptor for every file that's being watched; -// so if you're watching a directory with five files then that's six file -// descriptors. You will run in to your system's "max open files" limit faster on -// these platforms. -// -// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to -// control the maximum number of open files, as well as /etc/login.conf on BSD -// systems. -// -// # Windows notes -// -// Paths can be added as "C:\path\to\dir", but forward slashes -// ("C:/path/to/dir") will also work. -// -// When a watched directory is removed it will always send an event for the -// directory itself, but may not send events for all files in that directory. -// Sometimes it will send events for all times, sometimes it will send no -// events, and often only for some files. -// -// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest -// value that is guaranteed to work with SMB filesystems. If you have many -// events in quick succession this may not be enough, and you will have to use -// [WithBufferSize] to increase the value. -type Watcher struct { - // Events sends the filesystem change events. - // - // fsnotify can send the following events; a "path" here can refer to a - // file, directory, symbolic link, or special file like a FIFO. - // - // fsnotify.Create A new path was created; this may be followed by one - // or more Write events if data also gets written to a - // file. - // - // fsnotify.Remove A path was removed. - // - // fsnotify.Rename A path was renamed. A rename is always sent with the - // old path as Event.Name, and a Create event will be - // sent with the new name. Renames are only sent for - // paths that are currently watched; e.g. moving an - // unmonitored file into a monitored directory will - // show up as just a Create. Similarly, renaming a file - // to outside a monitored directory will show up as - // only a Rename. - // - // fsnotify.Write A file or named pipe was written to. A Truncate will - // also trigger a Write. A single "write action" - // initiated by the user may show up as one or multiple - // writes, depending on when the system syncs things to - // disk. For example when compiling a large Go program - // you may get hundreds of Write events, and you may - // want to wait until you've stopped receiving them - // (see the dedup example in cmd/fsnotify). - // - // Some systems may send Write event for directories - // when the directory content changes. - // - // fsnotify.Chmod Attributes were changed. On Linux this is also sent - // when a file is removed (or more accurately, when a - // link to an inode is removed). On kqueue it's sent - // when a file is truncated. On Windows it's never - // sent. +type fen struct { Events chan Event - - // Errors sends any errors. - // - // ErrEventOverflow is used to indicate there are too many events: - // - // - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl) - // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. - // - kqueue, fen: Not used. Errors chan error mu sync.Mutex port *unix.EventPort - done chan struct{} // Channel for sending a "quit message" to the reader goroutine - dirs map[string]struct{} // Explicitly watched directories - watches map[string]struct{} // Explicitly watched non-directories + done chan struct{} // Channel for sending a "quit message" to the reader goroutine + dirs map[string]Op // Explicitly watched directories + watches map[string]Op // Explicitly watched non-directories } -// NewWatcher creates a new Watcher. -func NewWatcher() (*Watcher, error) { - return NewBufferedWatcher(0) +func newBackend(ev chan Event, errs chan error) (backend, error) { + return newBufferedBackend(0, ev, errs) } -// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events -// channel. -// -// The main use case for this is situations with a very large number of events -// where the kernel buffer size can't be increased (e.g. due to lack of -// permissions). An unbuffered Watcher will perform better for almost all use -// cases, and whenever possible you will be better off increasing the kernel -// buffers instead of adding a large userspace buffer. -func NewBufferedWatcher(sz uint) (*Watcher, error) { - w := &Watcher{ - Events: make(chan Event, sz), - Errors: make(chan error), - dirs: make(map[string]struct{}), - watches: make(map[string]struct{}), +func newBufferedBackend(sz uint, ev chan Event, errs chan error) (backend, error) { + w := &fen{ + Events: ev, + Errors: errs, + dirs: make(map[string]Op), + watches: make(map[string]Op), done: make(chan struct{}), } @@ -171,27 +54,30 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) { // sendEvent attempts to send an event to the user, returning true if the event // was put in the channel successfully and false if the watcher has been closed. -func (w *Watcher) sendEvent(name string, op Op) (sent bool) { +func (w *fen) sendEvent(name string, op Op) (sent bool) { select { - case w.Events <- Event{Name: name, Op: op}: - return true case <-w.done: return false + case w.Events <- Event{Name: name, Op: op}: + return true } } // sendError attempts to send an error to the user, returning true if the error // was put in the channel successfully and false if the watcher has been closed. -func (w *Watcher) sendError(err error) (sent bool) { - select { - case w.Errors <- err: +func (w *fen) sendError(err error) (sent bool) { + if err == nil { return true + } + select { case <-w.done: return false + case w.Errors <- err: + return true } } -func (w *Watcher) isClosed() bool { +func (w *fen) isClosed() bool { select { case <-w.done: return true @@ -200,8 +86,7 @@ func (w *Watcher) isClosed() bool { } } -// Close removes all watches and closes the Events channel. -func (w *Watcher) Close() error { +func (w *fen) Close() error { // Take the lock used by associateFile to prevent lingering events from // being processed after the close w.mu.Lock() @@ -213,60 +98,21 @@ func (w *Watcher) Close() error { return w.port.Close() } -// Add starts monitoring the path for changes. -// -// A path can only be watched once; watching it more than once is a no-op and will -// not return an error. Paths that do not yet exist on the filesystem cannot be -// watched. -// -// A watch will be automatically removed if the watched path is deleted or -// renamed. The exception is the Windows backend, which doesn't remove the -// watcher on renames. -// -// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special -// filesystems (/proc, /sys, etc.) generally don't work. -// -// Returns [ErrClosed] if [Watcher.Close] was called. -// -// See [Watcher.AddWith] for a version that allows adding options. -// -// # Watching directories -// -// All files in a directory are monitored, including new files that are created -// after the watcher is started. Subdirectories are not watched (i.e. it's -// non-recursive). -// -// # Watching files -// -// Watching individual files (rather than directories) is generally not -// recommended as many programs (especially editors) update files atomically: it -// will write to a temporary file which is then moved to to destination, -// overwriting the original (or some variant thereof). The watcher on the -// original file is now lost, as that no longer exists. -// -// The upshot of this is that a power failure or crash won't leave a -// half-written file. -// -// Watch the parent directory and use Event.Name to filter out files you're not -// interested in. There is an example of this in cmd/fsnotify/file.go. -func (w *Watcher) Add(name string) error { return w.AddWith(name) } +func (w *fen) Add(name string) error { return w.AddWith(name) } -// AddWith is like [Watcher.Add], but allows adding options. When using Add() -// the defaults described below are used. -// -// Possible options are: -// -// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on -// other platforms. The default is 64K (65536 bytes). -func (w *Watcher) AddWith(name string, opts ...addOpt) error { +func (w *fen) AddWith(name string, opts ...addOpt) error { if w.isClosed() { return ErrClosed } - if w.port.PathIsWatched(name) { - return nil + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), name) } - _ = getOptions(opts...) + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } // Currently we resolve symlinks that were explicitly requested to be // watched. Otherwise we would use LStat here. @@ -283,7 +129,7 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error { } w.mu.Lock() - w.dirs[name] = struct{}{} + w.dirs[name] = with.op w.mu.Unlock() return nil } @@ -294,26 +140,22 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error { } w.mu.Lock() - w.watches[name] = struct{}{} + w.watches[name] = with.op w.mu.Unlock() return nil } -// Remove stops monitoring the path for changes. -// -// Directories are always removed non-recursively. For example, if you added -// /tmp/dir and /tmp/dir/subdir then you will need to remove both. -// -// Removing a path that has not yet been added returns [ErrNonExistentWatch]. -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) Remove(name string) error { +func (w *fen) Remove(name string) error { if w.isClosed() { return nil } if !w.port.PathIsWatched(name) { return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } // The user has expressed an intent. Immediately remove this name from // whichever watch list it might be in. If it's not in there the delete @@ -346,7 +188,7 @@ func (w *Watcher) Remove(name string) error { } // readEvents contains the main loop that runs in a goroutine watching for events. -func (w *Watcher) readEvents() { +func (w *fen) readEvents() { // If this function returns, the watcher has been closed and we can close // these channels defer func() { @@ -382,17 +224,19 @@ func (w *Watcher) readEvents() { continue } + if debug { + internal.Debug(pevent.Path, pevent.Events) + } + err = w.handleEvent(&pevent) - if err != nil { - if !w.sendError(err) { - return - } + if !w.sendError(err) { + return } } } } -func (w *Watcher) handleDirectory(path string, stat os.FileInfo, follow bool, handler func(string, os.FileInfo, bool) error) error { +func (w *fen) handleDirectory(path string, stat os.FileInfo, follow bool, handler func(string, os.FileInfo, bool) error) error { files, err := os.ReadDir(path) if err != nil { return err @@ -418,7 +262,7 @@ func (w *Watcher) handleDirectory(path string, stat os.FileInfo, follow bool, ha // bitmap matches more than one event type (e.g. the file was both modified and // had the attributes changed between when the association was created and the // when event was returned) -func (w *Watcher) handleEvent(event *unix.PortEvent) error { +func (w *fen) handleEvent(event *unix.PortEvent) error { var ( events = event.Events path = event.Path @@ -510,15 +354,9 @@ func (w *Watcher) handleEvent(event *unix.PortEvent) error { } if events&unix.FILE_MODIFIED != 0 { - if fmode.IsDir() { - if watchedDir { - if err := w.updateDirectory(path); err != nil { - return err - } - } else { - if !w.sendEvent(path, Write) { - return nil - } + if fmode.IsDir() && watchedDir { + if err := w.updateDirectory(path); err != nil { + return err } } else { if !w.sendEvent(path, Write) { @@ -543,7 +381,7 @@ func (w *Watcher) handleEvent(event *unix.PortEvent) error { return nil } -func (w *Watcher) updateDirectory(path string) error { +func (w *fen) updateDirectory(path string) error { // The directory was modified, so we must find unwatched entities and watch // them. If something was removed from the directory, nothing will happen, // as everything else should still be watched. @@ -563,10 +401,8 @@ func (w *Watcher) updateDirectory(path string) error { return err } err = w.associateFile(path, finfo, false) - if err != nil { - if !w.sendError(err) { - return nil - } + if !w.sendError(err) { + return nil } if !w.sendEvent(path, Create) { return nil @@ -575,7 +411,7 @@ func (w *Watcher) updateDirectory(path string) error { return nil } -func (w *Watcher) associateFile(path string, stat os.FileInfo, follow bool) error { +func (w *fen) associateFile(path string, stat os.FileInfo, follow bool) error { if w.isClosed() { return ErrClosed } @@ -593,34 +429,34 @@ func (w *Watcher) associateFile(path string, stat os.FileInfo, follow bool) erro // cleared up that discrepancy. The most likely cause is that the event // has fired but we haven't processed it yet. err := w.port.DissociatePath(path) - if err != nil && err != unix.ENOENT { + if err != nil && !errors.Is(err, unix.ENOENT) { return err } } - // FILE_NOFOLLOW means we watch symlinks themselves rather than their - // targets. - events := unix.FILE_MODIFIED | unix.FILE_ATTRIB | unix.FILE_NOFOLLOW - if follow { - // We *DO* follow symlinks for explicitly watched entries. - events = unix.FILE_MODIFIED | unix.FILE_ATTRIB + + var events int + if !follow { + // Watch symlinks themselves rather than their targets unless this entry + // is explicitly watched. + events |= unix.FILE_NOFOLLOW + } + if true { // TODO: implement withOps() + events |= unix.FILE_MODIFIED } - return w.port.AssociatePath(path, stat, - events, - stat.Mode()) + if true { + events |= unix.FILE_ATTRIB + } + return w.port.AssociatePath(path, stat, events, stat.Mode()) } -func (w *Watcher) dissociateFile(path string, stat os.FileInfo, unused bool) error { +func (w *fen) dissociateFile(path string, stat os.FileInfo, unused bool) error { if !w.port.PathIsWatched(path) { return nil } return w.port.DissociatePath(path) } -// WatchList returns all paths explicitly added with [Watcher.Add] (and are not -// yet removed). -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) WatchList() []string { +func (w *fen) WatchList() []string { if w.isClosed() { return nil } @@ -638,3 +474,11 @@ func (w *Watcher) WatchList() []string { return entries } + +func (w *fen) xSupports(op Op) bool { + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/vendor/github.com/fsnotify/fsnotify/backend_inotify.go b/vendor/github.com/fsnotify/fsnotify/backend_inotify.go index 921c1c1e4..36c311694 100644 --- a/vendor/github.com/fsnotify/fsnotify/backend_inotify.go +++ b/vendor/github.com/fsnotify/fsnotify/backend_inotify.go @@ -1,8 +1,4 @@ //go:build linux && !appengine -// +build linux,!appengine - -// Note: the documentation on the Watcher type and methods is generated from -// mkdoc.zsh package fsnotify @@ -10,127 +6,20 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" "strings" "sync" + "time" "unsafe" + "github.com/fsnotify/fsnotify/internal" "golang.org/x/sys/unix" ) -// Watcher watches a set of paths, delivering events on a channel. -// -// A watcher should not be copied (e.g. pass it by pointer, rather than by -// value). -// -// # Linux notes -// -// When a file is removed a Remove event won't be emitted until all file -// descriptors are closed, and deletes will always emit a Chmod. For example: -// -// fp := os.Open("file") -// os.Remove("file") // Triggers Chmod -// fp.Close() // Triggers Remove -// -// This is the event that inotify sends, so not much can be changed about this. -// -// The fs.inotify.max_user_watches sysctl variable specifies the upper limit -// for the number of watches per user, and fs.inotify.max_user_instances -// specifies the maximum number of inotify instances per user. Every Watcher you -// create is an "instance", and every path you add is a "watch". -// -// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and -// /proc/sys/fs/inotify/max_user_instances -// -// To increase them you can use sysctl or write the value to the /proc file: -// -// # Default values on Linux 5.18 -// sysctl fs.inotify.max_user_watches=124983 -// sysctl fs.inotify.max_user_instances=128 -// -// To make the changes persist on reboot edit /etc/sysctl.conf or -// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check -// your distro's documentation): -// -// fs.inotify.max_user_watches=124983 -// fs.inotify.max_user_instances=128 -// -// Reaching the limit will result in a "no space left on device" or "too many open -// files" error. -// -// # kqueue notes (macOS, BSD) -// -// kqueue requires opening a file descriptor for every file that's being watched; -// so if you're watching a directory with five files then that's six file -// descriptors. You will run in to your system's "max open files" limit faster on -// these platforms. -// -// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to -// control the maximum number of open files, as well as /etc/login.conf on BSD -// systems. -// -// # Windows notes -// -// Paths can be added as "C:\path\to\dir", but forward slashes -// ("C:/path/to/dir") will also work. -// -// When a watched directory is removed it will always send an event for the -// directory itself, but may not send events for all files in that directory. -// Sometimes it will send events for all times, sometimes it will send no -// events, and often only for some files. -// -// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest -// value that is guaranteed to work with SMB filesystems. If you have many -// events in quick succession this may not be enough, and you will have to use -// [WithBufferSize] to increase the value. -type Watcher struct { - // Events sends the filesystem change events. - // - // fsnotify can send the following events; a "path" here can refer to a - // file, directory, symbolic link, or special file like a FIFO. - // - // fsnotify.Create A new path was created; this may be followed by one - // or more Write events if data also gets written to a - // file. - // - // fsnotify.Remove A path was removed. - // - // fsnotify.Rename A path was renamed. A rename is always sent with the - // old path as Event.Name, and a Create event will be - // sent with the new name. Renames are only sent for - // paths that are currently watched; e.g. moving an - // unmonitored file into a monitored directory will - // show up as just a Create. Similarly, renaming a file - // to outside a monitored directory will show up as - // only a Rename. - // - // fsnotify.Write A file or named pipe was written to. A Truncate will - // also trigger a Write. A single "write action" - // initiated by the user may show up as one or multiple - // writes, depending on when the system syncs things to - // disk. For example when compiling a large Go program - // you may get hundreds of Write events, and you may - // want to wait until you've stopped receiving them - // (see the dedup example in cmd/fsnotify). - // - // Some systems may send Write event for directories - // when the directory content changes. - // - // fsnotify.Chmod Attributes were changed. On Linux this is also sent - // when a file is removed (or more accurately, when a - // link to an inode is removed). On kqueue it's sent - // when a file is truncated. On Windows it's never - // sent. +type inotify struct { Events chan Event - - // Errors sends any errors. - // - // ErrEventOverflow is used to indicate there are too many events: - // - // - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl) - // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. - // - kqueue, fen: Not used. Errors chan error // Store fd here as os.File.Read() will no longer return on close after @@ -139,8 +28,26 @@ type Watcher struct { inotifyFile *os.File watches *watches done chan struct{} // Channel for sending a "quit message" to the reader goroutine - closeMu sync.Mutex + doneMu sync.Mutex doneResp chan struct{} // Channel to respond to Close + + // Store rename cookies in an array, with the index wrapping to 0. Almost + // all of the time what we get is a MOVED_FROM to set the cookie and the + // next event inotify sends will be MOVED_TO to read it. However, this is + // not guaranteed – as described in inotify(7) – and we may get other events + // between the two MOVED_* events (including other MOVED_* ones). + // + // A second issue is that moving a file outside the watched directory will + // trigger a MOVED_FROM to set the cookie, but we never see the MOVED_TO to + // read and delete it. So just storing it in a map would slowly leak memory. + // + // Doing it like this gives us a simple fast LRU-cache that won't allocate. + // Ten items should be more than enough for our purpose, and a loop over + // such a short array is faster than a map access anyway (not that it hugely + // matters since we're talking about hundreds of ns at the most, but still). + cookies [10]koekje + cookieIndex uint8 + cookiesMu sync.Mutex } type ( @@ -150,9 +57,14 @@ type ( path map[string]uint32 // pathname → wd } watch struct { - wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall) - flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags) - path string // Watch path. + wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall) + flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags) + path string // Watch path. + recurse bool // Recursion with ./...? + } + koekje struct { + cookie uint32 + path string } ) @@ -179,23 +91,45 @@ func (w *watches) add(ww *watch) { func (w *watches) remove(wd uint32) { w.mu.Lock() defer w.mu.Unlock() - delete(w.path, w.wd[wd].path) + watch := w.wd[wd] // Could have had Remove() called. See #616. + if watch == nil { + return + } + delete(w.path, watch.path) delete(w.wd, wd) } -func (w *watches) removePath(path string) (uint32, bool) { +func (w *watches) removePath(path string) ([]uint32, error) { w.mu.Lock() defer w.mu.Unlock() + path, recurse := recursivePath(path) wd, ok := w.path[path] if !ok { - return 0, false + return nil, fmt.Errorf("%w: %s", ErrNonExistentWatch, path) + } + + watch := w.wd[wd] + if recurse && !watch.recurse { + return nil, fmt.Errorf("can't use /... with non-recursive watch %q", path) } delete(w.path, path) delete(w.wd, wd) + if !watch.recurse { + return []uint32{wd}, nil + } - return wd, true + wds := make([]uint32, 0, 8) + wds = append(wds, wd) + for p, rwd := range w.path { + if filepath.HasPrefix(p, path) { + delete(w.path, p) + delete(w.wd, rwd) + wds = append(wds, rwd) + } + } + return wds, nil } func (w *watches) byPath(path string) *watch { @@ -236,20 +170,11 @@ func (w *watches) updatePath(path string, f func(*watch) (*watch, error)) error return nil } -// NewWatcher creates a new Watcher. -func NewWatcher() (*Watcher, error) { - return NewBufferedWatcher(0) +func newBackend(ev chan Event, errs chan error) (backend, error) { + return newBufferedBackend(0, ev, errs) } -// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events -// channel. -// -// The main use case for this is situations with a very large number of events -// where the kernel buffer size can't be increased (e.g. due to lack of -// permissions). An unbuffered Watcher will perform better for almost all use -// cases, and whenever possible you will be better off increasing the kernel -// buffers instead of adding a large userspace buffer. -func NewBufferedWatcher(sz uint) (*Watcher, error) { +func newBufferedBackend(sz uint, ev chan Event, errs chan error) (backend, error) { // Need to set nonblocking mode for SetDeadline to work, otherwise blocking // I/O operations won't terminate on close. fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC | unix.IN_NONBLOCK) @@ -257,12 +182,12 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) { return nil, errno } - w := &Watcher{ + w := &inotify{ + Events: ev, + Errors: errs, fd: fd, inotifyFile: os.NewFile(uintptr(fd), ""), watches: newWatches(), - Events: make(chan Event, sz), - Errors: make(chan error), done: make(chan struct{}), doneResp: make(chan struct{}), } @@ -272,26 +197,29 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) { } // Returns true if the event was sent, or false if watcher is closed. -func (w *Watcher) sendEvent(e Event) bool { +func (w *inotify) sendEvent(e Event) bool { select { - case w.Events <- e: - return true case <-w.done: return false + case w.Events <- e: + return true } } // Returns true if the error was sent, or false if watcher is closed. -func (w *Watcher) sendError(err error) bool { - select { - case w.Errors <- err: +func (w *inotify) sendError(err error) bool { + if err == nil { return true + } + select { case <-w.done: return false + case w.Errors <- err: + return true } } -func (w *Watcher) isClosed() bool { +func (w *inotify) isClosed() bool { select { case <-w.done: return true @@ -300,15 +228,14 @@ func (w *Watcher) isClosed() bool { } } -// Close removes all watches and closes the Events channel. -func (w *Watcher) Close() error { - w.closeMu.Lock() +func (w *inotify) Close() error { + w.doneMu.Lock() if w.isClosed() { - w.closeMu.Unlock() + w.doneMu.Unlock() return nil } close(w.done) - w.closeMu.Unlock() + w.doneMu.Unlock() // Causes any blocking reads to return with an error, provided the file // still supports deadline operations. @@ -323,78 +250,104 @@ func (w *Watcher) Close() error { return nil } -// Add starts monitoring the path for changes. -// -// A path can only be watched once; watching it more than once is a no-op and will -// not return an error. Paths that do not yet exist on the filesystem cannot be -// watched. -// -// A watch will be automatically removed if the watched path is deleted or -// renamed. The exception is the Windows backend, which doesn't remove the -// watcher on renames. -// -// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special -// filesystems (/proc, /sys, etc.) generally don't work. -// -// Returns [ErrClosed] if [Watcher.Close] was called. -// -// See [Watcher.AddWith] for a version that allows adding options. -// -// # Watching directories -// -// All files in a directory are monitored, including new files that are created -// after the watcher is started. Subdirectories are not watched (i.e. it's -// non-recursive). -// -// # Watching files -// -// Watching individual files (rather than directories) is generally not -// recommended as many programs (especially editors) update files atomically: it -// will write to a temporary file which is then moved to to destination, -// overwriting the original (or some variant thereof). The watcher on the -// original file is now lost, as that no longer exists. -// -// The upshot of this is that a power failure or crash won't leave a -// half-written file. -// -// Watch the parent directory and use Event.Name to filter out files you're not -// interested in. There is an example of this in cmd/fsnotify/file.go. -func (w *Watcher) Add(name string) error { return w.AddWith(name) } - -// AddWith is like [Watcher.Add], but allows adding options. When using Add() -// the defaults described below are used. -// -// Possible options are: -// -// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on -// other platforms. The default is 64K (65536 bytes). -func (w *Watcher) AddWith(name string, opts ...addOpt) error { +func (w *inotify) Add(name string) error { return w.AddWith(name) } + +func (w *inotify) AddWith(path string, opts ...addOpt) error { if w.isClosed() { return ErrClosed } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), path) + } + + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } - name = filepath.Clean(name) - _ = getOptions(opts...) + path, recurse := recursivePath(path) + if recurse { + return filepath.WalkDir(path, func(root string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() { + if root == path { + return fmt.Errorf("fsnotify: not a directory: %q", path) + } + return nil + } - var flags uint32 = unix.IN_MOVED_TO | unix.IN_MOVED_FROM | - unix.IN_CREATE | unix.IN_ATTRIB | unix.IN_MODIFY | - unix.IN_MOVE_SELF | unix.IN_DELETE | unix.IN_DELETE_SELF + // Send a Create event when adding new directory from a recursive + // watch; this is for "mkdir -p one/two/three". Usually all those + // directories will be created before we can set up watchers on the + // subdirectories, so only "one" would be sent as a Create event and + // not "one/two" and "one/two/three" (inotifywait -r has the same + // problem). + if with.sendCreate && root != path { + w.sendEvent(Event{Name: root, Op: Create}) + } + + return w.add(root, with, true) + }) + } - return w.watches.updatePath(name, func(existing *watch) (*watch, error) { + return w.add(path, with, false) +} + +func (w *inotify) add(path string, with withOpts, recurse bool) error { + var flags uint32 + if with.noFollow { + flags |= unix.IN_DONT_FOLLOW + } + if with.op.Has(Create) { + flags |= unix.IN_CREATE + } + if with.op.Has(Write) { + flags |= unix.IN_MODIFY + } + if with.op.Has(Remove) { + flags |= unix.IN_DELETE | unix.IN_DELETE_SELF + } + if with.op.Has(Rename) { + flags |= unix.IN_MOVED_TO | unix.IN_MOVED_FROM | unix.IN_MOVE_SELF + } + if with.op.Has(Chmod) { + flags |= unix.IN_ATTRIB + } + if with.op.Has(xUnportableOpen) { + flags |= unix.IN_OPEN + } + if with.op.Has(xUnportableRead) { + flags |= unix.IN_ACCESS + } + if with.op.Has(xUnportableCloseWrite) { + flags |= unix.IN_CLOSE_WRITE + } + if with.op.Has(xUnportableCloseRead) { + flags |= unix.IN_CLOSE_NOWRITE + } + return w.register(path, flags, recurse) +} + +func (w *inotify) register(path string, flags uint32, recurse bool) error { + return w.watches.updatePath(path, func(existing *watch) (*watch, error) { if existing != nil { flags |= existing.flags | unix.IN_MASK_ADD } - wd, err := unix.InotifyAddWatch(w.fd, name, flags) + wd, err := unix.InotifyAddWatch(w.fd, path, flags) if wd == -1 { return nil, err } if existing == nil { return &watch{ - wd: uint32(wd), - path: name, - flags: flags, + wd: uint32(wd), + path: path, + flags: flags, + recurse: recurse, }, nil } @@ -404,49 +357,44 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error { }) } -// Remove stops monitoring the path for changes. -// -// Directories are always removed non-recursively. For example, if you added -// /tmp/dir and /tmp/dir/subdir then you will need to remove both. -// -// Removing a path that has not yet been added returns [ErrNonExistentWatch]. -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) Remove(name string) error { +func (w *inotify) Remove(name string) error { if w.isClosed() { return nil } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } return w.remove(filepath.Clean(name)) } -func (w *Watcher) remove(name string) error { - wd, ok := w.watches.removePath(name) - if !ok { - return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) - } - - success, errno := unix.InotifyRmWatch(w.fd, wd) - if success == -1 { - // TODO: Perhaps it's not helpful to return an error here in every case; - // The only two possible errors are: - // - // - EBADF, which happens when w.fd is not a valid file descriptor - // of any kind. - // - EINVAL, which is when fd is not an inotify descriptor or wd - // is not a valid watch descriptor. Watch descriptors are - // invalidated when they are removed explicitly or implicitly; - // explicitly by inotify_rm_watch, implicitly when the file they - // are watching is deleted. - return errno +func (w *inotify) remove(name string) error { + wds, err := w.watches.removePath(name) + if err != nil { + return err + } + + for _, wd := range wds { + _, err := unix.InotifyRmWatch(w.fd, wd) + if err != nil { + // TODO: Perhaps it's not helpful to return an error here in every + // case; the only two possible errors are: + // + // EBADF, which happens when w.fd is not a valid file descriptor of + // any kind. + // + // EINVAL, which is when fd is not an inotify descriptor or wd is + // not a valid watch descriptor. Watch descriptors are invalidated + // when they are removed explicitly or implicitly; explicitly by + // inotify_rm_watch, implicitly when the file they are watching is + // deleted. + return err + } } return nil } -// WatchList returns all paths explicitly added with [Watcher.Add] (and are not -// yet removed). -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) WatchList() []string { +func (w *inotify) WatchList() []string { if w.isClosed() { return nil } @@ -463,7 +411,7 @@ func (w *Watcher) WatchList() []string { // readEvents reads from the inotify file descriptor, converts the // received events into Event objects and sends them via the Events channel -func (w *Watcher) readEvents() { +func (w *inotify) readEvents() { defer func() { close(w.doneResp) close(w.Errors) @@ -506,15 +454,17 @@ func (w *Watcher) readEvents() { continue } - var offset uint32 // We don't know how many events we just read into the buffer // While the offset points to at least one whole event... + var offset uint32 for offset <= uint32(n-unix.SizeofInotifyEvent) { var ( // Point "raw" to the event in the buffer raw = (*unix.InotifyEvent)(unsafe.Pointer(&buf[offset])) mask = uint32(raw.Mask) nameLen = uint32(raw.Len) + // Move to the next event in the buffer + next = func() { offset += unix.SizeofInotifyEvent + nameLen } ) if mask&unix.IN_Q_OVERFLOW != 0 { @@ -523,21 +473,53 @@ func (w *Watcher) readEvents() { } } - // If the event happened to the watched directory or the watched file, the kernel - // doesn't append the filename to the event, but we would like to always fill the - // the "Name" field with a valid filename. We retrieve the path of the watch from - // the "paths" map. + /// If the event happened to the watched directory or the watched + /// file, the kernel doesn't append the filename to the event, but + /// we would like to always fill the the "Name" field with a valid + /// filename. We retrieve the path of the watch from the "paths" + /// map. watch := w.watches.byWd(uint32(raw.Wd)) + /// Can be nil if Remove() was called in another goroutine for this + /// path inbetween reading the events from the kernel and reading + /// the internal state. Not much we can do about it, so just skip. + /// See #616. + if watch == nil { + next() + continue + } + + name := watch.path + if nameLen > 0 { + /// Point "bytes" at the first byte of the filename + bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen] + /// The filename is padded with NULL bytes. TrimRight() gets rid of those. + name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000") + } + + if debug { + internal.Debug(name, raw.Mask, raw.Cookie) + } + + if mask&unix.IN_IGNORED != 0 { //&& event.Op != 0 + next() + continue + } // inotify will automatically remove the watch on deletes; just need // to clean our state here. - if watch != nil && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF { + if mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF { w.watches.remove(watch.wd) } + // We can't really update the state when a watched path is moved; // only IN_MOVE_SELF is sent and not IN_MOVED_{FROM,TO}. So remove // the watch. - if watch != nil && mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF { + if mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF { + if watch.recurse { + next() // Do nothing + continue + } + err := w.remove(watch.path) if err != nil && !errors.Is(err, ErrNonExistentWatch) { if !w.sendError(err) { @@ -546,34 +528,69 @@ func (w *Watcher) readEvents() { } } - var name string - if watch != nil { - name = watch.path - } - if nameLen > 0 { - // Point "bytes" at the first byte of the filename - bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen] - // The filename is padded with NULL bytes. TrimRight() gets rid of those. - name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000") + /// Skip if we're watching both this path and the parent; the parent + /// will already send a delete so no need to do it twice. + if mask&unix.IN_DELETE_SELF != 0 { + if _, ok := w.watches.path[filepath.Dir(watch.path)]; ok { + next() + continue + } } - event := w.newEvent(name, mask) + ev := w.newEvent(name, mask, raw.Cookie) + // Need to update watch path for recurse. + if watch.recurse { + isDir := mask&unix.IN_ISDIR == unix.IN_ISDIR + /// New directory created: set up watch on it. + if isDir && ev.Has(Create) { + err := w.register(ev.Name, watch.flags, true) + if !w.sendError(err) { + return + } - // Send the events that are not ignored on the events channel - if mask&unix.IN_IGNORED == 0 { - if !w.sendEvent(event) { - return + // This was a directory rename, so we need to update all + // the children. + // + // TODO: this is of course pretty slow; we should use a + // better data structure for storing all of this, e.g. store + // children in the watch. I have some code for this in my + // kqueue refactor we can use in the future. For now I'm + // okay with this as it's not publicly available. + // Correctness first, performance second. + if ev.renamedFrom != "" { + w.watches.mu.Lock() + for k, ww := range w.watches.wd { + if k == watch.wd || ww.path == ev.Name { + continue + } + if strings.HasPrefix(ww.path, ev.renamedFrom) { + ww.path = strings.Replace(ww.path, ev.renamedFrom, ev.Name, 1) + w.watches.wd[k] = ww + } + } + w.watches.mu.Unlock() + } } } - // Move to the next event in the buffer - offset += unix.SizeofInotifyEvent + nameLen + /// Send the events that are not ignored on the events channel + if !w.sendEvent(ev) { + return + } + next() } } } -// newEvent returns an platform-independent Event based on an inotify mask. -func (w *Watcher) newEvent(name string, mask uint32) Event { +func (w *inotify) isRecursive(path string) bool { + ww := w.watches.byPath(path) + if ww == nil { // path could be a file, so also check the Dir. + ww = w.watches.byPath(filepath.Dir(path)) + } + return ww != nil && ww.recurse +} + +func (w *inotify) newEvent(name string, mask, cookie uint32) Event { e := Event{Name: name} if mask&unix.IN_CREATE == unix.IN_CREATE || mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO { e.Op |= Create @@ -584,11 +601,58 @@ func (w *Watcher) newEvent(name string, mask uint32) Event { if mask&unix.IN_MODIFY == unix.IN_MODIFY { e.Op |= Write } + if mask&unix.IN_OPEN == unix.IN_OPEN { + e.Op |= xUnportableOpen + } + if mask&unix.IN_ACCESS == unix.IN_ACCESS { + e.Op |= xUnportableRead + } + if mask&unix.IN_CLOSE_WRITE == unix.IN_CLOSE_WRITE { + e.Op |= xUnportableCloseWrite + } + if mask&unix.IN_CLOSE_NOWRITE == unix.IN_CLOSE_NOWRITE { + e.Op |= xUnportableCloseRead + } if mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF || mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM { e.Op |= Rename } if mask&unix.IN_ATTRIB == unix.IN_ATTRIB { e.Op |= Chmod } + + if cookie != 0 { + if mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM { + w.cookiesMu.Lock() + w.cookies[w.cookieIndex] = koekje{cookie: cookie, path: e.Name} + w.cookieIndex++ + if w.cookieIndex > 9 { + w.cookieIndex = 0 + } + w.cookiesMu.Unlock() + } else if mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO { + w.cookiesMu.Lock() + var prev string + for _, c := range w.cookies { + if c.cookie == cookie { + prev = c.path + break + } + } + w.cookiesMu.Unlock() + e.renamedFrom = prev + } + } return e } + +func (w *inotify) xSupports(op Op) bool { + return true // Supports everything. +} + +func (w *inotify) state() { + w.watches.mu.Lock() + defer w.watches.mu.Unlock() + for wd, ww := range w.watches.wd { + fmt.Fprintf(os.Stderr, "%4d: recurse=%t %q\n", wd, ww.recurse, ww.path) + } +} diff --git a/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go b/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go index 063a0915a..d8de5ab76 100644 --- a/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go +++ b/vendor/github.com/fsnotify/fsnotify/backend_kqueue.go @@ -1,8 +1,4 @@ //go:build freebsd || openbsd || netbsd || dragonfly || darwin -// +build freebsd openbsd netbsd dragonfly darwin - -// Note: the documentation on the Watcher type and methods is generated from -// mkdoc.zsh package fsnotify @@ -11,174 +7,195 @@ import ( "fmt" "os" "path/filepath" + "runtime" "sync" + "time" + "github.com/fsnotify/fsnotify/internal" "golang.org/x/sys/unix" ) -// Watcher watches a set of paths, delivering events on a channel. -// -// A watcher should not be copied (e.g. pass it by pointer, rather than by -// value). -// -// # Linux notes -// -// When a file is removed a Remove event won't be emitted until all file -// descriptors are closed, and deletes will always emit a Chmod. For example: -// -// fp := os.Open("file") -// os.Remove("file") // Triggers Chmod -// fp.Close() // Triggers Remove -// -// This is the event that inotify sends, so not much can be changed about this. -// -// The fs.inotify.max_user_watches sysctl variable specifies the upper limit -// for the number of watches per user, and fs.inotify.max_user_instances -// specifies the maximum number of inotify instances per user. Every Watcher you -// create is an "instance", and every path you add is a "watch". -// -// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and -// /proc/sys/fs/inotify/max_user_instances -// -// To increase them you can use sysctl or write the value to the /proc file: -// -// # Default values on Linux 5.18 -// sysctl fs.inotify.max_user_watches=124983 -// sysctl fs.inotify.max_user_instances=128 -// -// To make the changes persist on reboot edit /etc/sysctl.conf or -// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check -// your distro's documentation): -// -// fs.inotify.max_user_watches=124983 -// fs.inotify.max_user_instances=128 -// -// Reaching the limit will result in a "no space left on device" or "too many open -// files" error. -// -// # kqueue notes (macOS, BSD) -// -// kqueue requires opening a file descriptor for every file that's being watched; -// so if you're watching a directory with five files then that's six file -// descriptors. You will run in to your system's "max open files" limit faster on -// these platforms. -// -// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to -// control the maximum number of open files, as well as /etc/login.conf on BSD -// systems. -// -// # Windows notes -// -// Paths can be added as "C:\path\to\dir", but forward slashes -// ("C:/path/to/dir") will also work. -// -// When a watched directory is removed it will always send an event for the -// directory itself, but may not send events for all files in that directory. -// Sometimes it will send events for all times, sometimes it will send no -// events, and often only for some files. -// -// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest -// value that is guaranteed to work with SMB filesystems. If you have many -// events in quick succession this may not be enough, and you will have to use -// [WithBufferSize] to increase the value. -type Watcher struct { - // Events sends the filesystem change events. - // - // fsnotify can send the following events; a "path" here can refer to a - // file, directory, symbolic link, or special file like a FIFO. - // - // fsnotify.Create A new path was created; this may be followed by one - // or more Write events if data also gets written to a - // file. - // - // fsnotify.Remove A path was removed. - // - // fsnotify.Rename A path was renamed. A rename is always sent with the - // old path as Event.Name, and a Create event will be - // sent with the new name. Renames are only sent for - // paths that are currently watched; e.g. moving an - // unmonitored file into a monitored directory will - // show up as just a Create. Similarly, renaming a file - // to outside a monitored directory will show up as - // only a Rename. - // - // fsnotify.Write A file or named pipe was written to. A Truncate will - // also trigger a Write. A single "write action" - // initiated by the user may show up as one or multiple - // writes, depending on when the system syncs things to - // disk. For example when compiling a large Go program - // you may get hundreds of Write events, and you may - // want to wait until you've stopped receiving them - // (see the dedup example in cmd/fsnotify). - // - // Some systems may send Write event for directories - // when the directory content changes. - // - // fsnotify.Chmod Attributes were changed. On Linux this is also sent - // when a file is removed (or more accurately, when a - // link to an inode is removed). On kqueue it's sent - // when a file is truncated. On Windows it's never - // sent. +type kqueue struct { Events chan Event - - // Errors sends any errors. - // - // ErrEventOverflow is used to indicate there are too many events: - // - // - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl) - // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. - // - kqueue, fen: Not used. Errors chan error - done chan struct{} - kq int // File descriptor (as returned by the kqueue() syscall). - closepipe [2]int // Pipe used for closing. - mu sync.Mutex // Protects access to watcher data - watches map[string]int // Watched file descriptors (key: path). - watchesByDir map[string]map[int]struct{} // Watched file descriptors indexed by the parent directory (key: dirname(path)). - userWatches map[string]struct{} // Watches added with Watcher.Add() - dirFlags map[string]uint32 // Watched directories to fflags used in kqueue. - paths map[int]pathInfo // File descriptors to path names for processing kqueue events. - fileExists map[string]struct{} // Keep track of if we know this file exists (to stop duplicate create events). - isClosed bool // Set to true when Close() is first called + kq int // File descriptor (as returned by the kqueue() syscall). + closepipe [2]int // Pipe used for closing kq. + watches *watches + done chan struct{} + doneMu sync.Mutex } -type pathInfo struct { - name string - isDir bool +type ( + watches struct { + mu sync.RWMutex + wd map[int]watch // wd → watch + path map[string]int // pathname → wd + byDir map[string]map[int]struct{} // dirname(path) → wd + seen map[string]struct{} // Keep track of if we know this file exists. + byUser map[string]struct{} // Watches added with Watcher.Add() + } + watch struct { + wd int + name string + linkName string // In case of links; name is the target, and this is the link. + isDir bool + dirFlags uint32 + } +) + +func newWatches() *watches { + return &watches{ + wd: make(map[int]watch), + path: make(map[string]int), + byDir: make(map[string]map[int]struct{}), + seen: make(map[string]struct{}), + byUser: make(map[string]struct{}), + } } -// NewWatcher creates a new Watcher. -func NewWatcher() (*Watcher, error) { - return NewBufferedWatcher(0) +func (w *watches) listPaths(userOnly bool) []string { + w.mu.RLock() + defer w.mu.RUnlock() + + if userOnly { + l := make([]string, 0, len(w.byUser)) + for p := range w.byUser { + l = append(l, p) + } + return l + } + + l := make([]string, 0, len(w.path)) + for p := range w.path { + l = append(l, p) + } + return l } -// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events -// channel. -// -// The main use case for this is situations with a very large number of events -// where the kernel buffer size can't be increased (e.g. due to lack of -// permissions). An unbuffered Watcher will perform better for almost all use -// cases, and whenever possible you will be better off increasing the kernel -// buffers instead of adding a large userspace buffer. -func NewBufferedWatcher(sz uint) (*Watcher, error) { +func (w *watches) watchesInDir(path string) []string { + w.mu.RLock() + defer w.mu.RUnlock() + + l := make([]string, 0, 4) + for fd := range w.byDir[path] { + info := w.wd[fd] + if _, ok := w.byUser[info.name]; !ok { + l = append(l, info.name) + } + } + return l +} + +// Mark path as added by the user. +func (w *watches) addUserWatch(path string) { + w.mu.Lock() + defer w.mu.Unlock() + w.byUser[path] = struct{}{} +} + +func (w *watches) addLink(path string, fd int) { + w.mu.Lock() + defer w.mu.Unlock() + + w.path[path] = fd + w.seen[path] = struct{}{} +} + +func (w *watches) add(path, linkPath string, fd int, isDir bool) { + w.mu.Lock() + defer w.mu.Unlock() + + w.path[path] = fd + w.wd[fd] = watch{wd: fd, name: path, linkName: linkPath, isDir: isDir} + + parent := filepath.Dir(path) + byDir, ok := w.byDir[parent] + if !ok { + byDir = make(map[int]struct{}, 1) + w.byDir[parent] = byDir + } + byDir[fd] = struct{}{} +} + +func (w *watches) byWd(fd int) (watch, bool) { + w.mu.RLock() + defer w.mu.RUnlock() + info, ok := w.wd[fd] + return info, ok +} + +func (w *watches) byPath(path string) (watch, bool) { + w.mu.RLock() + defer w.mu.RUnlock() + info, ok := w.wd[w.path[path]] + return info, ok +} + +func (w *watches) updateDirFlags(path string, flags uint32) { + w.mu.Lock() + defer w.mu.Unlock() + + fd := w.path[path] + info := w.wd[fd] + info.dirFlags = flags + w.wd[fd] = info +} + +func (w *watches) remove(fd int, path string) bool { + w.mu.Lock() + defer w.mu.Unlock() + + isDir := w.wd[fd].isDir + delete(w.path, path) + delete(w.byUser, path) + + parent := filepath.Dir(path) + delete(w.byDir[parent], fd) + + if len(w.byDir[parent]) == 0 { + delete(w.byDir, parent) + } + + delete(w.wd, fd) + delete(w.seen, path) + return isDir +} + +func (w *watches) markSeen(path string, exists bool) { + w.mu.Lock() + defer w.mu.Unlock() + if exists { + w.seen[path] = struct{}{} + } else { + delete(w.seen, path) + } +} + +func (w *watches) seenBefore(path string) bool { + w.mu.RLock() + defer w.mu.RUnlock() + _, ok := w.seen[path] + return ok +} + +func newBackend(ev chan Event, errs chan error) (backend, error) { + return newBufferedBackend(0, ev, errs) +} + +func newBufferedBackend(sz uint, ev chan Event, errs chan error) (backend, error) { kq, closepipe, err := newKqueue() if err != nil { return nil, err } - w := &Watcher{ - kq: kq, - closepipe: closepipe, - watches: make(map[string]int), - watchesByDir: make(map[string]map[int]struct{}), - dirFlags: make(map[string]uint32), - paths: make(map[int]pathInfo), - fileExists: make(map[string]struct{}), - userWatches: make(map[string]struct{}), - Events: make(chan Event, sz), - Errors: make(chan error), - done: make(chan struct{}), + w := &kqueue{ + Events: ev, + Errors: errs, + kq: kq, + closepipe: closepipe, + done: make(chan struct{}), + watches: newWatches(), } go w.readEvents() @@ -203,6 +220,8 @@ func newKqueue() (kq int, closepipe [2]int, err error) { unix.Close(kq) return kq, closepipe, err } + unix.CloseOnExec(closepipe[0]) + unix.CloseOnExec(closepipe[1]) // Register changes to listen on the closepipe. changes := make([]unix.Kevent_t, 1) @@ -221,166 +240,108 @@ func newKqueue() (kq int, closepipe [2]int, err error) { } // Returns true if the event was sent, or false if watcher is closed. -func (w *Watcher) sendEvent(e Event) bool { +func (w *kqueue) sendEvent(e Event) bool { select { - case w.Events <- e: - return true case <-w.done: return false + case w.Events <- e: + return true } } // Returns true if the error was sent, or false if watcher is closed. -func (w *Watcher) sendError(err error) bool { +func (w *kqueue) sendError(err error) bool { + if err == nil { + return true + } select { + case <-w.done: + return false case w.Errors <- err: return true + } +} + +func (w *kqueue) isClosed() bool { + select { case <-w.done: + return true + default: return false } } -// Close removes all watches and closes the Events channel. -func (w *Watcher) Close() error { - w.mu.Lock() - if w.isClosed { - w.mu.Unlock() +func (w *kqueue) Close() error { + w.doneMu.Lock() + if w.isClosed() { + w.doneMu.Unlock() return nil } - w.isClosed = true + close(w.done) + w.doneMu.Unlock() - // copy paths to remove while locked - pathsToRemove := make([]string, 0, len(w.watches)) - for name := range w.watches { - pathsToRemove = append(pathsToRemove, name) - } - w.mu.Unlock() // Unlock before calling Remove, which also locks + pathsToRemove := w.watches.listPaths(false) for _, name := range pathsToRemove { w.Remove(name) } // Send "quit" message to the reader goroutine. unix.Close(w.closepipe[1]) - close(w.done) - return nil } -// Add starts monitoring the path for changes. -// -// A path can only be watched once; watching it more than once is a no-op and will -// not return an error. Paths that do not yet exist on the filesystem cannot be -// watched. -// -// A watch will be automatically removed if the watched path is deleted or -// renamed. The exception is the Windows backend, which doesn't remove the -// watcher on renames. -// -// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special -// filesystems (/proc, /sys, etc.) generally don't work. -// -// Returns [ErrClosed] if [Watcher.Close] was called. -// -// See [Watcher.AddWith] for a version that allows adding options. -// -// # Watching directories -// -// All files in a directory are monitored, including new files that are created -// after the watcher is started. Subdirectories are not watched (i.e. it's -// non-recursive). -// -// # Watching files -// -// Watching individual files (rather than directories) is generally not -// recommended as many programs (especially editors) update files atomically: it -// will write to a temporary file which is then moved to to destination, -// overwriting the original (or some variant thereof). The watcher on the -// original file is now lost, as that no longer exists. -// -// The upshot of this is that a power failure or crash won't leave a -// half-written file. -// -// Watch the parent directory and use Event.Name to filter out files you're not -// interested in. There is an example of this in cmd/fsnotify/file.go. -func (w *Watcher) Add(name string) error { return w.AddWith(name) } +func (w *kqueue) Add(name string) error { return w.AddWith(name) } -// AddWith is like [Watcher.Add], but allows adding options. When using Add() -// the defaults described below are used. -// -// Possible options are: -// -// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on -// other platforms. The default is 64K (65536 bytes). -func (w *Watcher) AddWith(name string, opts ...addOpt) error { - _ = getOptions(opts...) +func (w *kqueue) AddWith(name string, opts ...addOpt) error { + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } + + with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } - w.mu.Lock() - w.userWatches[name] = struct{}{} - w.mu.Unlock() _, err := w.addWatch(name, noteAllEvents) - return err + if err != nil { + return err + } + w.watches.addUserWatch(name) + return nil } -// Remove stops monitoring the path for changes. -// -// Directories are always removed non-recursively. For example, if you added -// /tmp/dir and /tmp/dir/subdir then you will need to remove both. -// -// Removing a path that has not yet been added returns [ErrNonExistentWatch]. -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) Remove(name string) error { +func (w *kqueue) Remove(name string) error { + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), name) + } return w.remove(name, true) } -func (w *Watcher) remove(name string, unwatchFiles bool) error { - name = filepath.Clean(name) - w.mu.Lock() - if w.isClosed { - w.mu.Unlock() +func (w *kqueue) remove(name string, unwatchFiles bool) error { + if w.isClosed() { return nil } - watchfd, ok := w.watches[name] - w.mu.Unlock() + + name = filepath.Clean(name) + info, ok := w.watches.byPath(name) if !ok { return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) } - err := w.register([]int{watchfd}, unix.EV_DELETE, 0) + err := w.register([]int{info.wd}, unix.EV_DELETE, 0) if err != nil { return err } - unix.Close(watchfd) - - w.mu.Lock() - isDir := w.paths[watchfd].isDir - delete(w.watches, name) - delete(w.userWatches, name) - - parentName := filepath.Dir(name) - delete(w.watchesByDir[parentName], watchfd) - - if len(w.watchesByDir[parentName]) == 0 { - delete(w.watchesByDir, parentName) - } + unix.Close(info.wd) - delete(w.paths, watchfd) - delete(w.dirFlags, name) - delete(w.fileExists, name) - w.mu.Unlock() + isDir := w.watches.remove(info.wd, name) // Find all watched paths that are in this directory that are not external. if unwatchFiles && isDir { - var pathsToRemove []string - w.mu.Lock() - for fd := range w.watchesByDir[name] { - path := w.paths[fd] - if _, ok := w.userWatches[path.name]; !ok { - pathsToRemove = append(pathsToRemove, path.name) - } - } - w.mu.Unlock() + pathsToRemove := w.watches.watchesInDir(name) for _, name := range pathsToRemove { // Since these are internal, not much sense in propagating error to // the user, as that will just confuse them with an error about a @@ -391,23 +352,11 @@ func (w *Watcher) remove(name string, unwatchFiles bool) error { return nil } -// WatchList returns all paths explicitly added with [Watcher.Add] (and are not -// yet removed). -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) WatchList() []string { - w.mu.Lock() - defer w.mu.Unlock() - if w.isClosed { +func (w *kqueue) WatchList() []string { + if w.isClosed() { return nil } - - entries := make([]string, 0, len(w.userWatches)) - for pathname := range w.userWatches { - entries = append(entries, pathname) - } - - return entries + return w.watches.listPaths(true) } // Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE) @@ -417,34 +366,26 @@ const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | un // described in kevent(2). // // Returns the real path to the file which was added, with symlinks resolved. -func (w *Watcher) addWatch(name string, flags uint32) (string, error) { - var isDir bool - name = filepath.Clean(name) - - w.mu.Lock() - if w.isClosed { - w.mu.Unlock() +func (w *kqueue) addWatch(name string, flags uint32) (string, error) { + if w.isClosed() { return "", ErrClosed } - watchfd, alreadyWatching := w.watches[name] - // We already have a watch, but we can still override flags. - if alreadyWatching { - isDir = w.paths[watchfd].isDir - } - w.mu.Unlock() + name = filepath.Clean(name) + + info, alreadyWatching := w.watches.byPath(name) if !alreadyWatching { fi, err := os.Lstat(name) if err != nil { return "", err } - // Don't watch sockets or named pipes + // Don't watch sockets or named pipes. if (fi.Mode()&os.ModeSocket == os.ModeSocket) || (fi.Mode()&os.ModeNamedPipe == os.ModeNamedPipe) { return "", nil } - // Follow Symlinks. + // Follow symlinks. if fi.Mode()&os.ModeSymlink == os.ModeSymlink { link, err := os.Readlink(name) if err != nil { @@ -455,18 +396,15 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) { return "", nil } - w.mu.Lock() - _, alreadyWatching = w.watches[link] - w.mu.Unlock() - + _, alreadyWatching = w.watches.byPath(link) if alreadyWatching { // Add to watches so we don't get spurious Create events later // on when we diff the directories. - w.watches[name] = 0 - w.fileExists[name] = struct{}{} + w.watches.addLink(name, 0) return link, nil } + info.linkName = name name = link fi, err = os.Lstat(name) if err != nil { @@ -477,7 +415,7 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) { // Retry on EINTR; open() can return EINTR in practice on macOS. // See #354, and Go issues 11180 and 39237. for { - watchfd, err = unix.Open(name, openMode, 0) + info.wd, err = unix.Open(name, openMode, 0) if err == nil { break } @@ -488,40 +426,25 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) { return "", err } - isDir = fi.IsDir() + info.isDir = fi.IsDir() } - err := w.register([]int{watchfd}, unix.EV_ADD|unix.EV_CLEAR|unix.EV_ENABLE, flags) + err := w.register([]int{info.wd}, unix.EV_ADD|unix.EV_CLEAR|unix.EV_ENABLE, flags) if err != nil { - unix.Close(watchfd) + unix.Close(info.wd) return "", err } if !alreadyWatching { - w.mu.Lock() - parentName := filepath.Dir(name) - w.watches[name] = watchfd - - watchesByDir, ok := w.watchesByDir[parentName] - if !ok { - watchesByDir = make(map[int]struct{}, 1) - w.watchesByDir[parentName] = watchesByDir - } - watchesByDir[watchfd] = struct{}{} - w.paths[watchfd] = pathInfo{name: name, isDir: isDir} - w.mu.Unlock() + w.watches.add(name, info.linkName, info.wd, info.isDir) } - if isDir { - // Watch the directory if it has not been watched before, or if it was - // watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles) - w.mu.Lock() - + // Watch the directory if it has not been watched before, or if it was + // watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles) + if info.isDir { watchDir := (flags&unix.NOTE_WRITE) == unix.NOTE_WRITE && - (!alreadyWatching || (w.dirFlags[name]&unix.NOTE_WRITE) != unix.NOTE_WRITE) - // Store flags so this watch can be updated later - w.dirFlags[name] = flags - w.mu.Unlock() + (!alreadyWatching || (info.dirFlags&unix.NOTE_WRITE) != unix.NOTE_WRITE) + w.watches.updateDirFlags(name, flags) if watchDir { if err := w.watchDirectoryFiles(name); err != nil { @@ -534,7 +457,7 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) { // readEvents reads from kqueue and converts the received kevents into // Event values that it sends down the Events channel. -func (w *Watcher) readEvents() { +func (w *kqueue) readEvents() { defer func() { close(w.Events) close(w.Errors) @@ -543,50 +466,65 @@ func (w *Watcher) readEvents() { }() eventBuffer := make([]unix.Kevent_t, 10) - for closed := false; !closed; { + for { kevents, err := w.read(eventBuffer) // EINTR is okay, the syscall was interrupted before timeout expired. if err != nil && err != unix.EINTR { if !w.sendError(fmt.Errorf("fsnotify.readEvents: %w", err)) { - closed = true + return } - continue } - // Flush the events we received to the Events channel for _, kevent := range kevents { var ( - watchfd = int(kevent.Ident) - mask = uint32(kevent.Fflags) + wd = int(kevent.Ident) + mask = uint32(kevent.Fflags) ) // Shut down the loop when the pipe is closed, but only after all // other events have been processed. - if watchfd == w.closepipe[0] { - closed = true - continue + if wd == w.closepipe[0] { + return } - w.mu.Lock() - path := w.paths[watchfd] - w.mu.Unlock() + path, ok := w.watches.byWd(wd) + if debug { + internal.Debug(path.name, &kevent) + } - event := w.newEvent(path.name, mask) + // On macOS it seems that sometimes an event with Ident=0 is + // delivered, and no other flags/information beyond that, even + // though we never saw such a file descriptor. For example in + // TestWatchSymlink/277 (usually at the end, but sometimes sooner): + // + // fmt.Printf("READ: %2d %#v\n", kevent.Ident, kevent) + // unix.Kevent_t{Ident:0x2a, Filter:-4, Flags:0x25, Fflags:0x2, Data:0, Udata:(*uint8)(nil)} + // unix.Kevent_t{Ident:0x0, Filter:-4, Flags:0x25, Fflags:0x2, Data:0, Udata:(*uint8)(nil)} + // + // The first is a normal event, the second with Ident 0. No error + // flag, no data, no ... nothing. + // + // I read a bit through bsd/kern_event.c from the xnu source, but I + // don't really see an obvious location where this is triggered – + // this doesn't seem intentional, but idk... + // + // Technically fd 0 is a valid descriptor, so only skip it if + // there's no path, and if we're on macOS. + if !ok && kevent.Ident == 0 && runtime.GOOS == "darwin" { + continue + } + + event := w.newEvent(path.name, path.linkName, mask) if event.Has(Rename) || event.Has(Remove) { w.remove(event.Name, false) - w.mu.Lock() - delete(w.fileExists, event.Name) - w.mu.Unlock() + w.watches.markSeen(event.Name, false) } if path.isDir && event.Has(Write) && !event.Has(Remove) { - w.sendDirectoryChangeEvents(event.Name) - } else { - if !w.sendEvent(event) { - closed = true - continue - } + w.dirChange(event.Name) + } else if !w.sendEvent(event) { + return } if event.Has(Remove) { @@ -594,25 +532,34 @@ func (w *Watcher) readEvents() { // mv f1 f2 will delete f2, then create f2. if path.isDir { fileDir := filepath.Clean(event.Name) - w.mu.Lock() - _, found := w.watches[fileDir] - w.mu.Unlock() + _, found := w.watches.byPath(fileDir) if found { - err := w.sendDirectoryChangeEvents(fileDir) - if err != nil { - if !w.sendError(err) { - closed = true - } + // TODO: this branch is never triggered in any test. + // Added in d6220df (2012). + // isDir check added in 8611c35 (2016): https://github.com/fsnotify/fsnotify/pull/111 + // + // I don't really get how this can be triggered either. + // And it wasn't triggered in the patch that added it, + // either. + // + // Original also had a comment: + // make sure the directory exists before we watch for + // changes. When we do a recursive watch and perform + // rm -rf, the parent directory might have gone + // missing, ignore the missing directory and let the + // upcoming delete event remove the watch from the + // parent directory. + err := w.dirChange(fileDir) + if !w.sendError(err) { + return } } } else { - filePath := filepath.Clean(event.Name) - if fi, err := os.Lstat(filePath); err == nil { - err := w.sendFileCreatedEventIfNew(filePath, fi) - if err != nil { - if !w.sendError(err) { - closed = true - } + path := filepath.Clean(event.Name) + if fi, err := os.Lstat(path); err == nil { + err := w.sendCreateIfNew(path, fi) + if !w.sendError(err) { + return } } } @@ -622,8 +569,14 @@ func (w *Watcher) readEvents() { } // newEvent returns an platform-independent Event based on kqueue Fflags. -func (w *Watcher) newEvent(name string, mask uint32) Event { +func (w *kqueue) newEvent(name, linkName string, mask uint32) Event { e := Event{Name: name} + if linkName != "" { + // If the user watched "/path/link" then emit events as "/path/link" + // rather than "/path/target". + e.Name = linkName + } + if mask&unix.NOTE_DELETE == unix.NOTE_DELETE { e.Op |= Remove } @@ -645,8 +598,7 @@ func (w *Watcher) newEvent(name string, mask uint32) Event { } // watchDirectoryFiles to mimic inotify when adding a watch on a directory -func (w *Watcher) watchDirectoryFiles(dirPath string) error { - // Get all files +func (w *kqueue) watchDirectoryFiles(dirPath string) error { files, err := os.ReadDir(dirPath) if err != nil { return err @@ -674,9 +626,7 @@ func (w *Watcher) watchDirectoryFiles(dirPath string) error { } } - w.mu.Lock() - w.fileExists[cleanPath] = struct{}{} - w.mu.Unlock() + w.watches.markSeen(cleanPath, true) } return nil @@ -686,7 +636,7 @@ func (w *Watcher) watchDirectoryFiles(dirPath string) error { // // This functionality is to have the BSD watcher match the inotify, which sends // a create event for files created in a watched directory. -func (w *Watcher) sendDirectoryChangeEvents(dir string) error { +func (w *kqueue) dirChange(dir string) error { files, err := os.ReadDir(dir) if err != nil { // Directory no longer exists: we can ignore this safely. kqueue will @@ -694,61 +644,51 @@ func (w *Watcher) sendDirectoryChangeEvents(dir string) error { if errors.Is(err, os.ErrNotExist) { return nil } - return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err) + return fmt.Errorf("fsnotify.dirChange: %w", err) } for _, f := range files { fi, err := f.Info() if err != nil { - return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err) + return fmt.Errorf("fsnotify.dirChange: %w", err) } - err = w.sendFileCreatedEventIfNew(filepath.Join(dir, fi.Name()), fi) + err = w.sendCreateIfNew(filepath.Join(dir, fi.Name()), fi) if err != nil { // Don't need to send an error if this file isn't readable. if errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM) { return nil } - return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err) + return fmt.Errorf("fsnotify.dirChange: %w", err) } } return nil } -// sendFileCreatedEvent sends a create event if the file isn't already being tracked. -func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fi os.FileInfo) (err error) { - w.mu.Lock() - _, doesExist := w.fileExists[filePath] - w.mu.Unlock() - if !doesExist { - if !w.sendEvent(Event{Name: filePath, Op: Create}) { - return +// Send a create event if the file isn't already being tracked, and start +// watching this file. +func (w *kqueue) sendCreateIfNew(path string, fi os.FileInfo) error { + if !w.watches.seenBefore(path) { + if !w.sendEvent(Event{Name: path, Op: Create}) { + return nil } } - // like watchDirectoryFiles (but without doing another ReadDir) - filePath, err = w.internalWatch(filePath, fi) + // Like watchDirectoryFiles, but without doing another ReadDir. + path, err := w.internalWatch(path, fi) if err != nil { return err } - - w.mu.Lock() - w.fileExists[filePath] = struct{}{} - w.mu.Unlock() - + w.watches.markSeen(path, true) return nil } -func (w *Watcher) internalWatch(name string, fi os.FileInfo) (string, error) { +func (w *kqueue) internalWatch(name string, fi os.FileInfo) (string, error) { if fi.IsDir() { // mimic Linux providing delete events for subdirectories, but preserve // the flags used if currently watching subdirectory - w.mu.Lock() - flags := w.dirFlags[name] - w.mu.Unlock() - - flags |= unix.NOTE_DELETE | unix.NOTE_RENAME - return w.addWatch(name, flags) + info, _ := w.watches.byPath(name) + return w.addWatch(name, info.dirFlags|unix.NOTE_DELETE|unix.NOTE_RENAME) } // watch file to mimic Linux inotify @@ -756,7 +696,7 @@ func (w *Watcher) internalWatch(name string, fi os.FileInfo) (string, error) { } // Register events with the queue. -func (w *Watcher) register(fds []int, flags int, fflags uint32) error { +func (w *kqueue) register(fds []int, flags int, fflags uint32) error { changes := make([]unix.Kevent_t, len(fds)) for i, fd := range fds { // SetKevent converts int to the platform-specific types. @@ -773,10 +713,21 @@ func (w *Watcher) register(fds []int, flags int, fflags uint32) error { } // read retrieves pending events, or waits until an event occurs. -func (w *Watcher) read(events []unix.Kevent_t) ([]unix.Kevent_t, error) { +func (w *kqueue) read(events []unix.Kevent_t) ([]unix.Kevent_t, error) { n, err := unix.Kevent(w.kq, nil, events, nil) if err != nil { return nil, err } return events[0:n], nil } + +func (w *kqueue) xSupports(op Op) bool { + if runtime.GOOS == "freebsd" { + //return true // Supports everything. + } + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/vendor/github.com/fsnotify/fsnotify/backend_other.go b/vendor/github.com/fsnotify/fsnotify/backend_other.go index d34a23c01..5eb5dbc66 100644 --- a/vendor/github.com/fsnotify/fsnotify/backend_other.go +++ b/vendor/github.com/fsnotify/fsnotify/backend_other.go @@ -1,205 +1,23 @@ //go:build appengine || (!darwin && !dragonfly && !freebsd && !openbsd && !linux && !netbsd && !solaris && !windows) -// +build appengine !darwin,!dragonfly,!freebsd,!openbsd,!linux,!netbsd,!solaris,!windows - -// Note: the documentation on the Watcher type and methods is generated from -// mkdoc.zsh package fsnotify import "errors" -// Watcher watches a set of paths, delivering events on a channel. -// -// A watcher should not be copied (e.g. pass it by pointer, rather than by -// value). -// -// # Linux notes -// -// When a file is removed a Remove event won't be emitted until all file -// descriptors are closed, and deletes will always emit a Chmod. For example: -// -// fp := os.Open("file") -// os.Remove("file") // Triggers Chmod -// fp.Close() // Triggers Remove -// -// This is the event that inotify sends, so not much can be changed about this. -// -// The fs.inotify.max_user_watches sysctl variable specifies the upper limit -// for the number of watches per user, and fs.inotify.max_user_instances -// specifies the maximum number of inotify instances per user. Every Watcher you -// create is an "instance", and every path you add is a "watch". -// -// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and -// /proc/sys/fs/inotify/max_user_instances -// -// To increase them you can use sysctl or write the value to the /proc file: -// -// # Default values on Linux 5.18 -// sysctl fs.inotify.max_user_watches=124983 -// sysctl fs.inotify.max_user_instances=128 -// -// To make the changes persist on reboot edit /etc/sysctl.conf or -// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check -// your distro's documentation): -// -// fs.inotify.max_user_watches=124983 -// fs.inotify.max_user_instances=128 -// -// Reaching the limit will result in a "no space left on device" or "too many open -// files" error. -// -// # kqueue notes (macOS, BSD) -// -// kqueue requires opening a file descriptor for every file that's being watched; -// so if you're watching a directory with five files then that's six file -// descriptors. You will run in to your system's "max open files" limit faster on -// these platforms. -// -// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to -// control the maximum number of open files, as well as /etc/login.conf on BSD -// systems. -// -// # Windows notes -// -// Paths can be added as "C:\path\to\dir", but forward slashes -// ("C:/path/to/dir") will also work. -// -// When a watched directory is removed it will always send an event for the -// directory itself, but may not send events for all files in that directory. -// Sometimes it will send events for all times, sometimes it will send no -// events, and often only for some files. -// -// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest -// value that is guaranteed to work with SMB filesystems. If you have many -// events in quick succession this may not be enough, and you will have to use -// [WithBufferSize] to increase the value. -type Watcher struct { - // Events sends the filesystem change events. - // - // fsnotify can send the following events; a "path" here can refer to a - // file, directory, symbolic link, or special file like a FIFO. - // - // fsnotify.Create A new path was created; this may be followed by one - // or more Write events if data also gets written to a - // file. - // - // fsnotify.Remove A path was removed. - // - // fsnotify.Rename A path was renamed. A rename is always sent with the - // old path as Event.Name, and a Create event will be - // sent with the new name. Renames are only sent for - // paths that are currently watched; e.g. moving an - // unmonitored file into a monitored directory will - // show up as just a Create. Similarly, renaming a file - // to outside a monitored directory will show up as - // only a Rename. - // - // fsnotify.Write A file or named pipe was written to. A Truncate will - // also trigger a Write. A single "write action" - // initiated by the user may show up as one or multiple - // writes, depending on when the system syncs things to - // disk. For example when compiling a large Go program - // you may get hundreds of Write events, and you may - // want to wait until you've stopped receiving them - // (see the dedup example in cmd/fsnotify). - // - // Some systems may send Write event for directories - // when the directory content changes. - // - // fsnotify.Chmod Attributes were changed. On Linux this is also sent - // when a file is removed (or more accurately, when a - // link to an inode is removed). On kqueue it's sent - // when a file is truncated. On Windows it's never - // sent. +type other struct { Events chan Event - - // Errors sends any errors. - // - // ErrEventOverflow is used to indicate there are too many events: - // - // - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl) - // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. - // - kqueue, fen: Not used. Errors chan error } -// NewWatcher creates a new Watcher. -func NewWatcher() (*Watcher, error) { +func newBackend(ev chan Event, errs chan error) (backend, error) { return nil, errors.New("fsnotify not supported on the current platform") } - -// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events -// channel. -// -// The main use case for this is situations with a very large number of events -// where the kernel buffer size can't be increased (e.g. due to lack of -// permissions). An unbuffered Watcher will perform better for almost all use -// cases, and whenever possible you will be better off increasing the kernel -// buffers instead of adding a large userspace buffer. -func NewBufferedWatcher(sz uint) (*Watcher, error) { return NewWatcher() } - -// Close removes all watches and closes the Events channel. -func (w *Watcher) Close() error { return nil } - -// WatchList returns all paths explicitly added with [Watcher.Add] (and are not -// yet removed). -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) WatchList() []string { return nil } - -// Add starts monitoring the path for changes. -// -// A path can only be watched once; watching it more than once is a no-op and will -// not return an error. Paths that do not yet exist on the filesystem cannot be -// watched. -// -// A watch will be automatically removed if the watched path is deleted or -// renamed. The exception is the Windows backend, which doesn't remove the -// watcher on renames. -// -// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special -// filesystems (/proc, /sys, etc.) generally don't work. -// -// Returns [ErrClosed] if [Watcher.Close] was called. -// -// See [Watcher.AddWith] for a version that allows adding options. -// -// # Watching directories -// -// All files in a directory are monitored, including new files that are created -// after the watcher is started. Subdirectories are not watched (i.e. it's -// non-recursive). -// -// # Watching files -// -// Watching individual files (rather than directories) is generally not -// recommended as many programs (especially editors) update files atomically: it -// will write to a temporary file which is then moved to to destination, -// overwriting the original (or some variant thereof). The watcher on the -// original file is now lost, as that no longer exists. -// -// The upshot of this is that a power failure or crash won't leave a -// half-written file. -// -// Watch the parent directory and use Event.Name to filter out files you're not -// interested in. There is an example of this in cmd/fsnotify/file.go. -func (w *Watcher) Add(name string) error { return nil } - -// AddWith is like [Watcher.Add], but allows adding options. When using Add() -// the defaults described below are used. -// -// Possible options are: -// -// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on -// other platforms. The default is 64K (65536 bytes). -func (w *Watcher) AddWith(name string, opts ...addOpt) error { return nil } - -// Remove stops monitoring the path for changes. -// -// Directories are always removed non-recursively. For example, if you added -// /tmp/dir and /tmp/dir/subdir then you will need to remove both. -// -// Removing a path that has not yet been added returns [ErrNonExistentWatch]. -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) Remove(name string) error { return nil } +func newBufferedBackend(sz uint, ev chan Event, errs chan error) (backend, error) { + return newBackend(ev, errs) +} +func (w *other) Close() error { return nil } +func (w *other) WatchList() []string { return nil } +func (w *other) Add(name string) error { return nil } +func (w *other) AddWith(name string, opts ...addOpt) error { return nil } +func (w *other) Remove(name string) error { return nil } +func (w *other) xSupports(op Op) bool { return false } diff --git a/vendor/github.com/fsnotify/fsnotify/backend_windows.go b/vendor/github.com/fsnotify/fsnotify/backend_windows.go index 9bc91e5d6..c54a63083 100644 --- a/vendor/github.com/fsnotify/fsnotify/backend_windows.go +++ b/vendor/github.com/fsnotify/fsnotify/backend_windows.go @@ -1,12 +1,8 @@ //go:build windows -// +build windows // Windows backend based on ReadDirectoryChangesW() // // https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-readdirectorychangesw -// -// Note: the documentation on the Watcher type and methods is generated from -// mkdoc.zsh package fsnotify @@ -19,123 +15,15 @@ import ( "runtime" "strings" "sync" + "time" "unsafe" + "github.com/fsnotify/fsnotify/internal" "golang.org/x/sys/windows" ) -// Watcher watches a set of paths, delivering events on a channel. -// -// A watcher should not be copied (e.g. pass it by pointer, rather than by -// value). -// -// # Linux notes -// -// When a file is removed a Remove event won't be emitted until all file -// descriptors are closed, and deletes will always emit a Chmod. For example: -// -// fp := os.Open("file") -// os.Remove("file") // Triggers Chmod -// fp.Close() // Triggers Remove -// -// This is the event that inotify sends, so not much can be changed about this. -// -// The fs.inotify.max_user_watches sysctl variable specifies the upper limit -// for the number of watches per user, and fs.inotify.max_user_instances -// specifies the maximum number of inotify instances per user. Every Watcher you -// create is an "instance", and every path you add is a "watch". -// -// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and -// /proc/sys/fs/inotify/max_user_instances -// -// To increase them you can use sysctl or write the value to the /proc file: -// -// # Default values on Linux 5.18 -// sysctl fs.inotify.max_user_watches=124983 -// sysctl fs.inotify.max_user_instances=128 -// -// To make the changes persist on reboot edit /etc/sysctl.conf or -// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check -// your distro's documentation): -// -// fs.inotify.max_user_watches=124983 -// fs.inotify.max_user_instances=128 -// -// Reaching the limit will result in a "no space left on device" or "too many open -// files" error. -// -// # kqueue notes (macOS, BSD) -// -// kqueue requires opening a file descriptor for every file that's being watched; -// so if you're watching a directory with five files then that's six file -// descriptors. You will run in to your system's "max open files" limit faster on -// these platforms. -// -// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to -// control the maximum number of open files, as well as /etc/login.conf on BSD -// systems. -// -// # Windows notes -// -// Paths can be added as "C:\path\to\dir", but forward slashes -// ("C:/path/to/dir") will also work. -// -// When a watched directory is removed it will always send an event for the -// directory itself, but may not send events for all files in that directory. -// Sometimes it will send events for all times, sometimes it will send no -// events, and often only for some files. -// -// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest -// value that is guaranteed to work with SMB filesystems. If you have many -// events in quick succession this may not be enough, and you will have to use -// [WithBufferSize] to increase the value. -type Watcher struct { - // Events sends the filesystem change events. - // - // fsnotify can send the following events; a "path" here can refer to a - // file, directory, symbolic link, or special file like a FIFO. - // - // fsnotify.Create A new path was created; this may be followed by one - // or more Write events if data also gets written to a - // file. - // - // fsnotify.Remove A path was removed. - // - // fsnotify.Rename A path was renamed. A rename is always sent with the - // old path as Event.Name, and a Create event will be - // sent with the new name. Renames are only sent for - // paths that are currently watched; e.g. moving an - // unmonitored file into a monitored directory will - // show up as just a Create. Similarly, renaming a file - // to outside a monitored directory will show up as - // only a Rename. - // - // fsnotify.Write A file or named pipe was written to. A Truncate will - // also trigger a Write. A single "write action" - // initiated by the user may show up as one or multiple - // writes, depending on when the system syncs things to - // disk. For example when compiling a large Go program - // you may get hundreds of Write events, and you may - // want to wait until you've stopped receiving them - // (see the dedup example in cmd/fsnotify). - // - // Some systems may send Write event for directories - // when the directory content changes. - // - // fsnotify.Chmod Attributes were changed. On Linux this is also sent - // when a file is removed (or more accurately, when a - // link to an inode is removed). On kqueue it's sent - // when a file is truncated. On Windows it's never - // sent. +type readDirChangesW struct { Events chan Event - - // Errors sends any errors. - // - // ErrEventOverflow is used to indicate there are too many events: - // - // - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl) - // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. - // - kqueue, fen: Not used. Errors chan error port windows.Handle // Handle to completion port @@ -147,48 +35,40 @@ type Watcher struct { closed bool // Set to true when Close() is first called } -// NewWatcher creates a new Watcher. -func NewWatcher() (*Watcher, error) { - return NewBufferedWatcher(50) +func newBackend(ev chan Event, errs chan error) (backend, error) { + return newBufferedBackend(50, ev, errs) } -// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events -// channel. -// -// The main use case for this is situations with a very large number of events -// where the kernel buffer size can't be increased (e.g. due to lack of -// permissions). An unbuffered Watcher will perform better for almost all use -// cases, and whenever possible you will be better off increasing the kernel -// buffers instead of adding a large userspace buffer. -func NewBufferedWatcher(sz uint) (*Watcher, error) { +func newBufferedBackend(sz uint, ev chan Event, errs chan error) (backend, error) { port, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0) if err != nil { return nil, os.NewSyscallError("CreateIoCompletionPort", err) } - w := &Watcher{ + w := &readDirChangesW{ + Events: ev, + Errors: errs, port: port, watches: make(watchMap), input: make(chan *input, 1), - Events: make(chan Event, sz), - Errors: make(chan error), quit: make(chan chan<- error, 1), } go w.readEvents() return w, nil } -func (w *Watcher) isClosed() bool { +func (w *readDirChangesW) isClosed() bool { w.mu.Lock() defer w.mu.Unlock() return w.closed } -func (w *Watcher) sendEvent(name string, mask uint64) bool { +func (w *readDirChangesW) sendEvent(name, renamedFrom string, mask uint64) bool { if mask == 0 { return false } event := w.newEvent(name, uint32(mask)) + event.renamedFrom = renamedFrom select { case ch := <-w.quit: w.quit <- ch @@ -198,17 +78,19 @@ func (w *Watcher) sendEvent(name string, mask uint64) bool { } // Returns true if the error was sent, or false if watcher is closed. -func (w *Watcher) sendError(err error) bool { +func (w *readDirChangesW) sendError(err error) bool { + if err == nil { + return true + } select { case w.Errors <- err: return true case <-w.quit: + return false } - return false } -// Close removes all watches and closes the Events channel. -func (w *Watcher) Close() error { +func (w *readDirChangesW) Close() error { if w.isClosed() { return nil } @@ -226,57 +108,21 @@ func (w *Watcher) Close() error { return <-ch } -// Add starts monitoring the path for changes. -// -// A path can only be watched once; watching it more than once is a no-op and will -// not return an error. Paths that do not yet exist on the filesystem cannot be -// watched. -// -// A watch will be automatically removed if the watched path is deleted or -// renamed. The exception is the Windows backend, which doesn't remove the -// watcher on renames. -// -// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special -// filesystems (/proc, /sys, etc.) generally don't work. -// -// Returns [ErrClosed] if [Watcher.Close] was called. -// -// See [Watcher.AddWith] for a version that allows adding options. -// -// # Watching directories -// -// All files in a directory are monitored, including new files that are created -// after the watcher is started. Subdirectories are not watched (i.e. it's -// non-recursive). -// -// # Watching files -// -// Watching individual files (rather than directories) is generally not -// recommended as many programs (especially editors) update files atomically: it -// will write to a temporary file which is then moved to to destination, -// overwriting the original (or some variant thereof). The watcher on the -// original file is now lost, as that no longer exists. -// -// The upshot of this is that a power failure or crash won't leave a -// half-written file. -// -// Watch the parent directory and use Event.Name to filter out files you're not -// interested in. There is an example of this in cmd/fsnotify/file.go. -func (w *Watcher) Add(name string) error { return w.AddWith(name) } +func (w *readDirChangesW) Add(name string) error { return w.AddWith(name) } -// AddWith is like [Watcher.Add], but allows adding options. When using Add() -// the defaults described below are used. -// -// Possible options are: -// -// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on -// other platforms. The default is 64K (65536 bytes). -func (w *Watcher) AddWith(name string, opts ...addOpt) error { +func (w *readDirChangesW) AddWith(name string, opts ...addOpt) error { if w.isClosed() { return ErrClosed } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s AddWith(%q)\n", + time.Now().Format("15:04:05.000000000"), filepath.ToSlash(name)) + } with := getOptions(opts...) + if !w.xSupports(with.op) { + return fmt.Errorf("%w: %s", xErrUnsupported, with.op) + } if with.bufsize < 4096 { return fmt.Errorf("fsnotify.WithBufferSize: buffer size cannot be smaller than 4096 bytes") } @@ -295,18 +141,14 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error { return <-in.reply } -// Remove stops monitoring the path for changes. -// -// Directories are always removed non-recursively. For example, if you added -// /tmp/dir and /tmp/dir/subdir then you will need to remove both. -// -// Removing a path that has not yet been added returns [ErrNonExistentWatch]. -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) Remove(name string) error { +func (w *readDirChangesW) Remove(name string) error { if w.isClosed() { return nil } + if debug { + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s Remove(%q)\n", + time.Now().Format("15:04:05.000000000"), filepath.ToSlash(name)) + } in := &input{ op: opRemoveWatch, @@ -320,11 +162,7 @@ func (w *Watcher) Remove(name string) error { return <-in.reply } -// WatchList returns all paths explicitly added with [Watcher.Add] (and are not -// yet removed). -// -// Returns nil if [Watcher.Close] was called. -func (w *Watcher) WatchList() []string { +func (w *readDirChangesW) WatchList() []string { if w.isClosed() { return nil } @@ -335,7 +173,13 @@ func (w *Watcher) WatchList() []string { entries := make([]string, 0, len(w.watches)) for _, entry := range w.watches { for _, watchEntry := range entry { - entries = append(entries, watchEntry.path) + for name := range watchEntry.names { + entries = append(entries, filepath.Join(watchEntry.path, name)) + } + // the directory itself is being watched + if watchEntry.mask != 0 { + entries = append(entries, watchEntry.path) + } } } @@ -361,7 +205,7 @@ const ( sysFSIGNORED = 0x8000 ) -func (w *Watcher) newEvent(name string, mask uint32) Event { +func (w *readDirChangesW) newEvent(name string, mask uint32) Event { e := Event{Name: name} if mask&sysFSCREATE == sysFSCREATE || mask&sysFSMOVEDTO == sysFSMOVEDTO { e.Op |= Create @@ -417,7 +261,7 @@ type ( watchMap map[uint32]indexMap ) -func (w *Watcher) wakeupReader() error { +func (w *readDirChangesW) wakeupReader() error { err := windows.PostQueuedCompletionStatus(w.port, 0, 0, nil) if err != nil { return os.NewSyscallError("PostQueuedCompletionStatus", err) @@ -425,7 +269,7 @@ func (w *Watcher) wakeupReader() error { return nil } -func (w *Watcher) getDir(pathname string) (dir string, err error) { +func (w *readDirChangesW) getDir(pathname string) (dir string, err error) { attr, err := windows.GetFileAttributes(windows.StringToUTF16Ptr(pathname)) if err != nil { return "", os.NewSyscallError("GetFileAttributes", err) @@ -439,7 +283,7 @@ func (w *Watcher) getDir(pathname string) (dir string, err error) { return } -func (w *Watcher) getIno(path string) (ino *inode, err error) { +func (w *readDirChangesW) getIno(path string) (ino *inode, err error) { h, err := windows.CreateFile(windows.StringToUTF16Ptr(path), windows.FILE_LIST_DIRECTORY, windows.FILE_SHARE_READ|windows.FILE_SHARE_WRITE|windows.FILE_SHARE_DELETE, @@ -482,9 +326,8 @@ func (m watchMap) set(ino *inode, watch *watch) { } // Must run within the I/O thread. -func (w *Watcher) addWatch(pathname string, flags uint64, bufsize int) error { - //pathname, recurse := recursivePath(pathname) - recurse := false +func (w *readDirChangesW) addWatch(pathname string, flags uint64, bufsize int) error { + pathname, recurse := recursivePath(pathname) dir, err := w.getDir(pathname) if err != nil { @@ -538,7 +381,7 @@ func (w *Watcher) addWatch(pathname string, flags uint64, bufsize int) error { } // Must run within the I/O thread. -func (w *Watcher) remWatch(pathname string) error { +func (w *readDirChangesW) remWatch(pathname string) error { pathname, recurse := recursivePath(pathname) dir, err := w.getDir(pathname) @@ -566,11 +409,11 @@ func (w *Watcher) remWatch(pathname string) error { return fmt.Errorf("%w: %s", ErrNonExistentWatch, pathname) } if pathname == dir { - w.sendEvent(watch.path, watch.mask&sysFSIGNORED) + w.sendEvent(watch.path, "", watch.mask&sysFSIGNORED) watch.mask = 0 } else { name := filepath.Base(pathname) - w.sendEvent(filepath.Join(watch.path, name), watch.names[name]&sysFSIGNORED) + w.sendEvent(filepath.Join(watch.path, name), "", watch.names[name]&sysFSIGNORED) delete(watch.names, name) } @@ -578,23 +421,23 @@ func (w *Watcher) remWatch(pathname string) error { } // Must run within the I/O thread. -func (w *Watcher) deleteWatch(watch *watch) { +func (w *readDirChangesW) deleteWatch(watch *watch) { for name, mask := range watch.names { if mask&provisional == 0 { - w.sendEvent(filepath.Join(watch.path, name), mask&sysFSIGNORED) + w.sendEvent(filepath.Join(watch.path, name), "", mask&sysFSIGNORED) } delete(watch.names, name) } if watch.mask != 0 { if watch.mask&provisional == 0 { - w.sendEvent(watch.path, watch.mask&sysFSIGNORED) + w.sendEvent(watch.path, "", watch.mask&sysFSIGNORED) } watch.mask = 0 } } // Must run within the I/O thread. -func (w *Watcher) startRead(watch *watch) error { +func (w *readDirChangesW) startRead(watch *watch) error { err := windows.CancelIo(watch.ino.handle) if err != nil { w.sendError(os.NewSyscallError("CancelIo", err)) @@ -624,7 +467,7 @@ func (w *Watcher) startRead(watch *watch) error { err := os.NewSyscallError("ReadDirectoryChanges", rdErr) if rdErr == windows.ERROR_ACCESS_DENIED && watch.mask&provisional == 0 { // Watched directory was probably removed - w.sendEvent(watch.path, watch.mask&sysFSDELETESELF) + w.sendEvent(watch.path, "", watch.mask&sysFSDELETESELF) err = nil } w.deleteWatch(watch) @@ -637,7 +480,7 @@ func (w *Watcher) startRead(watch *watch) error { // readEvents reads from the I/O completion port, converts the // received events into Event objects and sends them via the Events channel. // Entry point to the I/O thread. -func (w *Watcher) readEvents() { +func (w *readDirChangesW) readEvents() { var ( n uint32 key uintptr @@ -700,7 +543,7 @@ func (w *Watcher) readEvents() { } case windows.ERROR_ACCESS_DENIED: // Watched directory was probably removed - w.sendEvent(watch.path, watch.mask&sysFSDELETESELF) + w.sendEvent(watch.path, "", watch.mask&sysFSDELETESELF) w.deleteWatch(watch) w.startRead(watch) continue @@ -733,6 +576,10 @@ func (w *Watcher) readEvents() { name := windows.UTF16ToString(buf) fullname := filepath.Join(watch.path, name) + if debug { + internal.Debug(fullname, raw.Action) + } + var mask uint64 switch raw.Action { case windows.FILE_ACTION_REMOVED: @@ -761,21 +608,22 @@ func (w *Watcher) readEvents() { } } - sendNameEvent := func() { - w.sendEvent(fullname, watch.names[name]&mask) - } if raw.Action != windows.FILE_ACTION_RENAMED_NEW_NAME { - sendNameEvent() + w.sendEvent(fullname, "", watch.names[name]&mask) } if raw.Action == windows.FILE_ACTION_REMOVED { - w.sendEvent(fullname, watch.names[name]&sysFSIGNORED) + w.sendEvent(fullname, "", watch.names[name]&sysFSIGNORED) delete(watch.names, name) } - w.sendEvent(fullname, watch.mask&w.toFSnotifyFlags(raw.Action)) + if watch.rename != "" && raw.Action == windows.FILE_ACTION_RENAMED_NEW_NAME { + w.sendEvent(fullname, filepath.Join(watch.path, watch.rename), watch.mask&w.toFSnotifyFlags(raw.Action)) + } else { + w.sendEvent(fullname, "", watch.mask&w.toFSnotifyFlags(raw.Action)) + } + if raw.Action == windows.FILE_ACTION_RENAMED_NEW_NAME { - fullname = filepath.Join(watch.path, watch.rename) - sendNameEvent() + w.sendEvent(filepath.Join(watch.path, watch.rename), "", watch.names[name]&mask) } // Move to the next event in the buffer @@ -787,8 +635,7 @@ func (w *Watcher) readEvents() { // Error! if offset >= n { //lint:ignore ST1005 Windows should be capitalized - w.sendError(errors.New( - "Windows system assumed buffer larger than it is, events have likely been missed")) + w.sendError(errors.New("Windows system assumed buffer larger than it is, events have likely been missed")) break } } @@ -799,7 +646,7 @@ func (w *Watcher) readEvents() { } } -func (w *Watcher) toWindowsFlags(mask uint64) uint32 { +func (w *readDirChangesW) toWindowsFlags(mask uint64) uint32 { var m uint32 if mask&sysFSMODIFY != 0 { m |= windows.FILE_NOTIFY_CHANGE_LAST_WRITE @@ -810,7 +657,7 @@ func (w *Watcher) toWindowsFlags(mask uint64) uint32 { return m } -func (w *Watcher) toFSnotifyFlags(action uint32) uint64 { +func (w *readDirChangesW) toFSnotifyFlags(action uint32) uint64 { switch action { case windows.FILE_ACTION_ADDED: return sysFSCREATE @@ -825,3 +672,11 @@ func (w *Watcher) toFSnotifyFlags(action uint32) uint64 { } return 0 } + +func (w *readDirChangesW) xSupports(op Op) bool { + if op.Has(xUnportableOpen) || op.Has(xUnportableRead) || + op.Has(xUnportableCloseWrite) || op.Has(xUnportableCloseRead) { + return false + } + return true +} diff --git a/vendor/github.com/fsnotify/fsnotify/fsnotify.go b/vendor/github.com/fsnotify/fsnotify/fsnotify.go index 24c99cc49..0760efe91 100644 --- a/vendor/github.com/fsnotify/fsnotify/fsnotify.go +++ b/vendor/github.com/fsnotify/fsnotify/fsnotify.go @@ -3,19 +3,146 @@ // // Currently supported systems: // -// Linux 2.6.32+ via inotify -// BSD, macOS via kqueue -// Windows via ReadDirectoryChangesW -// illumos via FEN +// - Linux via inotify +// - BSD, macOS via kqueue +// - Windows via ReadDirectoryChangesW +// - illumos via FEN +// +// # FSNOTIFY_DEBUG +// +// Set the FSNOTIFY_DEBUG environment variable to "1" to print debug messages to +// stderr. This can be useful to track down some problems, especially in cases +// where fsnotify is used as an indirect dependency. +// +// Every event will be printed as soon as there's something useful to print, +// with as little processing from fsnotify. +// +// Example output: +// +// FSNOTIFY_DEBUG: 11:34:23.633087586 256:IN_CREATE → "/tmp/file-1" +// FSNOTIFY_DEBUG: 11:34:23.633202319 4:IN_ATTRIB → "/tmp/file-1" +// FSNOTIFY_DEBUG: 11:34:28.989728764 512:IN_DELETE → "/tmp/file-1" package fsnotify import ( "errors" "fmt" + "os" "path/filepath" "strings" ) +// Watcher watches a set of paths, delivering events on a channel. +// +// A watcher should not be copied (e.g. pass it by pointer, rather than by +// value). +// +// # Linux notes +// +// When a file is removed a Remove event won't be emitted until all file +// descriptors are closed, and deletes will always emit a Chmod. For example: +// +// fp := os.Open("file") +// os.Remove("file") // Triggers Chmod +// fp.Close() // Triggers Remove +// +// This is the event that inotify sends, so not much can be changed about this. +// +// The fs.inotify.max_user_watches sysctl variable specifies the upper limit +// for the number of watches per user, and fs.inotify.max_user_instances +// specifies the maximum number of inotify instances per user. Every Watcher you +// create is an "instance", and every path you add is a "watch". +// +// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and +// /proc/sys/fs/inotify/max_user_instances +// +// To increase them you can use sysctl or write the value to the /proc file: +// +// # Default values on Linux 5.18 +// sysctl fs.inotify.max_user_watches=124983 +// sysctl fs.inotify.max_user_instances=128 +// +// To make the changes persist on reboot edit /etc/sysctl.conf or +// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check +// your distro's documentation): +// +// fs.inotify.max_user_watches=124983 +// fs.inotify.max_user_instances=128 +// +// Reaching the limit will result in a "no space left on device" or "too many open +// files" error. +// +// # kqueue notes (macOS, BSD) +// +// kqueue requires opening a file descriptor for every file that's being watched; +// so if you're watching a directory with five files then that's six file +// descriptors. You will run in to your system's "max open files" limit faster on +// these platforms. +// +// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to +// control the maximum number of open files, as well as /etc/login.conf on BSD +// systems. +// +// # Windows notes +// +// Paths can be added as "C:\\path\\to\\dir", but forward slashes +// ("C:/path/to/dir") will also work. +// +// When a watched directory is removed it will always send an event for the +// directory itself, but may not send events for all files in that directory. +// Sometimes it will send events for all files, sometimes it will send no +// events, and often only for some files. +// +// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest +// value that is guaranteed to work with SMB filesystems. If you have many +// events in quick succession this may not be enough, and you will have to use +// [WithBufferSize] to increase the value. +type Watcher struct { + b backend + + // Events sends the filesystem change events. + // + // fsnotify can send the following events; a "path" here can refer to a + // file, directory, symbolic link, or special file like a FIFO. + // + // fsnotify.Create A new path was created; this may be followed by one + // or more Write events if data also gets written to a + // file. + // + // fsnotify.Remove A path was removed. + // + // fsnotify.Rename A path was renamed. A rename is always sent with the + // old path as Event.Name, and a Create event will be + // sent with the new name. Renames are only sent for + // paths that are currently watched; e.g. moving an + // unmonitored file into a monitored directory will + // show up as just a Create. Similarly, renaming a file + // to outside a monitored directory will show up as + // only a Rename. + // + // fsnotify.Write A file or named pipe was written to. A Truncate will + // also trigger a Write. A single "write action" + // initiated by the user may show up as one or multiple + // writes, depending on when the system syncs things to + // disk. For example when compiling a large Go program + // you may get hundreds of Write events, and you may + // want to wait until you've stopped receiving them + // (see the dedup example in cmd/fsnotify). + // + // Some systems may send Write event for directories + // when the directory content changes. + // + // fsnotify.Chmod Attributes were changed. On Linux this is also sent + // when a file is removed (or more accurately, when a + // link to an inode is removed). On kqueue it's sent + // when a file is truncated. On Windows it's never + // sent. + Events chan Event + + // Errors sends any errors. + Errors chan error +} + // Event represents a file system notification. type Event struct { // Path to the file or directory. @@ -30,6 +157,16 @@ type Event struct { // This is a bitmask and some systems may send multiple operations at once. // Use the Event.Has() method instead of comparing with ==. Op Op + + // Create events will have this set to the old path if it's a rename. This + // only works when both the source and destination are watched. It's not + // reliable when watching individual files, only directories. + // + // For example "mv /tmp/file /tmp/rename" will emit: + // + // Event{Op: Rename, Name: "/tmp/file"} + // Event{Op: Create, Name: "/tmp/rename", RenamedFrom: "/tmp/file"} + renamedFrom string } // Op describes a set of file operations. @@ -50,7 +187,7 @@ const ( // example "remove to trash" is often a rename). Remove - // The path was renamed to something else; any watched on it will be + // The path was renamed to something else; any watches on it will be // removed. Rename @@ -60,15 +197,155 @@ const ( // get triggered very frequently by some software. For example, Spotlight // indexing on macOS, anti-virus software, backup software, etc. Chmod + + // File descriptor was opened. + // + // Only works on Linux and FreeBSD. + xUnportableOpen + + // File was read from. + // + // Only works on Linux and FreeBSD. + xUnportableRead + + // File opened for writing was closed. + // + // Only works on Linux and FreeBSD. + // + // The advantage of using this over Write is that it's more reliable than + // waiting for Write events to stop. It's also faster (if you're not + // listening to Write events): copying a file of a few GB can easily + // generate tens of thousands of Write events in a short span of time. + xUnportableCloseWrite + + // File opened for reading was closed. + // + // Only works on Linux and FreeBSD. + xUnportableCloseRead ) -// Common errors that can be reported. var ( + // ErrNonExistentWatch is used when Remove() is called on a path that's not + // added. ErrNonExistentWatch = errors.New("fsnotify: can't remove non-existent watch") - ErrEventOverflow = errors.New("fsnotify: queue or buffer overflow") - ErrClosed = errors.New("fsnotify: watcher already closed") + + // ErrClosed is used when trying to operate on a closed Watcher. + ErrClosed = errors.New("fsnotify: watcher already closed") + + // ErrEventOverflow is reported from the Errors channel when there are too + // many events: + // + // - inotify: inotify returns IN_Q_OVERFLOW – because there are too + // many queued events (the fs.inotify.max_queued_events + // sysctl can be used to increase this). + // - windows: The buffer size is too small; WithBufferSize() can be used to increase it. + // - kqueue, fen: Not used. + ErrEventOverflow = errors.New("fsnotify: queue or buffer overflow") + + // ErrUnsupported is returned by AddWith() when WithOps() specified an + // Unportable event that's not supported on this platform. + xErrUnsupported = errors.New("fsnotify: not supported with this backend") ) +// NewWatcher creates a new Watcher. +func NewWatcher() (*Watcher, error) { + ev, errs := make(chan Event), make(chan error) + b, err := newBackend(ev, errs) + if err != nil { + return nil, err + } + return &Watcher{b: b, Events: ev, Errors: errs}, nil +} + +// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events +// channel. +// +// The main use case for this is situations with a very large number of events +// where the kernel buffer size can't be increased (e.g. due to lack of +// permissions). An unbuffered Watcher will perform better for almost all use +// cases, and whenever possible you will be better off increasing the kernel +// buffers instead of adding a large userspace buffer. +func NewBufferedWatcher(sz uint) (*Watcher, error) { + ev, errs := make(chan Event), make(chan error) + b, err := newBufferedBackend(sz, ev, errs) + if err != nil { + return nil, err + } + return &Watcher{b: b, Events: ev, Errors: errs}, nil +} + +// Add starts monitoring the path for changes. +// +// A path can only be watched once; watching it more than once is a no-op and will +// not return an error. Paths that do not yet exist on the filesystem cannot be +// watched. +// +// A watch will be automatically removed if the watched path is deleted or +// renamed. The exception is the Windows backend, which doesn't remove the +// watcher on renames. +// +// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special +// filesystems (/proc, /sys, etc.) generally don't work. +// +// Returns [ErrClosed] if [Watcher.Close] was called. +// +// See [Watcher.AddWith] for a version that allows adding options. +// +// # Watching directories +// +// All files in a directory are monitored, including new files that are created +// after the watcher is started. Subdirectories are not watched (i.e. it's +// non-recursive). +// +// # Watching files +// +// Watching individual files (rather than directories) is generally not +// recommended as many programs (especially editors) update files atomically: it +// will write to a temporary file which is then moved to destination, +// overwriting the original (or some variant thereof). The watcher on the +// original file is now lost, as that no longer exists. +// +// The upshot of this is that a power failure or crash won't leave a +// half-written file. +// +// Watch the parent directory and use Event.Name to filter out files you're not +// interested in. There is an example of this in cmd/fsnotify/file.go. +func (w *Watcher) Add(path string) error { return w.b.Add(path) } + +// AddWith is like [Watcher.Add], but allows adding options. When using Add() +// the defaults described below are used. +// +// Possible options are: +// +// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on +// other platforms. The default is 64K (65536 bytes). +func (w *Watcher) AddWith(path string, opts ...addOpt) error { return w.b.AddWith(path, opts...) } + +// Remove stops monitoring the path for changes. +// +// Directories are always removed non-recursively. For example, if you added +// /tmp/dir and /tmp/dir/subdir then you will need to remove both. +// +// Removing a path that has not yet been added returns [ErrNonExistentWatch]. +// +// Returns nil if [Watcher.Close] was called. +func (w *Watcher) Remove(path string) error { return w.b.Remove(path) } + +// Close removes all watches and closes the Events channel. +func (w *Watcher) Close() error { return w.b.Close() } + +// WatchList returns all paths explicitly added with [Watcher.Add] (and are not +// yet removed). +// +// Returns nil if [Watcher.Close] was called. +func (w *Watcher) WatchList() []string { return w.b.WatchList() } + +// Supports reports if all the listed operations are supported by this platform. +// +// Create, Write, Remove, Rename, and Chmod are always supported. It can only +// return false for an Op starting with Unportable. +func (w *Watcher) xSupports(op Op) bool { return w.b.xSupports(op) } + func (o Op) String() string { var b strings.Builder if o.Has(Create) { @@ -80,6 +357,18 @@ func (o Op) String() string { if o.Has(Write) { b.WriteString("|WRITE") } + if o.Has(xUnportableOpen) { + b.WriteString("|OPEN") + } + if o.Has(xUnportableRead) { + b.WriteString("|READ") + } + if o.Has(xUnportableCloseWrite) { + b.WriteString("|CLOSE_WRITE") + } + if o.Has(xUnportableCloseRead) { + b.WriteString("|CLOSE_READ") + } if o.Has(Rename) { b.WriteString("|RENAME") } @@ -100,24 +389,48 @@ func (e Event) Has(op Op) bool { return e.Op.Has(op) } // String returns a string representation of the event with their path. func (e Event) String() string { + if e.renamedFrom != "" { + return fmt.Sprintf("%-13s %q ← %q", e.Op.String(), e.Name, e.renamedFrom) + } return fmt.Sprintf("%-13s %q", e.Op.String(), e.Name) } type ( + backend interface { + Add(string) error + AddWith(string, ...addOpt) error + Remove(string) error + WatchList() []string + Close() error + xSupports(Op) bool + } addOpt func(opt *withOpts) withOpts struct { - bufsize int + bufsize int + op Op + noFollow bool + sendCreate bool } ) +var debug = func() bool { + // Check for exactly "1" (rather than mere existence) so we can add + // options/flags in the future. I don't know if we ever want that, but it's + // nice to leave the option open. + return os.Getenv("FSNOTIFY_DEBUG") == "1" +}() + var defaultOpts = withOpts{ bufsize: 65536, // 64K + op: Create | Write | Remove | Rename | Chmod, } func getOptions(opts ...addOpt) withOpts { with := defaultOpts for _, o := range opts { - o(&with) + if o != nil { + o(&with) + } } return with } @@ -136,9 +449,44 @@ func WithBufferSize(bytes int) addOpt { return func(opt *withOpts) { opt.bufsize = bytes } } +// WithOps sets which operations to listen for. The default is [Create], +// [Write], [Remove], [Rename], and [Chmod]. +// +// Excluding operations you're not interested in can save quite a bit of CPU +// time; in some use cases there may be hundreds of thousands of useless Write +// or Chmod operations per second. +// +// This can also be used to add unportable operations not supported by all +// platforms; unportable operations all start with "Unportable": +// [UnportableOpen], [UnportableRead], [UnportableCloseWrite], and +// [UnportableCloseRead]. +// +// AddWith returns an error when using an unportable operation that's not +// supported. Use [Watcher.Support] to check for support. +func withOps(op Op) addOpt { + return func(opt *withOpts) { opt.op = op } +} + +// WithNoFollow disables following symlinks, so the symlinks themselves are +// watched. +func withNoFollow() addOpt { + return func(opt *withOpts) { opt.noFollow = true } +} + +// "Internal" option for recursive watches on inotify. +func withCreate() addOpt { + return func(opt *withOpts) { opt.sendCreate = true } +} + +var enableRecurse = false + // Check if this path is recursive (ends with "/..." or "\..."), and return the // path with the /... stripped. func recursivePath(path string) (string, bool) { + path = filepath.Clean(path) + if !enableRecurse { // Only enabled in tests for now. + return path, false + } if filepath.Base(path) == "..." { return filepath.Dir(path), true } diff --git a/vendor/github.com/fsnotify/fsnotify/internal/darwin.go b/vendor/github.com/fsnotify/fsnotify/internal/darwin.go new file mode 100644 index 000000000..b0eab1009 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/darwin.go @@ -0,0 +1,39 @@ +//go:build darwin + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + SyscallEACCES = syscall.EACCES + UnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +// Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ +func SetRlimit() { + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + maxfiles = l.Cur + + if n, err := syscall.SysctlUint32("kern.maxfiles"); err == nil && uint64(n) < maxfiles { + maxfiles = uint64(n) + } + + if n, err := syscall.SysctlUint32("kern.maxfilesperproc"); err == nil && uint64(n) < maxfiles { + maxfiles = uint64(n) + } +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, dev) } diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go new file mode 100644 index 000000000..928319fb0 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_darwin.go @@ -0,0 +1,57 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ABSOLUTE", unix.NOTE_ABSOLUTE}, + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_BACKGROUND", unix.NOTE_BACKGROUND}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_CRITICAL", unix.NOTE_CRITICAL}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXITSTATUS", unix.NOTE_EXITSTATUS}, + {"NOTE_EXIT_CSERROR", unix.NOTE_EXIT_CSERROR}, + {"NOTE_EXIT_DECRYPTFAIL", unix.NOTE_EXIT_DECRYPTFAIL}, + {"NOTE_EXIT_DETAIL", unix.NOTE_EXIT_DETAIL}, + {"NOTE_EXIT_DETAIL_MASK", unix.NOTE_EXIT_DETAIL_MASK}, + {"NOTE_EXIT_MEMORY", unix.NOTE_EXIT_MEMORY}, + {"NOTE_EXIT_REPARENTED", unix.NOTE_EXIT_REPARENTED}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_FUNLOCK", unix.NOTE_FUNLOCK}, + {"NOTE_LEEWAY", unix.NOTE_LEEWAY}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_MACHTIME", unix.NOTE_MACHTIME}, + {"NOTE_MACH_CONTINUOUS_TIME", unix.NOTE_MACH_CONTINUOUS_TIME}, + {"NOTE_NONE", unix.NOTE_NONE}, + {"NOTE_NSECONDS", unix.NOTE_NSECONDS}, + {"NOTE_OOB", unix.NOTE_OOB}, + //{"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, -0x100000 (?!) + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_REAP", unix.NOTE_REAP}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_SECONDS", unix.NOTE_SECONDS}, + {"NOTE_SIGNAL", unix.NOTE_SIGNAL}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_USECONDS", unix.NOTE_USECONDS}, + {"NOTE_VM_ERROR", unix.NOTE_VM_ERROR}, + {"NOTE_VM_PRESSURE", unix.NOTE_VM_PRESSURE}, + {"NOTE_VM_PRESSURE_SUDDEN_TERMINATE", unix.NOTE_VM_PRESSURE_SUDDEN_TERMINATE}, + {"NOTE_VM_PRESSURE_TERMINATE", unix.NOTE_VM_PRESSURE_TERMINATE}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go new file mode 100644 index 000000000..3186b0c34 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_dragonfly.go @@ -0,0 +1,33 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_OOB", unix.NOTE_OOB}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go new file mode 100644 index 000000000..f69fdb930 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_freebsd.go @@ -0,0 +1,42 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ABSTIME", unix.NOTE_ABSTIME}, + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_CLOSE", unix.NOTE_CLOSE}, + {"NOTE_CLOSE_WRITE", unix.NOTE_CLOSE_WRITE}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FFAND", unix.NOTE_FFAND}, + {"NOTE_FFCOPY", unix.NOTE_FFCOPY}, + {"NOTE_FFCTRLMASK", unix.NOTE_FFCTRLMASK}, + {"NOTE_FFLAGSMASK", unix.NOTE_FFLAGSMASK}, + {"NOTE_FFNOP", unix.NOTE_FFNOP}, + {"NOTE_FFOR", unix.NOTE_FFOR}, + {"NOTE_FILE_POLL", unix.NOTE_FILE_POLL}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_MSECONDS", unix.NOTE_MSECONDS}, + {"NOTE_NSECONDS", unix.NOTE_NSECONDS}, + {"NOTE_OPEN", unix.NOTE_OPEN}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_READ", unix.NOTE_READ}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_SECONDS", unix.NOTE_SECONDS}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRIGGER", unix.NOTE_TRIGGER}, + {"NOTE_USECONDS", unix.NOTE_USECONDS}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go new file mode 100644 index 000000000..607e683bd --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_kqueue.go @@ -0,0 +1,32 @@ +//go:build freebsd || openbsd || netbsd || dragonfly || darwin + +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, kevent *unix.Kevent_t) { + mask := uint32(kevent.Fflags) + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %10d:%-60s → %q\n", + time.Now().Format("15:04:05.000000000"), mask, strings.Join(l, " | "), name) +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go new file mode 100644 index 000000000..35c734be4 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_linux.go @@ -0,0 +1,56 @@ +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, mask, cookie uint32) { + names := []struct { + n string + m uint32 + }{ + {"IN_ACCESS", unix.IN_ACCESS}, + {"IN_ATTRIB", unix.IN_ATTRIB}, + {"IN_CLOSE", unix.IN_CLOSE}, + {"IN_CLOSE_NOWRITE", unix.IN_CLOSE_NOWRITE}, + {"IN_CLOSE_WRITE", unix.IN_CLOSE_WRITE}, + {"IN_CREATE", unix.IN_CREATE}, + {"IN_DELETE", unix.IN_DELETE}, + {"IN_DELETE_SELF", unix.IN_DELETE_SELF}, + {"IN_IGNORED", unix.IN_IGNORED}, + {"IN_ISDIR", unix.IN_ISDIR}, + {"IN_MODIFY", unix.IN_MODIFY}, + {"IN_MOVE", unix.IN_MOVE}, + {"IN_MOVED_FROM", unix.IN_MOVED_FROM}, + {"IN_MOVED_TO", unix.IN_MOVED_TO}, + {"IN_MOVE_SELF", unix.IN_MOVE_SELF}, + {"IN_OPEN", unix.IN_OPEN}, + {"IN_Q_OVERFLOW", unix.IN_Q_OVERFLOW}, + {"IN_UNMOUNT", unix.IN_UNMOUNT}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + var c string + if cookie > 0 { + c = fmt.Sprintf("(cookie: %d) ", cookie) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %-30s → %s%q\n", + time.Now().Format("15:04:05.000000000"), strings.Join(l, "|"), c, name) +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go new file mode 100644 index 000000000..e5b3b6f69 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_netbsd.go @@ -0,0 +1,25 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go new file mode 100644 index 000000000..1dd455bc5 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_openbsd.go @@ -0,0 +1,28 @@ +package internal + +import "golang.org/x/sys/unix" + +var names = []struct { + n string + m uint32 +}{ + {"NOTE_ATTRIB", unix.NOTE_ATTRIB}, + // {"NOTE_CHANGE", unix.NOTE_CHANGE}, // Not on 386? + {"NOTE_CHILD", unix.NOTE_CHILD}, + {"NOTE_DELETE", unix.NOTE_DELETE}, + {"NOTE_EOF", unix.NOTE_EOF}, + {"NOTE_EXEC", unix.NOTE_EXEC}, + {"NOTE_EXIT", unix.NOTE_EXIT}, + {"NOTE_EXTEND", unix.NOTE_EXTEND}, + {"NOTE_FORK", unix.NOTE_FORK}, + {"NOTE_LINK", unix.NOTE_LINK}, + {"NOTE_LOWAT", unix.NOTE_LOWAT}, + {"NOTE_PCTRLMASK", unix.NOTE_PCTRLMASK}, + {"NOTE_PDATAMASK", unix.NOTE_PDATAMASK}, + {"NOTE_RENAME", unix.NOTE_RENAME}, + {"NOTE_REVOKE", unix.NOTE_REVOKE}, + {"NOTE_TRACK", unix.NOTE_TRACK}, + {"NOTE_TRACKERR", unix.NOTE_TRACKERR}, + {"NOTE_TRUNCATE", unix.NOTE_TRUNCATE}, + {"NOTE_WRITE", unix.NOTE_WRITE}, +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go new file mode 100644 index 000000000..f1b2e73bd --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_solaris.go @@ -0,0 +1,45 @@ +package internal + +import ( + "fmt" + "os" + "strings" + "time" + + "golang.org/x/sys/unix" +) + +func Debug(name string, mask int32) { + names := []struct { + n string + m int32 + }{ + {"FILE_ACCESS", unix.FILE_ACCESS}, + {"FILE_MODIFIED", unix.FILE_MODIFIED}, + {"FILE_ATTRIB", unix.FILE_ATTRIB}, + {"FILE_TRUNC", unix.FILE_TRUNC}, + {"FILE_NOFOLLOW", unix.FILE_NOFOLLOW}, + {"FILE_DELETE", unix.FILE_DELETE}, + {"FILE_RENAME_TO", unix.FILE_RENAME_TO}, + {"FILE_RENAME_FROM", unix.FILE_RENAME_FROM}, + {"UNMOUNTED", unix.UNMOUNTED}, + {"MOUNTEDOVER", unix.MOUNTEDOVER}, + {"FILE_EXCEPTION", unix.FILE_EXCEPTION}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %10d:%-30s → %q\n", + time.Now().Format("15:04:05.000000000"), mask, strings.Join(l, " | "), name) +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go b/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go new file mode 100644 index 000000000..52bf4ce53 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/debug_windows.go @@ -0,0 +1,40 @@ +package internal + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "golang.org/x/sys/windows" +) + +func Debug(name string, mask uint32) { + names := []struct { + n string + m uint32 + }{ + {"FILE_ACTION_ADDED", windows.FILE_ACTION_ADDED}, + {"FILE_ACTION_REMOVED", windows.FILE_ACTION_REMOVED}, + {"FILE_ACTION_MODIFIED", windows.FILE_ACTION_MODIFIED}, + {"FILE_ACTION_RENAMED_OLD_NAME", windows.FILE_ACTION_RENAMED_OLD_NAME}, + {"FILE_ACTION_RENAMED_NEW_NAME", windows.FILE_ACTION_RENAMED_NEW_NAME}, + } + + var ( + l []string + unknown = mask + ) + for _, n := range names { + if mask&n.m == n.m { + l = append(l, n.n) + unknown ^= n.m + } + } + if unknown > 0 { + l = append(l, fmt.Sprintf("0x%x", unknown)) + } + fmt.Fprintf(os.Stderr, "FSNOTIFY_DEBUG: %s %-65s → %q\n", + time.Now().Format("15:04:05.000000000"), strings.Join(l, " | "), filepath.ToSlash(name)) +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go b/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go new file mode 100644 index 000000000..547df1df8 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/freebsd.go @@ -0,0 +1,31 @@ +//go:build freebsd + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + SyscallEACCES = syscall.EACCES + UnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +func SetRlimit() { + // Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + maxfiles = uint64(l.Cur) +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, uint64(dev)) } diff --git a/vendor/github.com/fsnotify/fsnotify/internal/internal.go b/vendor/github.com/fsnotify/fsnotify/internal/internal.go new file mode 100644 index 000000000..7daa45e19 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/internal.go @@ -0,0 +1,2 @@ +// Package internal contains some helpers. +package internal diff --git a/vendor/github.com/fsnotify/fsnotify/internal/unix.go b/vendor/github.com/fsnotify/fsnotify/internal/unix.go new file mode 100644 index 000000000..30976ce97 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/unix.go @@ -0,0 +1,31 @@ +//go:build !windows && !darwin && !freebsd + +package internal + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +var ( + SyscallEACCES = syscall.EACCES + UnixEACCES = unix.EACCES +) + +var maxfiles uint64 + +func SetRlimit() { + // Go 1.19 will do this automatically: https://go-review.googlesource.com/c/go/+/393354/ + var l syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l) + if err == nil && l.Cur != l.Max { + l.Cur = l.Max + syscall.Setrlimit(syscall.RLIMIT_NOFILE, &l) + } + maxfiles = uint64(l.Cur) +} + +func Maxfiles() uint64 { return maxfiles } +func Mkfifo(path string, mode uint32) error { return unix.Mkfifo(path, mode) } +func Mknod(path string, mode uint32, dev int) error { return unix.Mknod(path, mode, dev) } diff --git a/vendor/github.com/fsnotify/fsnotify/internal/unix2.go b/vendor/github.com/fsnotify/fsnotify/internal/unix2.go new file mode 100644 index 000000000..37dfeddc2 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/unix2.go @@ -0,0 +1,7 @@ +//go:build !windows + +package internal + +func HasPrivilegesForSymlink() bool { + return true +} diff --git a/vendor/github.com/fsnotify/fsnotify/internal/windows.go b/vendor/github.com/fsnotify/fsnotify/internal/windows.go new file mode 100644 index 000000000..a72c64954 --- /dev/null +++ b/vendor/github.com/fsnotify/fsnotify/internal/windows.go @@ -0,0 +1,41 @@ +//go:build windows + +package internal + +import ( + "errors" + + "golang.org/x/sys/windows" +) + +// Just a dummy. +var ( + SyscallEACCES = errors.New("dummy") + UnixEACCES = errors.New("dummy") +) + +func SetRlimit() {} +func Maxfiles() uint64 { return 1<<64 - 1 } +func Mkfifo(path string, mode uint32) error { return errors.New("no FIFOs on Windows") } +func Mknod(path string, mode uint32, dev int) error { return errors.New("no device nodes on Windows") } + +func HasPrivilegesForSymlink() bool { + var sid *windows.SID + err := windows.AllocateAndInitializeSid( + &windows.SECURITY_NT_AUTHORITY, + 2, + windows.SECURITY_BUILTIN_DOMAIN_RID, + windows.DOMAIN_ALIAS_RID_ADMINS, + 0, 0, 0, 0, 0, 0, + &sid) + if err != nil { + return false + } + defer windows.FreeSid(sid) + token := windows.Token(0) + member, err := token.IsMember(sid) + if err != nil { + return false + } + return member || token.IsElevated() +} diff --git a/vendor/github.com/fsnotify/fsnotify/mkdoc.zsh b/vendor/github.com/fsnotify/fsnotify/mkdoc.zsh deleted file mode 100644 index 99012ae65..000000000 --- a/vendor/github.com/fsnotify/fsnotify/mkdoc.zsh +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env zsh -[ "${ZSH_VERSION:-}" = "" ] && echo >&2 "Only works with zsh" && exit 1 -setopt err_exit no_unset pipefail extended_glob - -# Simple script to update the godoc comments on all watchers so you don't need -# to update the same comment 5 times. - -watcher=$(</tmp/x - print -r -- $cmt >>/tmp/x - tail -n+$(( end + 1 )) $file >>/tmp/x - mv /tmp/x $file - done -} - -set-cmt '^type Watcher struct ' $watcher -set-cmt '^func NewWatcher(' $new -set-cmt '^func NewBufferedWatcher(' $newbuffered -set-cmt '^func (w \*Watcher) Add(' $add -set-cmt '^func (w \*Watcher) AddWith(' $addwith -set-cmt '^func (w \*Watcher) Remove(' $remove -set-cmt '^func (w \*Watcher) Close(' $close -set-cmt '^func (w \*Watcher) WatchList(' $watchlist -set-cmt '^[[:space:]]*Events *chan Event$' $events -set-cmt '^[[:space:]]*Errors *chan error$' $errors diff --git a/vendor/github.com/fsnotify/fsnotify/system_bsd.go b/vendor/github.com/fsnotify/fsnotify/system_bsd.go index 4322b0b88..f65e8fe3e 100644 --- a/vendor/github.com/fsnotify/fsnotify/system_bsd.go +++ b/vendor/github.com/fsnotify/fsnotify/system_bsd.go @@ -1,5 +1,4 @@ //go:build freebsd || openbsd || netbsd || dragonfly -// +build freebsd openbsd netbsd dragonfly package fsnotify diff --git a/vendor/github.com/fsnotify/fsnotify/system_darwin.go b/vendor/github.com/fsnotify/fsnotify/system_darwin.go index 5da5ffa78..a29fc7aab 100644 --- a/vendor/github.com/fsnotify/fsnotify/system_darwin.go +++ b/vendor/github.com/fsnotify/fsnotify/system_darwin.go @@ -1,5 +1,4 @@ //go:build darwin -// +build darwin package fsnotify diff --git a/vendor/github.com/vishvananda/netlink/.gitignore b/vendor/github.com/vishvananda/netlink/.gitignore index 9f11b755a..66f8fb502 100644 --- a/vendor/github.com/vishvananda/netlink/.gitignore +++ b/vendor/github.com/vishvananda/netlink/.gitignore @@ -1 +1,2 @@ .idea/ +.vscode/ diff --git a/vendor/github.com/vishvananda/netlink/.travis.yml b/vendor/github.com/vishvananda/netlink/.travis.yml deleted file mode 100644 index 7d14af4d6..000000000 --- a/vendor/github.com/vishvananda/netlink/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -language: go -go: - - "1.10.x" - - "1.11.x" - - "1.12.x" -before_script: - # make sure we keep path in tact when we sudo - - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers - # modprobe ip_gre or else the first gre device can't be deleted - - sudo modprobe ip_gre - # modprobe nf_conntrack for the conntrack testing - - sudo modprobe nf_conntrack - - sudo modprobe nf_conntrack_netlink - - sudo modprobe nf_conntrack_ipv4 - - sudo modprobe nf_conntrack_ipv6 - - sudo modprobe sch_hfsc -install: - - go get github.com/vishvananda/netns -go_import_path: github.com/vishvananda/netlink diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md index a88e2f418..0128bc67d 100644 --- a/vendor/github.com/vishvananda/netlink/README.md +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -1,6 +1,6 @@ # netlink - netlink library for go # -[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) +![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) The netlink package provides a simple netlink library for go. Netlink is the interface a user-space program in linux uses to communicate with diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go index f08c95696..653f540db 100644 --- a/vendor/github.com/vishvananda/netlink/addr.go +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -17,6 +17,7 @@ type Addr struct { Broadcast net.IP PreferedLft int ValidLft int + LinkIndex int } // String returns $ip/$netmask $label diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index 28746d5af..218ab2379 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -11,9 +11,6 @@ import ( "golang.org/x/sys/unix" ) -// IFA_FLAGS is a u32 attribute. -const IFA_FLAGS = 0x8 - // AddrAdd will add an IP address to a link device. // // Equivalent to: `ip addr add $addr dev $link` @@ -77,17 +74,19 @@ func (h *Handle) AddrDel(link Link, addr *Addr) error { } func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error { - base := link.Attrs() - if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { - return fmt.Errorf("label must begin with interface name") - } - h.ensureIndex(base) - family := nl.GetIPFamily(addr.IP) - msg := nl.NewIfAddrmsg(family) - msg.Index = uint32(base.Index) msg.Scope = uint8(addr.Scope) + if link == nil { + msg.Index = uint32(addr.LinkIndex) + } else { + base := link.Attrs() + if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { + return fmt.Errorf("label must begin with interface name") + } + h.ensureIndex(base) + msg.Index = uint32(base.Index) + } mask := addr.Mask if addr.Peer != nil { mask = addr.Peer.Mask @@ -125,7 +124,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error } else { b := make([]byte, 4) native.PutUint32(b, uint32(addr.Flags)) - flagsData := nl.NewRtAttr(IFA_FLAGS, b) + flagsData := nl.NewRtAttr(unix.IFA_FLAGS, b) req.AddData(flagsData) } } @@ -156,10 +155,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error // value should be "forever". To compensate for that, only add the attributes if at least one of the values is // non-zero, which means the caller has explicitly set them if addr.ValidLft > 0 || addr.PreferedLft > 0 { - cachedata := nl.IfaCacheInfo{ - IfaValid: uint32(addr.ValidLft), - IfaPrefered: uint32(addr.PreferedLft), - } + cachedata := nl.IfaCacheInfo{unix.IfaCacheinfo{ + Valid: uint32(addr.ValidLft), + Prefered: uint32(addr.PreferedLft), + }} req.AddData(nl.NewRtAttr(unix.IFA_CACHEINFO, cachedata.Serialize())) } @@ -179,7 +178,7 @@ func AddrList(link Link, family int) ([]Addr, error) { // The list can be filtered by link and ip family. func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) - msg := nl.NewIfInfomsg(family) + msg := nl.NewIfAddrmsg(family) req.AddData(msg) msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR) @@ -196,12 +195,12 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { var res []Addr for _, m := range msgs { - addr, msgFamily, ifindex, err := parseAddr(m) + addr, msgFamily, err := parseAddr(m) if err != nil { return res, err } - if link != nil && ifindex != indexFilter { + if link != nil && addr.LinkIndex != indexFilter { // Ignore messages from other interfaces continue } @@ -216,11 +215,11 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { return res, nil } -func parseAddr(m []byte) (addr Addr, family, index int, err error) { +func parseAddr(m []byte) (addr Addr, family int, err error) { msg := nl.DeserializeIfAddrmsg(m) family = -1 - index = -1 + addr.LinkIndex = -1 attrs, err1 := nl.ParseRouteAttr(m[msg.Len():]) if err1 != nil { @@ -229,7 +228,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { } family = int(msg.Family) - index = int(msg.Index) + addr.LinkIndex = int(msg.Index) var local, dst *net.IPNet for _, attr := range attrs { @@ -254,12 +253,12 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { addr.Broadcast = attr.Value case unix.IFA_LABEL: addr.Label = string(attr.Value[:len(attr.Value)-1]) - case IFA_FLAGS: + case unix.IFA_FLAGS: addr.Flags = int(native.Uint32(attr.Value[0:4])) - case nl.IFA_CACHEINFO: + case unix.IFA_CACHEINFO: ci := nl.DeserializeIfaCacheInfo(attr.Value) - addr.PreferedLft = int(ci.IfaPrefered) - addr.ValidLft = int(ci.IfaValid) + addr.PreferedLft = int(ci.Prefered) + addr.ValidLft = int(ci.Valid) } } @@ -271,7 +270,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { // But obviously, as there are IPv6 PtP addresses, too, // IFA_LOCAL should also be handled for IPv6. if local != nil { - if family == FAMILY_V4 && local.IP.Equal(dst.IP) { + if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) { addr.IPNet = dst } else { addr.IPNet = local @@ -299,22 +298,24 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeOptions contains a set of options to use with // AddrSubscribeWithOptions. type AddrSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool - ReceiveBufferSize int + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // AddrSubscribeWithOptions work like AddrSubscribe but enable to @@ -325,26 +326,33 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize) + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error { +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvBufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) if err != nil { return err } - if done != nil { - go func() { - <-done - s.Close() - }() + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } } if rcvbuf != 0 { - err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false) + err = s.SetReceiveBufferSize(rcvbuf, rcvBufForce) if err != nil { return err } } + if done != nil { + go func() { + <-done + s.Close() + }() + } if listExisting { req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) @@ -360,7 +368,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -375,7 +384,6 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue @@ -394,7 +402,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c continue } - addr, _, ifindex, err := parseAddr(m.Data) + addr, _, err := parseAddr(m.Data) if err != nil { if cberr != nil { cberr(fmt.Errorf("could not parse address: %v", err)) @@ -403,7 +411,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c } ch <- AddrUpdate{LinkAddress: *addr.IPNet, - LinkIndex: ifindex, + LinkIndex: addr.LinkIndex, NewAddr: msgType == unix.RTM_NEWADDR, Flags: addr.Flags, Scope: addr.Scope, diff --git a/vendor/github.com/vishvananda/netlink/bpf_linux.go b/vendor/github.com/vishvananda/netlink/bpf_linux.go index 6631626bf..96befbfe0 100644 --- a/vendor/github.com/vishvananda/netlink/bpf_linux.go +++ b/vendor/github.com/vishvananda/netlink/bpf_linux.go @@ -16,6 +16,30 @@ const ( BPF_PROG_TYPE_SCHED_ACT BPF_PROG_TYPE_TRACEPOINT BPF_PROG_TYPE_XDP + BPF_PROG_TYPE_PERF_EVENT + BPF_PROG_TYPE_CGROUP_SKB + BPF_PROG_TYPE_CGROUP_SOCK + BPF_PROG_TYPE_LWT_IN + BPF_PROG_TYPE_LWT_OUT + BPF_PROG_TYPE_LWT_XMIT + BPF_PROG_TYPE_SOCK_OPS + BPF_PROG_TYPE_SK_SKB + BPF_PROG_TYPE_CGROUP_DEVICE + BPF_PROG_TYPE_SK_MSG + BPF_PROG_TYPE_RAW_TRACEPOINT + BPF_PROG_TYPE_CGROUP_SOCK_ADDR + BPF_PROG_TYPE_LWT_SEG6LOCAL + BPF_PROG_TYPE_LIRC_MODE2 + BPF_PROG_TYPE_SK_REUSEPORT + BPF_PROG_TYPE_FLOW_DISSECTOR + BPF_PROG_TYPE_CGROUP_SYSCTL + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE + BPF_PROG_TYPE_CGROUP_SOCKOPT + BPF_PROG_TYPE_TRACING + BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_EXT + BPF_PROG_TYPE_LSM + BPF_PROG_TYPE_SK_LOOKUP ) type BPFAttr struct { diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go index 6e1224c47..6c340b0ce 100644 --- a/vendor/github.com/vishvananda/netlink/bridge_linux.go +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -63,7 +63,19 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanAdd adds a new vlan filter entry // Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, 0, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanAddRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, vidEnd, pvid, untagged, self, master) } // BridgeVlanDel adds a new vlan filter entry @@ -75,10 +87,22 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanDel adds a new vlan filter entry // Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, 0, pvid, untagged, self, master) } -func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanDelRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, vidEnd, pvid, untagged, self, master) +} + +func (h *Handle) bridgeVlanModify(cmd int, link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK) @@ -105,7 +129,20 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged if untagged { vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED } - br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + if vidEnd != 0 { + vlanEndInfo := &nl.BridgeVlanInfo{Vid: vidEnd} + vlanEndInfo.Flags = vlanInfo.Flags + + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_BEGIN + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + vlanEndInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_END + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanEndInfo.Serialize()) + } else { + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + } + req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err diff --git a/vendor/github.com/vishvananda/netlink/chain.go b/vendor/github.com/vishvananda/netlink/chain.go new file mode 100644 index 000000000..1d1c144e9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain.go @@ -0,0 +1,22 @@ +package netlink + +import ( + "fmt" +) + +// Chain contains the attributes of a Chain +type Chain struct { + Parent uint32 + Chain uint32 +} + +func (c Chain) String() string { + return fmt.Sprintf("{Parent: %d, Chain: %d}", c.Parent, c.Chain) +} + +func NewChain(parent uint32, chain uint32) Chain { + return Chain{ + Parent: parent, + Chain: chain, + } +} diff --git a/vendor/github.com/vishvananda/netlink/chain_linux.go b/vendor/github.com/vishvananda/netlink/chain_linux.go new file mode 100644 index 000000000..d9f441613 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain_linux.go @@ -0,0 +1,112 @@ +package netlink + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// ChainDel will delete a chain from the system. +func ChainDel(link Link, chain Chain) error { + // Equivalent to: `tc chain del $chain` + return pkgHandle.ChainDel(link, chain) +} + +// ChainDel will delete a chain from the system. +// Equivalent to: `tc chain del $chain` +func (h *Handle) ChainDel(link Link, chain Chain) error { + return h.chainModify(unix.RTM_DELCHAIN, 0, link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func ChainAdd(link Link, chain Chain) error { + return pkgHandle.ChainAdd(link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func (h *Handle) ChainAdd(link Link, chain Chain) error { + return h.chainModify( + unix.RTM_NEWCHAIN, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + link, + chain) +} + +func (h *Handle) chainModify(cmd, flags int, link Link, chain Chain) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: chain.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(chain.Chain))) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func ChainList(link Link, parent uint32) ([]Chain, error) { + return pkgHandle.ChainList(link, parent) +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func (h *Handle) ChainList(link Link, parent uint32) ([]Chain, error) { + req := h.newNetlinkRequest(unix.RTM_GETCHAIN, unix.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: parent, + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWCHAIN) + if err != nil { + return nil, err + } + + var res []Chain + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip chains from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + var chain Chain + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_CHAIN: + chain.Chain = native.Uint32(attr.Value) + chain.Parent = parent + } + } + res = append(res, chain) + } + + return res, nil +} diff --git a/vendor/github.com/vishvananda/netlink/class.go b/vendor/github.com/vishvananda/netlink/class.go index dcc22d9e9..e686f6745 100644 --- a/vendor/github.com/vishvananda/netlink/class.go +++ b/vendor/github.com/vishvananda/netlink/class.go @@ -47,6 +47,7 @@ type ClassStatistics struct { Basic *GnetStatsBasic Queue *GnetStatsQueue RateEst *GnetStatsRateEst + BasicHw *GnetStatsBasic // Hardward statistics added in kernel 4.20 } // NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0. @@ -55,6 +56,7 @@ func NewClassStatistics() *ClassStatistics { Basic: &GnetStatsBasic{}, Queue: &GnetStatsQueue{}, RateEst: &GnetStatsRateEst{}, + BasicHw: &GnetStatsBasic{}, } } @@ -132,7 +134,10 @@ func (class *GenericClass) Type() string { return class.ClassType } -// ServiceCurve is the way the HFSC curve are represented +// ServiceCurve is a nondecreasing function of some time unit, returning the amount of service +// (an allowed or allocated amount of bandwidth) at some specific point in time. The purpose of it +// should be subconsciously obvious: if a class was allowed to transfer not less than the amount +// specified by its service curve, then the service curve is not violated. type ServiceCurve struct { m1 uint32 d uint32 @@ -144,6 +149,21 @@ func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) { return c.m1, c.d, c.m2 } +// Burst returns the burst rate (m1) of the curve +func (c *ServiceCurve) Burst() uint32 { + return c.m1 +} + +// Delay return the delay (d) of the curve +func (c *ServiceCurve) Delay() uint32 { + return c.d +} + +// Rate returns the rate (m2) of the curve +func (c *ServiceCurve) Rate() uint32 { + return c.m2 +} + // HfscClass is a representation of the HFSC class type HfscClass struct { ClassAttrs @@ -152,35 +172,44 @@ type HfscClass struct { Usc ServiceCurve } -// SetUsc sets the Usc curve +// SetUsc sets the USC curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetFsc sets the Fsc curve +// SetFsc sets the Fsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetRsc sets the Rsc curve +// SetRsc sets the Rsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetSC implements the SC from the tc CLI +// SetSC implements the SC from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) { - hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetRsc(m1, d, m2) + hfsc.SetFsc(m1, d, m2) } -// SetUL implements the UL from the tc CLI +// SetUL implements the UL from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) { - hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetUsc(m1, d, m2) } -// SetLS implements the LS from the tc CLI +// SetLS implements the LS from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) { - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetFsc(m1, d, m2) } // NewHfscClass returns a new HFSC struct with the set parameters @@ -193,6 +222,7 @@ func NewHfscClass(attrs ClassAttrs) *HfscClass { } } +// String() returns a string that contains the information and attributes of the HFSC class func (hfsc *HfscClass) String() string { return fmt.Sprintf( "{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}", diff --git a/vendor/github.com/vishvananda/netlink/class_linux.go b/vendor/github.com/vishvananda/netlink/class_linux.go index 31091e501..a82eb09de 100644 --- a/vendor/github.com/vishvananda/netlink/class_linux.go +++ b/vendor/github.com/vishvananda/netlink/class_linux.go @@ -43,12 +43,12 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { if buffer == 0 { buffer = uint32(float64(rate)/Hz() + float64(mtu)) } - buffer = uint32(Xmittime(rate, buffer)) + buffer = Xmittime(rate, buffer) if cbuffer == 0 { cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) } - cbuffer = uint32(Xmittime(ceil, cbuffer)) + cbuffer = Xmittime(ceil, cbuffer) return &HtbClass{ ClassAttrs: attrs, @@ -56,9 +56,9 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { Ceil: ceil, Buffer: buffer, Cbuffer: cbuffer, - Quantum: 10, Level: 0, - Prio: 0, + Prio: cattrs.Prio, + Quantum: cattrs.Quantum, } } @@ -176,12 +176,21 @@ func classPayload(req *nl.NetlinkRequest, class Class) error { options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize()) options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab)) options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab)) + if htb.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate)) + } + if htb.Ceil >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil)) + } case "hfsc": hfsc := class.(*HfscClass) opt := nl.HfscCopt{} - opt.Rsc.Set(hfsc.Rsc.Attrs()) - opt.Fsc.Set(hfsc.Fsc.Attrs()) - opt.Usc.Set(hfsc.Usc.Attrs()) + rm1, rd, rm2 := hfsc.Rsc.Attrs() + opt.Rsc.Set(rm1/8, rd, rm2/8) + fm1, fd, fm2 := hfsc.Fsc.Attrs() + opt.Fsc.Set(fm1/8, fd, fm2/8) + um1, ud, um2 := hfsc.Usc.Attrs() + opt.Usc.Set(um1/8, ud, um2/8) options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) @@ -303,6 +312,10 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro htb.Quantum = opt.Quantum htb.Level = opt.Level htb.Prio = opt.Prio + case nl.TCA_HTB_RATE64: + htb.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_HTB_CEIL64: + htb.Ceil = native.Uint64(datum.Value[0:8]) } } return detailed, nil @@ -315,11 +328,11 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs() switch datum.Attr.Type { case nl.TCA_HFSC_RSC: - hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Rsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} case nl.TCA_HFSC_FSC: - hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Fsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} case nl.TCA_HFSC_USC: - hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Usc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} } } return detailed, nil @@ -328,7 +341,6 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err func parseTcStats(data []byte) (*ClassStatistics, error) { buf := &bytes.Buffer{} buf.Write(data) - native := nl.NativeEndian() tcStats := &tcStats{} if err := binary.Read(buf, native, tcStats); err != nil { return nil, err @@ -350,7 +362,6 @@ func parseTcStats(data []byte) (*ClassStatistics, error) { func parseGnetStats(data []byte, gnetStats interface{}) error { buf := &bytes.Buffer{} buf.Write(data) - native := nl.NativeEndian() return binary.Read(buf, native, gnetStats) } @@ -377,6 +388,11 @@ func parseTcStats2(data []byte) (*ClassStatistics, error) { return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s", err, hex.Dump(datum.Value)) } + case nl.TCA_STATS_BASIC_HW: + if err := parseGnetStats(datum.Value, stats.BasicHw); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.BasicHw with: %v\n%s", + err, hex.Dump(datum.Value)) + } } } diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go index 4bff0dcba..ba022453b 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net" + "time" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" @@ -54,10 +55,30 @@ func ConntrackTableFlush(table ConntrackTableType) error { return pkgHandle.ConntrackTableFlush(table) } +// ConntrackCreate creates a new conntrack flow in the desired table +// conntrack -I [table] Create a conntrack or expectation +func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackCreate(table, family, flow) +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackUpdate(table, family, flow) +} + // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [ConntrackDeleteFilter] instead. func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { - return pkgHandle.ConntrackDeleteFilter(table, family, filter) + return pkgHandle.ConntrackDeleteFilters(table, family, filter) +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return pkgHandle.ConntrackDeleteFilters(table, family, filters...) } // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed @@ -86,9 +107,51 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { return err } +// ConntrackCreate creates a new conntrack flow in the desired table using the handle +// conntrack -I [table] Create a conntrack or expectation +func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [Handle.ConntrackDeleteFilters] instead. func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { + return h.ConntrackDeleteFilters(table, family, filter) +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { res, err := h.dumpConntrackTable(table, family) if err != nil { return 0, err @@ -97,12 +160,16 @@ func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFami var matched uint for _, dataRaw := range res { flow := parseRawData(dataRaw) - if match := filter.MatchConntrackFlow(flow); match { - req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) - // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already - req2.AddRawData(dataRaw[4:]) - req2.Execute(unix.NETLINK_NETFILTER, 0) - matched++ + for _, filter := range filters { + if match := filter.MatchConntrackFlow(flow); match { + req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already + req2.AddRawData(dataRaw[4:]) + req2.Execute(unix.NETLINK_NETFILTER, 0) + matched++ + // flow is already deleted, no need to match on other filters and continue to the next flow. + break + } } } @@ -127,10 +194,44 @@ func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) return req.Execute(unix.NETLINK_NETFILTER, 0) } +// ProtoInfo wraps an L4-protocol structure - roughly corresponds to the +// __nfct_protoinfo union found in libnetfilter_conntrack/include/internal/object.h. +// Currently, only protocol names, and TCP state is supported. +type ProtoInfo interface { + Protocol() string +} + +// ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union. +// Only TCP state is currently supported. +type ProtoInfoTCP struct { + State uint8 +} +// Protocol returns "tcp". +func (*ProtoInfoTCP) Protocol() string {return "tcp"} +func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) { + ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{}) + ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{}) + ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State)) + ctProtoInfoTCP.AddChild(ctProtoInfoTCPState) + ctProtoInfo.AddChild(ctProtoInfoTCP) + + return []*nl.RtAttr{ctProtoInfo}, nil +} + +// ProtoInfoSCTP only supports the protocol name. +type ProtoInfoSCTP struct {} +// Protocol returns "sctp". +func (*ProtoInfoSCTP) Protocol() string {return "sctp"} + +// ProtoInfoDCCP only supports the protocol name. +type ProtoInfoDCCP struct {} +// Protocol returns "dccp". +func (*ProtoInfoDCCP) Protocol() string {return "dccp"} + // The full conntrack flow structure is very complicated and can be found in the file: // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h // For the time being, the structure below allows to parse and extract the base information of a flow -type ipTuple struct { +type IPTuple struct { Bytes uint64 DstIP net.IP DstPort uint16 @@ -140,21 +241,150 @@ type ipTuple struct { SrcPort uint16 } +// toNlData generates the inner fields of a nested tuple netlink datastructure +// does not generate the "nested"-flagged outer message. +func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) { + + var srcIPsFlag, dstIPsFlag int + if family == nl.FAMILY_V4 { + srcIPsFlag = nl.CTA_IP_V4_SRC + dstIPsFlag = nl.CTA_IP_V4_DST + } else if family == nl.FAMILY_V6 { + srcIPsFlag = nl.CTA_IP_V6_SRC + dstIPsFlag = nl.CTA_IP_V6_DST + } else { + return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family) + } + + ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil) + ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP) + ctTupleIP.AddChild(ctTupleIPSrc) + ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP) + ctTupleIP.AddChild(ctTupleIPDst) + + ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil) + ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol}) + ctTupleProto.AddChild(ctTupleProtoNum) + ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort)) + ctTupleProto.AddChild(ctTupleProtoSrcPort) + ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort)) + ctTupleProto.AddChild(ctTupleProtoDstPort, ) + + return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil +} + type ConntrackFlow struct { FamilyType uint8 - Forward ipTuple - Reverse ipTuple + Forward IPTuple + Reverse IPTuple Mark uint32 + Zone uint16 + TimeStart uint64 + TimeStop uint64 + TimeOut uint32 + Labels []byte + ProtoInfo ProtoInfo } func (s *ConntrackFlow) String() string { // conntrack cmd output: - // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 - return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d", + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100 + // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) + start := time.Unix(0, int64(s.TimeStart)) + stop := time.Unix(0, int64(s.TimeStop)) + timeout := int32(s.TimeOut) + res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, s.Mark) + if len(s.Labels) > 0 { + res += fmt.Sprintf("labels=0x%x ", s.Labels) + } + if s.Zone != 0 { + res += fmt.Sprintf("zone=%d ", s.Zone) + } + res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout) + return res +} + +// toNlData generates netlink messages representing the flow. +func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) { + var payload []*nl.RtAttr + // The message structure is built as follows: + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + + // CTA_TUPLE_ORIG + ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil) + forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err) + } + for _, a := range forwardFlowAttrs { + ctTupleOrig.AddChild(a) + } + + // CTA_TUPLE_REPLY + ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil) + reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err) + } + for _, a := range reverseFlowAttrs { + ctTupleReply.AddChild(a) + } + + ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark)) + ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut)) + + payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout) + + if s.ProtoInfo != nil { + switch p := s.ProtoInfo.(type) { + case *ProtoInfoTCP: + attrs, err := p.toNlData() + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err) + } + payload = append(payload, attrs...) + default: + return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil") + } + } + + return payload, nil } // This method parse the ip tuple structure @@ -164,7 +394,7 @@ func (s *ConntrackFlow) String() string { // // // -func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { +func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 { for i := 0; i < 2; i++ { _, t, _, v := parseNfAttrTLV(reader) switch t { @@ -174,25 +404,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { tpl.DstIP = v } } - // Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO - reader.Seek(4, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) + // Get total length of nested protocol-specific info. + _, _, protoInfoTotalLen := parseNfAttrTL(reader) + _, t, l, v := parseNfAttrTLV(reader) + // Track the number of bytes read. + protoInfoBytesRead := uint16(nl.SizeofNfattr) + l if t == nl.CTA_PROTO_NUM { tpl.Protocol = uint8(v[0]) } - // Skip some padding 3 bytes + // We only parse TCP & UDP headers. Skip the others. + if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP { + // skip the rest + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol + } + // Skip 3 bytes of padding reader.Seek(3, seekCurrent) + protoInfoBytesRead += 3 for i := 0; i < 2; i++ { _, t, _ := parseNfAttrTL(reader) + protoInfoBytesRead += uint16(nl.SizeofNfattr) switch t { case nl.CTA_PROTO_SRC_PORT: parseBERaw16(reader, &tpl.SrcPort) + protoInfoBytesRead += 2 case nl.CTA_PROTO_DST_PORT: parseBERaw16(reader, &tpl.DstPort) + protoInfoBytesRead += 2 } - // Skip some padding 2 byte + // Skip 2 bytes of padding reader.Seek(2, seekCurrent) + protoInfoBytesRead += 2 } + // Skip any remaining/unknown parts of the message + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol } @@ -211,10 +459,18 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { binary.Read(r, nl.NativeEndian(), &attrType) isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED attrType = attrType & (nl.NLA_F_NESTED - 1) - return isNested, attrType, len } +// skipNfAttrValue seeks `r` past attr of length `len`. +// Maintains buffer alignment. +// Returns length of the seek performed. +func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 { + len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) + r.Seek(int64(len), seekCurrent) + return len +} + func parseBERaw16(r *bytes.Reader, v *uint16) { binary.Read(r, binary.BigEndian, v) } @@ -227,6 +483,10 @@ func parseBERaw64(r *bytes.Reader, v *uint64) { binary.Read(r, binary.BigEndian, v) } +func parseRaw32(r *bytes.Reader, v *uint32) { + binary.Read(r, nl.NativeEndian(), v) +} + func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { for i := 0; i < 2; i++ { switch _, t, _ := parseNfAttrTL(r); t { @@ -241,11 +501,107 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { return } +// when the flow is alive, only the timestamp_start is returned in structure +func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { + var numTimeStamps int + oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp + if readSize == uint16(oneItem) { + numTimeStamps = 1 + } else if readSize == 2*uint16(oneItem) { + numTimeStamps = 2 + } else { + return + } + for i := 0; i < numTimeStamps; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_TIMESTAMP_START: + parseBERaw64(r, &tstart) + case nl.CTA_TIMESTAMP_STOP: + parseBERaw64(r, &tstop) + default: + return + } + } + return + +} + +func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) { + binary.Read(r, binary.BigEndian, &s) + r.Seek(nl.SizeofNfattr - 1, seekCurrent) + return s +} + +// parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr. +func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) { + p := new(ProtoInfoTCP) + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP_STATE: + p.State = parseProtoInfoTCPState(r) + bytesRead += nl.SizeofNfattr + default: + bytesRead += int(skipNfAttrValue(r, l)) + } + } + + return p +} + +func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) { + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP: + p = parseProtoInfoTCP(r, l) + bytesRead += int(l) + // No inner fields of DCCP / SCTP currently supported. + case nl.CTA_PROTOINFO_DCCP: + p = new(ProtoInfoDCCP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + case nl.CTA_PROTOINFO_SCTP: + p = new(ProtoInfoSCTP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + default: + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + } + } + + return p +} + +func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { + parseBERaw32(r, &ttimeout) + return +} + func parseConnectionMark(r *bytes.Reader) (mark uint32) { parseBERaw32(r, &mark) return } +func parseConnectionLabels(r *bytes.Reader) (label []byte) { + label = make([]byte, 16) // netfilter defines 128 bit labels value + binary.Read(r, nl.NativeEndian(), &label) + return +} + +func parseConnectionZone(r *bytes.Reader) (zone uint16) { + parseBERaw16(r, &zone) + r.Seek(2, seekCurrent) + return +} + func parseRawData(data []byte) *ConntrackFlow { s := &ConntrackFlow{} // First there is the Nfgenmsg header @@ -266,25 +622,41 @@ func parseRawData(data []byte) *ConntrackFlow { if nested, t, l := parseNfAttrTL(reader); nested { switch t { case nl.CTA_TUPLE_ORIG: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Forward) } case nl.CTA_TUPLE_REPLY: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Reverse) } else { // Header not recognized skip it - reader.Seek(int64(l), seekCurrent) + skipNfAttrValue(reader, l) } case nl.CTA_COUNTERS_ORIG: s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) case nl.CTA_COUNTERS_REPLY: s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_TIMESTAMP: + s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) + case nl.CTA_PROTOINFO: + s.ProtoInfo = parseProtoInfo(reader, l) + default: + skipNfAttrValue(reader, l) } } else { switch t { case nl.CTA_MARK: s.Mark = parseConnectionMark(reader) + case nl.CTA_LABELS: + s.Labels = parseConnectionLabels(reader) + case nl.CTA_TIMEOUT: + s.TimeOut = parseTimeOut(reader) + case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE: + skipNfAttrValue(reader, l) + case nl.CTA_ZONE: + s.Zone = parseConnectionZone(reader) + default: + skipNfAttrValue(reader, l) } } } @@ -318,18 +690,27 @@ func parseRawData(data []byte) *ConntrackFlow { // --mask-src ip Source mask address // --mask-dst ip Destination mask address +// Layer 4 Protocol common parameters and options: +// TCP, UDP, SCTP, UDPLite and DCCP +// --sport, --orig-port-src port Source port in original direction +// --dport, --orig-port-dst port Destination port in original direction + // Filter types type ConntrackFilterType uint8 const ( - ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction - ConntrackOrigDstIP // -orig-dst ip Destination address from original direction - ConntrackReplySrcIP // --reply-src ip Reply Source IP - ConntrackReplyDstIP // --reply-dst ip Reply Destination IP - ConntrackReplyAnyIP // Match source or destination reply IP - ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP - ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP - ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instaed ConntrackReplyAnyIP + ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction + ConntrackOrigDstIP // -orig-dst ip Destination address from original direction + ConntrackReplySrcIP // --reply-src ip Reply Source IP + ConntrackReplyDstIP // --reply-dst ip Reply Destination IP + ConntrackReplyAnyIP // Match source or destination reply IP + ConntrackOrigSrcPort // --orig-port-src port Source port in original direction + ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction + ConntrackMatchLabels // --label label1,label2 Labels used in entry + ConntrackUnmatchLabels // --label label1,label2 Labels not used in entry + ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP + ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP + ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP ) type CustomConntrackFilter interface { @@ -339,53 +720,180 @@ type CustomConntrackFilter interface { } type ConntrackFilter struct { - ipFilter map[ConntrackFilterType]net.IP + ipNetFilter map[ConntrackFilterType]*net.IPNet + portFilter map[ConntrackFilterType]uint16 + protoFilter uint8 + labelFilter map[ConntrackFilterType][][]byte + zoneFilter *uint16 +} + +// AddIPNet adds a IP subnet to the conntrack filter +func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error { + if ipNet == nil { + return fmt.Errorf("Filter attribute empty") + } + if f.ipNetFilter == nil { + f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) + } + if _, ok := f.ipNetFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.ipNetFilter[tp] = ipNet + return nil } // AddIP adds an IP to the conntrack filter func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error { - if f.ipFilter == nil { - f.ipFilter = make(map[ConntrackFilterType]net.IP) + if ip == nil { + return fmt.Errorf("Filter attribute empty") + } + return f.AddIPNet(tp, NewIPNet(ip)) +} + +// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it +func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error { + switch f.protoFilter { + // TCP, UDP, DCCP, SCTP, UDPLite + case 6, 17, 33, 132, 136: + default: + return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter) } - if _, ok := f.ipFilter[tp]; ok { + + if f.portFilter == nil { + f.portFilter = make(map[ConntrackFilterType]uint16) + } + if _, ok := f.portFilter[tp]; ok { return errors.New("Filter attribute already present") } - f.ipFilter[tp] = ip + f.portFilter[tp] = port + return nil +} + +// AddProtocol adds the Layer 4 protocol to the conntrack filter +func (f *ConntrackFilter) AddProtocol(proto uint8) error { + if f.protoFilter != 0 { + return errors.New("Filter attribute already present") + } + f.protoFilter = proto + return nil +} + +// AddLabels adds the provided list (zero or more) of labels to the conntrack filter +// ConntrackFilterType here can be either: +// 1. ConntrackMatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` contains ALL the provided labels +// it is considered a match. This can be used when you want to match flows that contain +// one or more labels. +// 2. ConntrackUnmatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` does NOT contain ALL the provided labels +// it is considered a match. This can be used when you want to match flows that don't contain +// one or more labels. +func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error { + if len(labels) == 0 { + return errors.New("Invalid length for provided labels") + } + if f.labelFilter == nil { + f.labelFilter = make(map[ConntrackFilterType][][]byte) + } + if _, ok := f.labelFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.labelFilter[tp] = labels + return nil +} + +// AddZone adds a zone to the conntrack filter +func (f *ConntrackFilter) AddZone(zone uint16) error { + if f.zoneFilter != nil { + return errors.New("Filter attribute already present") + } + f.zoneFilter = &zone return nil } // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter // false otherwise func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { - if len(f.ipFilter) == 0 { + if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil { // empty filter always not match return false } - match := true - // -orig-src ip Source address from original direction - if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found { - match = match && elem.Equal(flow.Forward.SrcIP) + // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' + if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter { + // different Layer 4 protocol always not match + return false } - // -orig-dst ip Destination address from original direction - if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found { - match = match && elem.Equal(flow.Forward.DstIP) + // Conntrack zone filter + if f.zoneFilter != nil && *f.zoneFilter != flow.Zone { + return false } - // -src-nat ip Source NAT ip - if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found { - match = match && elem.Equal(flow.Reverse.SrcIP) + match := true + + // IP conntrack filter + if len(f.ipNetFilter) > 0 { + // -orig-src ip Source address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found { + match = match && elem.Contains(flow.Forward.SrcIP) + } + + // -orig-dst ip Destination address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found { + match = match && elem.Contains(flow.Forward.DstIP) + } + + // -src-nat ip Source NAT ip + if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found { + match = match && elem.Contains(flow.Reverse.SrcIP) + } + + // -dst-nat ip Destination NAT ip + if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found { + match = match && elem.Contains(flow.Reverse.DstIP) + } + + // Match source or destination reply IP + if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found { + match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP)) + } } - // -dst-nat ip Destination NAT ip - if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found { - match = match && elem.Equal(flow.Reverse.DstIP) + // Layer 4 Port filter + if len(f.portFilter) > 0 { + // -orig-port-src port Source port from original direction + if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found { + match = match && elem == flow.Forward.SrcPort + } + + // -orig-port-dst port Destination port from original direction + if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found { + match = match && elem == flow.Forward.DstPort + } } - // Match source or destination reply IP - if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found { - match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP)) + // Label filter + if len(f.labelFilter) > 0 { + if len(flow.Labels) > 0 { + // --label label1,label2 in conn entry; + // every label passed should be contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found { + for _, label := range elem { + match = match && (bytes.Contains(flow.Labels, label)) + } + } + // --label label1,label2 in conn entry; + // every label passed should be not contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found { + for _, label := range elem { + match = match && !(bytes.Contains(flow.Labels, label)) + } + } + } else { + // flow doesn't contain labels, so it doesn't contain or notContain any provided matches + match = false + } } return match diff --git a/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go index af7af799e..0bfdf422d 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go @@ -11,6 +11,9 @@ type InetFamily uint8 // ConntrackFlow placeholder type ConntrackFlow struct{} +// CustomConntrackFilter placeholder +type CustomConntrackFilter struct{} + // ConntrackFilter placeholder type ConntrackFilter struct{} @@ -29,10 +32,18 @@ func ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [ConntrackDeleteFilter] instead. func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { return 0, ErrNotImplemented } +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} + // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed // conntrack -L [table] [options] List conntrack or expectation table func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { @@ -48,6 +59,14 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [Handle.ConntrackDeleteFilters] instead. func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { return 0, ErrNotImplemented } + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/devlink_linux.go b/vendor/github.com/vishvananda/netlink/devlink_linux.go index 29b3f8ec1..d98801dbb 100644 --- a/vendor/github.com/vishvananda/netlink/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -1,9 +1,11 @@ package netlink import ( + "fmt" + "net" + "strings" "syscall" - "fmt" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) @@ -27,6 +29,325 @@ type DevlinkDevice struct { Attrs DevlinkDevAttrs } +// DevlinkPortFn represents port function and its attributes +type DevlinkPortFn struct { + HwAddr net.HardwareAddr + State uint8 + OpState uint8 +} + +// DevlinkPortFnSetAttrs represents attributes to set +type DevlinkPortFnSetAttrs struct { + FnAttrs DevlinkPortFn + HwAddrValid bool + StateValid bool +} + +// DevlinkPort represents port and its attributes +type DevlinkPort struct { + BusName string + DeviceName string + PortIndex uint32 + PortType uint16 + NetdeviceName string + NetdevIfIndex uint32 + RdmaDeviceName string + PortFlavour uint16 + Fn *DevlinkPortFn +} + +type DevLinkPortAddAttrs struct { + Controller uint32 + SfNumber uint32 + PortIndex uint32 + PfNumber uint16 + SfNumberValid bool + PortIndexValid bool + ControllerValid bool +} + +// DevlinkDeviceInfo represents devlink info +type DevlinkDeviceInfo struct { + Driver string + SerialNumber string + BoardID string + FwApp string + FwAppBoundleID string + FwAppName string + FwBoundleID string + FwMgmt string + FwMgmtAPI string + FwMgmtBuild string + FwNetlist string + FwNetlistBuild string + FwPsidAPI string + FwUndi string +} + +// DevlinkResource represents a device resource +type DevlinkResource struct { + Name string + ID uint64 + Size uint64 + SizeNew uint64 + SizeMin uint64 + SizeMax uint64 + SizeGranularity uint64 + PendingChange bool + Unit uint8 + SizeValid bool + OCCValid bool + OCCSize uint64 + Parent *DevlinkResource + Children []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResource, returns error if occured +func (dlr *DevlinkResource) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // mandatory attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_ID] + if !ok { + return fmt.Errorf("missing resource id") + } + dlr.ID = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_NAME] + if !ok { + return fmt.Errorf("missing resource name") + } + dlr.Name = nl.BytesToString(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE] + if !ok { + return fmt.Errorf("missing resource size") + } + dlr.Size = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_GRAN] + if !ok { + return fmt.Errorf("missing resource size granularity") + } + dlr.SizeGranularity = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_UNIT] + if !ok { + return fmt.Errorf("missing resource unit") + } + dlr.Unit = uint8(attr.Value[0]) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MIN] + if !ok { + return fmt.Errorf("missing resource size min") + } + dlr.SizeMin = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MAX] + if !ok { + return fmt.Errorf("missing resource size max") + } + dlr.SizeMax = native.Uint64(attr.Value) + + // optional attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_OCC] + if ok { + dlr.OCCSize = native.Uint64(attr.Value) + dlr.OCCValid = true + } + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_VALID] + if ok { + dlr.SizeValid = uint8(attr.Value[0]) != 0 + } + + dlr.SizeNew = dlr.Size + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_NEW] + if ok { + dlr.SizeNew = native.Uint64(attr.Value) + } + + dlr.PendingChange = dlr.Size != dlr.SizeNew + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if ok { + // handle nested resoruces recursively + subResources, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, subresource := range subResources { + resource := DevlinkResource{Parent: dlr} + attrs, err := nl.ParseRouteAttrAsMap(subresource.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse child resource, parent:%s. %w", dlr.Name, err) + } + dlr.Children = append(dlr.Children, resource) + } + } + return nil +} + +// DevlinkResources represents all devlink resources of a devlink device +type DevlinkResources struct { + Bus string + Device string + Resources []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResources, returns error if occured +func (dlrs *DevlinkResources) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // Bus + attr, ok = attrs[nl.DEVLINK_ATTR_BUS_NAME] + if !ok { + return fmt.Errorf("missing bus name") + } + dlrs.Bus = nl.BytesToString(attr.Value) + + // Device + attr, ok = attrs[nl.DEVLINK_ATTR_DEV_NAME] + if !ok { + return fmt.Errorf("missing device name") + } + dlrs.Device = nl.BytesToString(attr.Value) + + // Resource List + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if !ok { + return fmt.Errorf("missing resource list") + } + + resourceAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, resourceAttr := range resourceAttrs { + resource := DevlinkResource{} + attrs, err := nl.ParseRouteAttrAsMap(resourceAttr.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse root resoruces, %w", err) + } + dlrs.Resources = append(dlrs.Resources, resource) + } + + return nil +} + +// DevlinkParam represents parameter of the device +type DevlinkParam struct { + Name string + IsGeneric bool + Type uint8 // possible values are in nl.DEVLINK_PARAM_TYPE_* constants + Values []DevlinkParamValue +} + +// DevlinkParamValue contains values of the parameter +// Data field contains specific type which can be casted by unsing info from the DevlinkParam.Type field +type DevlinkParamValue struct { + rawData []byte + Data interface{} + CMODE uint8 // possible values are in nl.DEVLINK_PARAM_CMODE_* constants +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkParam, returns error if occured +func (dlp *DevlinkParam) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + var valuesList [][]syscall.NetlinkRouteAttr + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM: + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_NAME: + dlp.Name = nl.BytesToString(nattr.Value) + case nl.DEVLINK_ATTR_PARAM_GENERIC: + dlp.IsGeneric = true + case nl.DEVLINK_ATTR_PARAM_TYPE: + if len(nattr.Value) == 1 { + dlp.Type = nattr.Value[0] + } + case nl.DEVLINK_ATTR_PARAM_VALUES_LIST: + nnattrs, err := nl.ParseRouteAttr(nattr.Value) + if err != nil { + return err + } + valuesList = append(valuesList, nnattrs) + } + } + } + } + for _, valAttr := range valuesList { + v := DevlinkParamValue{} + if err := v.parseAttributes(valAttr, dlp.Type); err != nil { + return err + } + dlp.Values = append(dlp.Values, v) + } + return nil +} + +func (dlpv *DevlinkParamValue) parseAttributes(attrs []syscall.NetlinkRouteAttr, paramType uint8) error { + for _, attr := range attrs { + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + var rawData []byte + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_VALUE_DATA: + rawData = nattr.Value + case nl.DEVLINK_ATTR_PARAM_VALUE_CMODE: + if len(nattr.Value) == 1 { + dlpv.CMODE = nattr.Value[0] + } + } + } + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + dlpv.Data = uint8(0) + if rawData != nil && len(rawData) == 1 { + dlpv.Data = uint8(rawData[0]) + } + case nl.DEVLINK_PARAM_TYPE_U16: + dlpv.Data = uint16(0) + if rawData != nil { + dlpv.Data = native.Uint16(rawData) + } + case nl.DEVLINK_PARAM_TYPE_U32: + dlpv.Data = uint32(0) + if rawData != nil { + dlpv.Data = native.Uint32(rawData) + } + case nl.DEVLINK_PARAM_TYPE_STRING: + dlpv.Data = "" + if rawData != nil { + dlpv.Data = nl.BytesToString(rawData) + } + case nl.DEVLINK_PARAM_TYPE_BOOL: + dlpv.Data = rawData != nil + } + } + return nil +} + func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { devices := make([]*DevlinkDevice, 0, len(msgs)) for _, m := range msgs { @@ -95,9 +416,9 @@ func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error for _, a := range attrs { switch a.Attr.Type { case nl.DEVLINK_ATTR_BUS_NAME: - d.BusName = string(a.Value) + d.BusName = string(a.Value[:len(a.Value)-1]) case nl.DEVLINK_ATTR_DEV_NAME: - d.DeviceName = string(a.Value) + d.DeviceName = string(a.Value[:len(a.Value)-1]) case nl.DEVLINK_ATTR_ESWITCH_MODE: d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value)) case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE: @@ -126,12 +447,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) { req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK) req.AddData(msg) - b := make([]byte, len(dev.BusName)) + b := make([]byte, len(dev.BusName)+1) copy(b, dev.BusName) data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) req.AddData(data) - b = make([]byte, len(dev.DeviceName)) + b = make([]byte, len(dev.DeviceName)+1) copy(b, dev.DeviceName) data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) req.AddData(data) @@ -270,3 +591,569 @@ func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode) } + +func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + port.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + port.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_INDEX: + port.PortIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_TYPE: + port.PortType = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_NETDEV_NAME: + port.NetdeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX: + port.NetdevIfIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_IBDEV_NAME: + port.RdmaDeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_FLAVOUR: + port.PortFlavour = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_FUNCTION: + port.Fn = &DevlinkPortFn{} + for nested := range nl.ParseAttributes(a.Value) { + switch nested.Type { + case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR: + port.Fn.HwAddr = nested.Value[:] + case nl.DEVLINK_PORT_FN_ATTR_STATE: + port.Fn.State = uint8(nested.Value[0]) + case nl.DEVLINK_PORT_FN_ATTR_OPSTATE: + port.Fn.OpState = uint8(nested.Value[0]) + } + } + } + } + return nil +} + +func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) { + ports := make([]*DevlinkPort, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + ports = append(ports, port) + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_PORT_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + ports, err := parseDevLinkAllPortList(msgs) + if err != nil { + return nil, err + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func DevLinkGetAllPortList() ([]*DevlinkPort, error) { + return pkgHandle.DevLinkGetAllPortList() +} + +func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + return port, nil +} + +// DevLinkGetPortByIndexprovides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevlinkGetDeviceResources returns devlink device resources +func DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + return pkgHandle.DevlinkGetDeviceResources(bus, device) +} + +// DevlinkGetDeviceResources returns devlink device resources +func (h *Handle) DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_RESOURCE_DUMP, bus, device) + if err != nil { + return nil, err + } + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + var resources DevlinkResources + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttrAsMap(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + resources.parseAttributes(attrs) + } + + return &resources, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func (h *Handle) DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.Flags |= unix.NLM_F_DUMP + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + var params []*DevlinkParam + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + params = append(params, p) + } + + return params, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParams(bus, device) +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func (h *Handle) DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + if len(respmsg) == 0 { + return nil, fmt.Errorf("unexpected response") + } + attrs, err := nl.ParseRouteAttr(respmsg[0][nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + return p, nil +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParamByName(bus, device, param) +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func (h *Handle) DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + // retrive the param type + p, err := h.DevlinkGetDeviceParamByName(bus, device, param) + if err != nil { + return fmt.Errorf("failed to get device param: %v", err) + } + paramType := p.Type + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_SET, bus, device) + if err != nil { + return err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_TYPE, nl.Uint8Attr(paramType))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_CMODE, nl.Uint8Attr(cmode))) + + var valueAsBytes []byte + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + v, ok := value.(uint8) + if !ok { + return fmt.Errorf("unepected value type required: uint8, actual: %T", value) + } + valueAsBytes = nl.Uint8Attr(v) + case nl.DEVLINK_PARAM_TYPE_U16: + v, ok := value.(uint16) + if !ok { + return fmt.Errorf("unepected value type required: uint16, actual: %T", value) + } + valueAsBytes = nl.Uint16Attr(v) + case nl.DEVLINK_PARAM_TYPE_U32: + v, ok := value.(uint32) + if !ok { + return fmt.Errorf("unepected value type required: uint32, actual: %T", value) + } + valueAsBytes = nl.Uint32Attr(v) + case nl.DEVLINK_PARAM_TYPE_STRING: + v, ok := value.(string) + if !ok { + return fmt.Errorf("unepected value type required: string, actual: %T", value) + } + valueAsBytes = nl.ZeroTerminated(v) + case nl.DEVLINK_PARAM_TYPE_BOOL: + v, ok := value.(bool) + if !ok { + return fmt.Errorf("unepected value type required: bool, actual: %T", value) + } + if v { + valueAsBytes = []byte{} + } + default: + return fmt.Errorf("unsupported parameter type: %d", paramType) + } + if valueAsBytes != nil { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_DATA, valueAsBytes)) + } + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + return pkgHandle.DevlinkSetDeviceParam(bus, device, param, cmode, value) +} + +// DevLinkGetPortByIndex provides a pointer to devlink portand nil error, +// otherwise returns an error code. +func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex) +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour))) + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber))) + if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber))) + } + if Attrs.PortIndexValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex))) + } + if Attrs.ControllerValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller))) + } + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs) +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex) +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil) + + if FnAttrs.HwAddrValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr)) + } + + if FnAttrs.StateValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State)) + } + req.AddData(fnAttr) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs) +} + +// devlinkInfoGetter is function that is responsible for getting devlink info message +// this is introduced for test purpose +type devlinkInfoGetter func(bus, device string) ([]byte, error) + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) { + info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg) + if err != nil { + return nil, err + } + + return parseInfoData(info), nil +} + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) { + response, err := getInfoMsg(Bus, Device) + if err != nil { + return nil, err + } + + info, err := parseInfoMsg(response) + if err != nil { + return nil, err + } + + return info, nil +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfo returns devlink info for target device, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfoAsMap returns devlink info for target device as a map, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device) + if err != nil { + return nil, err + } + + response, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + if len(response) < 1 { + return nil, fmt.Errorf("getDevlinkInfoMsg: message too short") + } + + return response[0], nil +} + +func parseInfoMsg(msg []byte) (map[string]string, error) { + if len(msg) < nl.SizeofGenlmsg { + return nil, fmt.Errorf("parseInfoMsg: message too short") + } + + info := make(map[string]string) + err := collectInfoData(msg[nl.SizeofGenlmsg:], info) + + if err != nil { + return nil, err + } + + return info, nil +} + +func collectInfoData(msg []byte, data map[string]string) error { + attrs, err := nl.ParseRouteAttr(msg) + if err != nil { + return err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_DRIVER_NAME: + data["driver"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER: + data["serialNumber"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED, + nl.DEVLINK_ATTR_INFO_VERSION_STORED: + key, value, err := getNestedInfoData(attr.Value) + if err != nil { + return err + } + data[key] = value + } + } + + if len(data) == 0 { + return fmt.Errorf("collectInfoData: could not read attributes") + } + + return nil +} + +func getNestedInfoData(msg []byte) (string, string, error) { + nestedAttrs, err := nl.ParseRouteAttr(msg) + + var key, value string + + if err != nil { + return "", "", err + } + + if len(nestedAttrs) != 2 { + return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure") + } + + for _, nestedAttr := range nestedAttrs { + switch nestedAttr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_VERSION_NAME: + key = parseInfoValue(nestedAttr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_VALUE: + value = parseInfoValue(nestedAttr.Value) + } + } + + if key == "" { + return "", "", fmt.Errorf("getNestedInfoData: key not found") + } + + if value == "" { + return "", "", fmt.Errorf("getNestedInfoData: value not found") + } + + return key, value, nil +} + +func parseInfoData(data map[string]string) *DevlinkDeviceInfo { + info := new(DevlinkDeviceInfo) + for key, value := range data { + switch key { + case "driver": + info.Driver = value + case "serialNumber": + info.SerialNumber = value + case "board.id": + info.BoardID = value + case "fw.app": + info.FwApp = value + case "fw.app.bundle_id": + info.FwAppBoundleID = value + case "fw.app.name": + info.FwAppName = value + case "fw.bundle_id": + info.FwBoundleID = value + case "fw.mgmt": + info.FwMgmt = value + case "fw.mgmt.api": + info.FwMgmtAPI = value + case "fw.mgmt.build": + info.FwMgmtBuild = value + case "fw.netlist": + info.FwNetlist = value + case "fw.netlist.build": + info.FwNetlistBuild = value + case "fw.psid.api": + info.FwPsidAPI = value + case "fw.undi": + info.FwUndi = value + } + } + return info +} + +func parseInfoValue(value []byte) string { + v := strings.ReplaceAll(string(value), "\x00", "") + return strings.TrimSpace(v) +} diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go index 88792eab0..84e1ca7a4 100644 --- a/vendor/github.com/vishvananda/netlink/filter.go +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -19,6 +19,7 @@ type FilterAttrs struct { Parent uint32 Priority uint16 // lower is higher priority Protocol uint16 // unix.ETH_P_* + Chain *uint32 } func (q FilterAttrs) String() string { @@ -27,6 +28,11 @@ func (q FilterAttrs) String() string { type TcAct int32 +const ( + TC_ACT_EXT_SHIFT = 28 + TC_ACT_EXT_VAL_MASK = (1 << TC_ACT_EXT_SHIFT) - 1 +) + const ( TC_ACT_UNSPEC TcAct = -1 TC_ACT_OK TcAct = 0 @@ -40,6 +46,22 @@ const ( TC_ACT_JUMP TcAct = 0x10000000 ) +func getTcActExt(local int32) int32 { + return local << TC_ACT_EXT_SHIFT +} + +func getTcActGotoChain() TcAct { + return TcAct(getTcActExt(2)) +} + +func getTcActExtOpcode(combined int32) int32 { + return combined & (^TC_ACT_EXT_VAL_MASK) +} + +func TcActExtCmp(combined int32, opcode int32) bool { + return getTcActExtOpcode(combined) == opcode +} + func (a TcAct) String() string { switch a { case TC_ACT_UNSPEC: @@ -63,6 +85,9 @@ func (a TcAct) String() string { case TC_ACT_JUMP: return "jump" } + if TcActExtCmp(int32(a), int32(getTcActGotoChain())) { + return "goto" + } return fmt.Sprintf("0x%x", int32(a)) } @@ -93,17 +118,32 @@ func (a TcPolAct) String() string { } type ActionAttrs struct { - Index int - Capab int - Action TcAct - Refcnt int - Bindcnt int + Index int + Capab int + Action TcAct + Refcnt int + Bindcnt int + Statistics *ActionStatistic + Timestamp *ActionTimestamp } func (q ActionAttrs) String() string { return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt) } +type ActionTimestamp struct { + Installed uint64 + LastUsed uint64 + Expires uint64 + FirstUsed uint64 +} + +func (t ActionTimestamp) String() string { + return fmt.Sprintf("Installed %d LastUsed %d Expires %d FirstUsed %d", t.Installed, t.LastUsed, t.Expires, t.FirstUsed) +} + +type ActionStatistic ClassStatistics + // Action represents an action in any supported filter. type Action interface { Attrs() *ActionAttrs @@ -112,6 +152,7 @@ type Action interface { type GenericAction struct { ActionAttrs + Chain int32 } func (action *GenericAction) Type() string { @@ -157,6 +198,39 @@ func NewConnmarkAction() *ConnmarkAction { } } +type CsumUpdateFlags uint32 + +const ( + TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1 + TCA_CSUM_UPDATE_FLAG_ICMP CsumUpdateFlags = 2 + TCA_CSUM_UPDATE_FLAG_IGMP CsumUpdateFlags = 4 + TCA_CSUM_UPDATE_FLAG_TCP CsumUpdateFlags = 8 + TCA_CSUM_UPDATE_FLAG_UDP CsumUpdateFlags = 16 + TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32 + TCA_CSUM_UPDATE_FLAG_SCTP CsumUpdateFlags = 64 +) + +type CsumAction struct { + ActionAttrs + UpdateFlags CsumUpdateFlags +} + +func (action *CsumAction) Type() string { + return "csum" +} + +func (action *CsumAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewCsumAction() *CsumAction { + return &CsumAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + type MirredAct uint8 func (a MirredAct) String() string { @@ -213,10 +287,11 @@ const ( type TunnelKeyAction struct { ActionAttrs - Action TunnelKeyAct - SrcAddr net.IP - DstAddr net.IP - KeyID uint32 + Action TunnelKeyAct + SrcAddr net.IP + DstAddr net.IP + KeyID uint32 + DestPort uint16 } func (action *TunnelKeyAction) Type() string { @@ -241,6 +316,7 @@ type SkbEditAction struct { PType *uint16 Priority *uint32 Mark *uint32 + Mask *uint32 } func (action *SkbEditAction) Type() string { @@ -259,6 +335,40 @@ func NewSkbEditAction() *SkbEditAction { } } +type PoliceAction struct { + ActionAttrs + Rate uint32 // in byte per second + Burst uint32 // in byte + RCellLog int + Mtu uint32 + Mpu uint16 // in byte + PeakRate uint32 // in byte per second + PCellLog int + AvRate uint32 // in byte per second + Overhead uint16 + LinkLayer int + ExceedAction TcPolAct + NotExceedAction TcPolAct +} + +func (action *PoliceAction) Type() string { + return "police" +} + +func (action *PoliceAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewPoliceAction() *PoliceAction { + return &PoliceAction{ + RCellLog: -1, + PCellLog: -1, + LinkLayer: 1, // ETHERNET + ExceedAction: TC_POLICE_RECLASSIFY, + NotExceedAction: TC_POLICE_OK, + } +} + // MatchAll filters match all packets type MatchAll struct { FilterAttrs @@ -274,20 +384,21 @@ func (filter *MatchAll) Type() string { return "matchall" } -type FilterFwAttrs struct { - ClassId uint32 - InDev string - Mask uint32 - Index uint32 - Buffer uint32 - Mtu uint32 - Mpu uint16 - Rate uint32 - AvRate uint32 - PeakRate uint32 - Action TcPolAct - Overhead uint16 - LinkLayer int +type FwFilter struct { + FilterAttrs + ClassId uint32 + InDev string + Mask uint32 + Police *PoliceAction + Actions []Action +} + +func (filter *FwFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *FwFilter) Type() string { + return "fw" } type BpfFilter struct { @@ -322,3 +433,30 @@ func (filter *GenericFilter) Attrs() *FilterAttrs { func (filter *GenericFilter) Type() string { return filter.FilterType } + +type PeditAction struct { + ActionAttrs + Proto uint8 + SrcMacAddr net.HardwareAddr + DstMacAddr net.HardwareAddr + SrcIP net.IP + DstIP net.IP + SrcPort uint16 + DstPort uint16 +} + +func (p *PeditAction) Attrs() *ActionAttrs { + return &p.ActionAttrs +} + +func (p *PeditAction) Type() string { + return "pedit" +} + +func NewPeditAction() *PeditAction { + return &PeditAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index c56f314cd..87cd18f8e 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -37,9 +37,11 @@ type U32 struct { ClassId uint32 Divisor uint32 // Divisor MUST be power of 2. Hash uint32 + Link uint32 RedirIndex int Sel *TcU32Sel Actions []Action + Police *PoliceAction } func (filter *U32) Attrs() *FilterAttrs { @@ -50,74 +52,185 @@ func (filter *U32) Type() string { return "u32" } -// Fw filter filters on firewall marks -// NOTE: this is in filter_linux because it refers to nl.TcPolice which -// is defined in nl/tc_linux.go -type Fw struct { +type Flower struct { FilterAttrs - ClassId uint32 - // TODO remove nl type from interface - Police nl.TcPolice - InDev string - // TODO Action - Mask uint32 - AvRate uint32 - Rtab [256]uint32 - Ptab [256]uint32 -} - -func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) { - var rtab [256]uint32 - var ptab [256]uint32 - rcellLog := -1 - pcellLog := -1 - avrate := fattrs.AvRate / 8 - police := nl.TcPolice{} - police.Rate.Rate = fattrs.Rate / 8 - police.PeakRate.Rate = fattrs.PeakRate / 8 - buffer := fattrs.Buffer - linklayer := nl.LINKLAYER_ETHERNET + DestIP net.IP + DestIPMask net.IPMask + SrcIP net.IP + SrcIPMask net.IPMask + EthType uint16 + EncDestIP net.IP + EncDestIPMask net.IPMask + EncSrcIP net.IP + EncSrcIPMask net.IPMask + EncDestPort uint16 + EncKeyId uint32 + SkipHw bool + SkipSw bool + IPProto *nl.IPProto + DestPort uint16 + SrcPort uint16 - if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC { - linklayer = fattrs.LinkLayer - } + Actions []Action +} - police.Action = int32(fattrs.Action) - if police.Rate.Rate != 0 { - police.Rate.Mpu = fattrs.Mpu - police.Rate.Overhead = fattrs.Overhead - if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("TBF: failed to calculate rate table") - } - police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer))) +func (filter *Flower) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Flower) Type() string { + return "flower" +} + +func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) { + ipType := v4Type + maskType := v4MaskType + + encodeMask := mask + if mask == nil { + encodeMask = net.CIDRMask(32, 32) } - police.Mtu = fattrs.Mtu - if police.PeakRate.Rate != 0 { - police.PeakRate.Mpu = fattrs.Mpu - police.PeakRate.Overhead = fattrs.Overhead - if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("POLICE: failed to calculate peak rate table") + v4IP := ip.To4() + if v4IP == nil { + ipType = v6Type + maskType = v6MaskType + if mask == nil { + encodeMask = net.CIDRMask(128, 128) } + } else { + ip = v4IP } - return &Fw{ - FilterAttrs: attrs, - ClassId: fattrs.ClassId, - InDev: fattrs.InDev, - Mask: fattrs.Mask, - Police: police, - AvRate: avrate, - Rtab: rtab, - Ptab: ptab, - }, nil + parent.AddRtAttr(ipType, ip) + parent.AddRtAttr(maskType, encodeMask) } -func (filter *Fw) Attrs() *FilterAttrs { - return &filter.FilterAttrs +func (filter *Flower) encode(parent *nl.RtAttr) error { + if filter.EthType != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType)) + } + if filter.SrcIP != nil { + filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask, + nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC, + nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK) + } + if filter.DestIP != nil { + filter.encodeIP(parent, filter.DestIP, filter.DestIPMask, + nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST, + nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK) + } + if filter.EncSrcIP != nil { + filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK) + } + if filter.EncDestIP != nil { + filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK) + } + if filter.EncDestPort != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort)) + } + if filter.EncKeyId != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) + } + if filter.IPProto != nil { + ipproto := *filter.IPProto + parent.AddRtAttr(nl.TCA_FLOWER_KEY_IP_PROTO, ipproto.Serialize()) + if filter.SrcPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_SRC, htons(filter.SrcPort)) + } + } + if filter.DestPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_DST, htons(filter.DestPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_DST, htons(filter.DestPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_DST, htons(filter.DestPort)) + } + } + } + + var flags uint32 = 0 + if filter.SkipHw { + flags |= nl.TCA_CLS_FLAGS_SKIP_HW + } + if filter.SkipSw { + flags |= nl.TCA_CLS_FLAGS_SKIP_SW + } + parent.AddRtAttr(nl.TCA_FLOWER_FLAGS, htonl(flags)) + + actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + return nil } -func (filter *Fw) Type() string { - return "fw" +func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FLOWER_KEY_ETH_TYPE: + filter.EthType = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC: + filter.SrcIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK: + filter.SrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST: + filter.DestIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK: + filter.DestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC: + filter.EncSrcIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK: + filter.EncSrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST: + filter.EncDestIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK: + filter.EncDestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT: + filter.EncDestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_ENC_KEY_ID: + filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_KEY_IP_PROTO: + val := new(nl.IPProto) + *val = nl.IPProto(datum.Value[0]) + filter.IPProto = val + case nl.TCA_FLOWER_KEY_TCP_SRC, nl.TCA_FLOWER_KEY_UDP_SRC, nl.TCA_FLOWER_KEY_SCTP_SRC: + filter.SrcPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_TCP_DST, nl.TCA_FLOWER_KEY_UDP_DST, nl.TCA_FLOWER_KEY_SCTP_DST: + filter.DestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return err + } + filter.Actions, err = parseActions(tables) + if err != nil { + return err + } + case nl.TCA_FLOWER_FLAGS: + attr := nl.DeserializeUint32Bitfield(datum.Value) + skipSw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_HW + skipHw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_SW + if skipSw != 0 { + filter.SkipSw = true + } + if skipHw != 0 { + filter.SkipHw = true + } + } + } + return nil } // FilterDel will delete a filter from the system. @@ -129,19 +242,7 @@ func FilterDel(filter Filter) error { // FilterDel will delete a filter from the system. // Equivalent to: `tc filter del $filter` func (h *Handle) FilterDel(filter Filter) error { - req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) - base := filter.Attrs() - msg := &nl.TcMsg{ - Family: nl.FAMILY_ALL, - Ifindex: int32(base.LinkIndex), - Handle: base.Handle, - Parent: base.Parent, - Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), - } - req.AddData(msg) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + return h.filterModify(filter, unix.RTM_DELTFILTER, 0) } // FilterAdd will add a filter to the system. @@ -153,7 +254,7 @@ func FilterAdd(filter Filter) error { // FilterAdd will add a filter to the system. // Equivalent to: `tc filter add $filter` func (h *Handle) FilterAdd(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL) } // FilterReplace will replace a filter. @@ -165,12 +266,11 @@ func FilterReplace(filter Filter) error { // FilterReplace will replace a filter. // Equivalent to: `tc filter replace $filter` func (h *Handle) FilterReplace(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE) } -func (h *Handle) filterModify(filter Filter, flags int) error { - native = nl.NativeEndian() - req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) +func (h *Handle) filterModify(filter Filter, proto, flags int) error { + req := h.newNetlinkRequest(proto, flags|unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -180,6 +280,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error { Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), } req.AddData(msg) + if filter.Attrs().Chain != nil { + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(*filter.Attrs().Chain))) + } req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -226,6 +329,15 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.Hash != 0 { options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash)) } + if filter.Link != 0 { + options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link)) + } + if filter.Police != nil { + police := options.AddRtAttr(nl.TCA_U32_POLICE, nil) + if err := encodePolice(police, filter.Police); err != nil { + return err + } + } actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil) // backwards compatibility if filter.RedirIndex != 0 { @@ -234,7 +346,7 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if err := EncodeActions(actionsAttr, filter.Actions); err != nil { return err } - case *Fw: + case *FwFilter: if filter.Mask != 0 { b := make([]byte, 4) native.PutUint32(b, filter.Mask) @@ -243,17 +355,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.InDev != "" { options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) } - if (filter.Police != nl.TcPolice{}) { - + if filter.Police != nil { police := options.AddRtAttr(nl.TCA_FW_POLICE, nil) - police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize()) - if (filter.Police.Rate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Rtab) - police.AddRtAttr(nl.TCA_POLICE_RATE, payload) - } - if (filter.Police.PeakRate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Ptab) - police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload) + if err := encodePolice(police, filter.Police); err != nil { + return err } } if filter.ClassId != 0 { @@ -261,6 +366,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { native.PutUint32(b, filter.ClassId) options.AddRtAttr(nl.TCA_FW_CLASSID, b) } + actionsAttr := options.AddRtAttr(nl.TCA_FW_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } case *BpfFilter: var bpfFlags uint32 if filter.ClassId != 0 { @@ -284,8 +393,11 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.ClassId != 0 { options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId)) } + case *Flower: + if err := filter.encode(options); err != nil { + return err + } } - req.AddData(options) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err @@ -347,11 +459,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { case "u32": filter = &U32{} case "fw": - filter = &Fw{} + filter = &FwFilter{} case "bpf": filter = &BpfFilter{} case "matchall": filter = &MatchAll{} + case "flower": + filter = &Flower{} default: filter = &GenericFilter{FilterType: filterType} } @@ -381,9 +495,18 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { if err != nil { return nil, err } + case "flower": + detailed, err = parseFlowerData(filter, data) + if err != nil { + return nil, err + } default: detailed = true } + case nl.TCA_CHAIN: + val := new(uint32) + *val = native.Uint32(attr.Value) + base.Chain = val } } // only return the detailed version of the filter @@ -412,6 +535,61 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { attrs.Bindcnt = int(tcgen.Bindcnt) } +func toTimeStamp(tcf *nl.Tcf) *ActionTimestamp { + return &ActionTimestamp{ + Installed: tcf.Install, + LastUsed: tcf.LastUse, + Expires: tcf.Expires, + FirstUsed: tcf.FirstUse} +} + +func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { + var rtab [256]uint32 + var ptab [256]uint32 + police := nl.TcPolice{} + police.Index = uint32(action.Attrs().Index) + police.Bindcnt = int32(action.Attrs().Bindcnt) + police.Capab = uint32(action.Attrs().Capab) + police.Refcnt = int32(action.Attrs().Refcnt) + police.Rate.Rate = action.Rate + police.PeakRate.Rate = action.PeakRate + police.Action = int32(action.ExceedAction) + + if police.Rate.Rate != 0 { + police.Rate.Mpu = action.Mpu + police.Rate.Overhead = action.Overhead + if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("TBF: failed to calculate rate table") + } + police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) + } + + police.Mtu = action.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = action.Mpu + police.PeakRate.Overhead = action.Overhead + if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("POLICE: failed to calculate peak rate table") + } + } + + attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) + if police.Rate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab)) + } + if police.PeakRate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab)) + } + if action.AvRate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate)) + } + if action.NotExceedAction != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction))) + } + + return nil +} + func EncodeActions(attr *nl.RtAttr, actions []Action) error { tabIndex := int(nl.TCA_ACT_TAB) @@ -419,6 +597,14 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { switch action := action.(type) { default: return fmt.Errorf("unknown action type %s", action.Type()) + case *PoliceAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + if err := encodePolice(aopts, action); err != nil { + return err + } case *MirredAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -456,6 +642,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { } else { return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr) } + if action.DestPort != 0 { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_DST_PORT, htons(action.DestPort)) + } } case *SkbEditAction: table := attr.AddRtAttr(tabIndex, nil) @@ -477,6 +666,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { if action.Mark != nil { aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) } + if action.Mask != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MASK, nl.Uint32Attr(*action.Mask)) + } case *ConnmarkAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -487,6 +679,16 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { } toTcGen(action.Attrs(), &connmark.TcGen) aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize()) + case *CsumAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + csum := nl.TcCsum{ + UpdateFlags: uint32(action.UpdateFlags), + } + toTcGen(action.Attrs(), &csum.TcGen) + aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize()) case *BpfAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -505,16 +707,64 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { gen := nl.TcGen{} toTcGen(action.Attrs(), &gen) aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize()) + case *PeditAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + pedit := nl.TcPedit{} + if action.SrcMacAddr != nil { + pedit.SetEthSrc(action.SrcMacAddr) + } + if action.DstMacAddr != nil { + pedit.SetEthDst(action.DstMacAddr) + } + if action.SrcIP != nil { + pedit.SetSrcIP(action.SrcIP) + } + if action.DstIP != nil { + pedit.SetDstIP(action.DstIP) + } + if action.SrcPort != 0 { + pedit.SetSrcPort(action.SrcPort, action.Proto) + } + if action.DstPort != 0 { + pedit.SetDstPort(action.DstPort, action.Proto) + } + pedit.Encode(table) } } return nil } +func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { + switch data.Attr.Type { + case nl.TCA_POLICE_RESULT: + police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) + case nl.TCA_POLICE_AVRATE: + police.AvRate = native.Uint32(data.Value[0:4]) + case nl.TCA_POLICE_TBF: + p := *nl.DeserializeTcPolice(data.Value) + police.ActionAttrs = ActionAttrs{} + police.Attrs().Index = int(p.Index) + police.Attrs().Bindcnt = int(p.Bindcnt) + police.Attrs().Capab = int(p.Capab) + police.Attrs().Refcnt = int(p.Refcnt) + police.ExceedAction = TcPolAct(p.Action) + police.Rate = p.Rate.Rate + police.PeakRate = p.PeakRate.Rate + police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) + police.Mtu = p.Mtu + police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK + police.Overhead = p.Rate.Overhead + } +} + func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { var actions []Action for _, table := range tables { var action Action var actionType string + var actionnStatistic *ActionStatistic + var actionTimestamp *ActionTimestamp aattrs, err := nl.ParseRouteAttr(table.Value) if err != nil { return nil, err @@ -532,12 +782,18 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action = &BpfAction{} case "connmark": action = &ConnmarkAction{} + case "csum": + action = &CsumAction{} case "gact": action = &GenericAction{} case "tunnel_key": action = &TunnelKeyAction{} case "skbedit": action = &SkbEditAction{} + case "police": + action = &PoliceAction{} + case "pedit": + action = &PeditAction{} default: break nextattr } @@ -556,7 +812,11 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { toAttrs(&mirred.TcGen, action.Attrs()) action.(*MirredAction).Ifindex = int(mirred.Ifindex) action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction) + case nl.TCA_MIRRED_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } + case "tunnel_key": switch adatum.Attr.Type { case nl.TCA_TUNNEL_KEY_PARMS: @@ -566,12 +826,15 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action) case nl.TCA_TUNNEL_KEY_ENC_KEY_ID: action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4]) - case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC: - case nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: - action.(*TunnelKeyAction).SrcAddr = net.IP(adatum.Value[:]) - case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST: - case nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: - action.(*TunnelKeyAction).DstAddr = net.IP(adatum.Value[:]) + case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: + action.(*TunnelKeyAction).SrcAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: + action.(*TunnelKeyAction).DstAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_DST_PORT: + action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value) + case nl.TCA_TUNNEL_KEY_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "skbedit": switch adatum.Attr.Type { @@ -582,6 +845,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_MARK: mark := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_MASK: + mask := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mask = &mask case nl.TCA_SKBEDIT_PRIORITY: priority := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Priority = &priority @@ -591,6 +857,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_QUEUE_MAPPING: mapping := native.Uint16(adatum.Value[0:2]) action.(*SkbEditAction).QueueMapping = &mapping + case nl.TCA_SKBEDIT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "bpf": switch adatum.Attr.Type { @@ -601,6 +870,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4])) case nl.TCA_ACT_BPF_NAME: action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1]) + case nl.TCA_ACT_BPF_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "connmark": switch adatum.Attr.Type { @@ -609,24 +881,53 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*ConnmarkAction).ActionAttrs = ActionAttrs{} toAttrs(&connmark.TcGen, action.Attrs()) action.(*ConnmarkAction).Zone = connmark.Zone + case nl.TCA_CONNMARK_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) + } + case "csum": + switch adatum.Attr.Type { + case nl.TCA_CSUM_PARMS: + csum := *nl.DeserializeTcCsum(adatum.Value) + action.(*CsumAction).ActionAttrs = ActionAttrs{} + toAttrs(&csum.TcGen, action.Attrs()) + action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + case nl.TCA_CSUM_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "gact": switch adatum.Attr.Type { case nl.TCA_GACT_PARMS: gen := *nl.DeserializeTcGen(adatum.Value) toAttrs(&gen, action.Attrs()) + if action.Attrs().Action.String() == "goto" { + action.(*GenericAction).Chain = TC_ACT_EXT_VAL_MASK & gen.Action + } + case nl.TCA_GACT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } + case "police": + parsePolice(adatum, action.(*PoliceAction)) } } + case nl.TCA_ACT_STATS: + s, err := parseTcStats2(aattr.Value) + if err != nil { + return nil, err + } + actionnStatistic = (*ActionStatistic)(s) } } + action.Attrs().Statistics = actionnStatistic + action.Attrs().Timestamp = actionTimestamp actions = append(actions, action) } return actions, nil } func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() u32 := filter.(*U32) detailed := false for _, datum := range data { @@ -658,20 +959,28 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) u32.RedirIndex = int(action.Ifindex) } } + case nl.TCA_U32_POLICE: + var police PoliceAction + adata, _ := nl.ParseRouteAttr(datum.Value) + for _, aattr := range adata { + parsePolice(aattr, &police) + } + u32.Police = &police case nl.TCA_U32_CLASSID: u32.ClassId = native.Uint32(datum.Value) case nl.TCA_U32_DIVISOR: u32.Divisor = native.Uint32(datum.Value) case nl.TCA_U32_HASH: u32.Hash = native.Uint32(datum.Value) + case nl.TCA_U32_LINK: + u32.Link = native.Uint32(datum.Value) } } return detailed, nil } func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() - fw := filter.(*Fw) + fw := filter.(*FwFilter) detailed := true for _, datum := range data { switch datum.Attr.Type { @@ -682,16 +991,20 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { case nl.TCA_FW_INDEV: fw.InDev = string(datum.Value[:len(datum.Value)-1]) case nl.TCA_FW_POLICE: + var police PoliceAction adata, _ := nl.ParseRouteAttr(datum.Value) for _, aattr := range adata { - switch aattr.Attr.Type { - case nl.TCA_POLICE_TBF: - fw.Police = *nl.DeserializeTcPolice(aattr.Value) - case nl.TCA_POLICE_RATE: - fw.Rtab = DeserializeRtab(aattr.Value) - case nl.TCA_POLICE_PEAKRATE: - fw.Ptab = DeserializeRtab(aattr.Value) - } + parsePolice(aattr, &police) + } + fw.Police = &police + case nl.TCA_FW_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + fw.Actions, err = parseActions(tables) + if err != nil { + return detailed, err } } } @@ -699,7 +1012,6 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { } func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() bpf := filter.(*BpfFilter) detailed := true for _, datum := range data { @@ -718,14 +1030,13 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) case nl.TCA_BPF_ID: bpf.Id = int(native.Uint32(datum.Value[0:4])) case nl.TCA_BPF_TAG: - bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1]) + bpf.Tag = hex.EncodeToString(datum.Value) } } return detailed, nil } func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() matchall := filter.(*MatchAll) detailed := true for _, datum := range data { @@ -746,6 +1057,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er return detailed, nil } +func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + return true, filter.(*Flower).decode(data) +} + func AlignToAtm(size uint) uint { var linksize, cells int cells = int(size / nl.ATM_CELL_PAYLOAD) @@ -783,7 +1098,7 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin } for i := 0; i < 256; i++ { sz = AdjustSize(uint((i+1)<= nl.IPSET_ERR_PRIVATE { + err = nl.IPSetError(uintptr(errno)) + } + } + return +} + +func ipsetUnserialize(msgs [][]byte) (result IPSetResult) { + for _, msg := range msgs { + result.unserialize(msg) + } + return result +} + +func (result *IPSetResult) unserialize(msg []byte) { + result.Nfgenmsg = nl.DeserializeNfgenmsg(msg) + + for attr := range nl.ParseAttributes(msg[4:]) { + switch attr.Type { + case nl.IPSET_ATTR_PROTOCOL: + result.Protocol = attr.Value[0] + case nl.IPSET_ATTR_SETNAME: + result.SetName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_TYPENAME: + result.TypeName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_REVISION: + result.Revision = attr.Value[0] + case nl.IPSET_ATTR_FAMILY: + result.Family = attr.Value[0] + case nl.IPSET_ATTR_FLAGS: + result.Flags = attr.Value[0] + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.parseAttrData(attr.Value) + case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED: + result.parseAttrADT(attr.Value) + case nl.IPSET_ATTR_PROTOCOL_MIN: + result.ProtocolMinVersion = attr.Value[0] + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrData(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_HASHSIZE | nl.NLA_F_NET_BYTEORDER: + result.HashSize = attr.Uint32() + case nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER: + result.MaxElements = attr.Uint32() + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + result.Timeout = &val + case nl.IPSET_ATTR_ELEMENTS | nl.NLA_F_NET_BYTEORDER: + result.NumEntries = attr.Uint32() + case nl.IPSET_ATTR_REFERENCES | nl.NLA_F_NET_BYTEORDER: + result.References = attr.Uint32() + case nl.IPSET_ATTR_MEMSIZE | nl.NLA_F_NET_BYTEORDER: + result.SizeInMemory = attr.Uint32() + case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER: + result.CadtFlags = attr.Uint32() + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: + result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value}) + case nl.IPSET_ATTR_IP: + result.IPFrom = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP: + result.IPTo = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER: + result.PortFrom = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER: + result.PortTo = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER: + result.LineNo = attr.Uint32() + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrADT(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.Entries = append(result.Entries, parseIPSetEntry(attr.Value)) + default: + log.Printf("unknown ADT attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func parseIPSetEntry(data []byte) (entry IPSetEntry) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Timeout = &val + case nl.IPSET_ATTR_BYTES | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Bytes = &val + case nl.IPSET_ATTR_PACKETS | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Packets = &val + case nl.IPSET_ATTR_ETHER: + entry.MAC = net.HardwareAddr(attr.Value) + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + case nl.IPSET_ATTR_COMMENT: + entry.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: + entry.IP = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: + entry.IP2 = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_CIDR: + entry.CIDR = attr.Value[0] + case nl.IPSET_ATTR_CIDR2: + entry.CIDR2 = attr.Value[0] + case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER: + val := networkOrder.Uint16(attr.Value) + entry.Port = &val + case nl.IPSET_ATTR_PROTO: + val := attr.Value[0] + entry.Protocol = &val + case nl.IPSET_ATTR_IFACE: + entry.IFace = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Mark = &val + default: + log.Printf("unknown ADT attribute from kernel: %+v", attr) + } + } + return +} diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go index 886d88d1b..f820cdb67 100644 --- a/vendor/github.com/vishvananda/netlink/link.go +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -22,31 +22,41 @@ type ( // LinkAttrs represents data shared by most link types type LinkAttrs struct { - Index int - MTU int - TxQLen int // Transmit Queue Length - Name string - HardwareAddr net.HardwareAddr - Flags net.Flags - RawFlags uint32 - ParentIndex int // index of the parent link device - MasterIndex int // must be the index of a bridge - Namespace interface{} // nil | NsPid | NsFd - Alias string - Statistics *LinkStatistics - Promisc int - Xdp *LinkXdp - EncapType string - Protinfo *Protinfo - OperState LinkOperState - NetNsID int - NumTxQueues int - NumRxQueues int - GSOMaxSize uint32 - GSOMaxSegs uint32 - Vfs []VfInfo // virtual functions available on link - Group uint32 - Slave LinkSlave + Index int + MTU int + TxQLen int // Transmit Queue Length + Name string + HardwareAddr net.HardwareAddr + Flags net.Flags + RawFlags uint32 + ParentIndex int // index of the parent link device + MasterIndex int // must be the index of a bridge + Namespace interface{} // nil | NsPid | NsFd + Alias string + AltNames []string + Statistics *LinkStatistics + Promisc int + Allmulti int + Multi int + Xdp *LinkXdp + EncapType string + Protinfo *Protinfo + OperState LinkOperState + PhysSwitchID int + NetNsID int + NumTxQueues int + NumRxQueues int + TSOMaxSegs uint32 + TSOMaxSize uint32 + GSOMaxSegs uint32 + GSOMaxSize uint32 + GROMaxSize uint32 + GSOIPv4MaxSize uint32 + GROIPv4MaxSize uint32 + Vfs []VfInfo // virtual functions available on link + Group uint32 + PermHWAddr net.HardwareAddr + Slave LinkSlave } // LinkSlave represents a slave device. @@ -60,11 +70,23 @@ type VfInfo struct { Mac net.HardwareAddr Vlan int Qos int + VlanProto int TxRate int // IFLA_VF_TX_RATE Max TxRate Spoofchk bool LinkState uint32 MaxTxRate uint32 // IFLA_VF_RATE Max TxRate MinTxRate uint32 // IFLA_VF_RATE Min TxRate + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 + + RssQuery uint32 + Trust uint32 } // LinkOperState represents the values of the IFLA_OPERSTATE link @@ -103,7 +125,8 @@ func (s LinkOperState) String() string { // NewLinkAttrs returns LinkAttrs structure filled with default values func NewLinkAttrs() LinkAttrs { return LinkAttrs{ - TxQLen: -1, + NetNsID: -1, + TxQLen: -1, } } @@ -196,10 +219,11 @@ type LinkStatistics64 struct { } type LinkXdp struct { - Fd int - Attached bool - Flags uint32 - ProgId uint32 + Fd int + Attached bool + AttachMode uint32 + Flags uint32 + ProgId uint32 } // Device links cannot be created via netlink. These links @@ -246,8 +270,11 @@ func (ifb *Ifb) Type() string { type Bridge struct { LinkAttrs MulticastSnooping *bool + AgeingTime *uint32 HelloTime *uint32 VlanFiltering *bool + VlanDefaultPVID *uint16 + GroupFwdMask *uint16 } func (bridge *Bridge) Attrs() *LinkAttrs { @@ -291,6 +318,9 @@ type Macvlan struct { // MACAddrs is only populated for Macvlan SOURCE links MACAddrs []net.HardwareAddr + + BCQueueLen uint32 + UsedBCQueueLen uint32 } func (macvlan *Macvlan) Attrs() *LinkAttrs { @@ -333,11 +363,52 @@ func (tuntap *Tuntap) Type() string { return "tuntap" } +type NetkitMode uint32 + +const ( + NETKIT_MODE_L2 NetkitMode = iota + NETKIT_MODE_L3 +) + +type NetkitPolicy int + +const ( + NETKIT_POLICY_FORWARD NetkitPolicy = 0 + NETKIT_POLICY_BLACKHOLE NetkitPolicy = 2 +) + +func (n *Netkit) IsPrimary() bool { + return n.isPrimary +} + +// SetPeerAttrs will not take effect if trying to modify an existing netkit device +func (n *Netkit) SetPeerAttrs(Attrs *LinkAttrs) { + n.peerLinkAttrs = *Attrs +} + +type Netkit struct { + LinkAttrs + Mode NetkitMode + Policy NetkitPolicy + PeerPolicy NetkitPolicy + isPrimary bool + peerLinkAttrs LinkAttrs +} + +func (n *Netkit) Attrs() *LinkAttrs { + return &n.LinkAttrs +} + +func (n *Netkit) Type() string { + return "netkit" +} + // Veth devices must specify PeerName on create type Veth struct { LinkAttrs PeerName string // veth on create only PeerHardwareAddr net.HardwareAddr + PeerNamespace interface{} } func (veth *Veth) Attrs() *LinkAttrs { @@ -348,6 +419,19 @@ func (veth *Veth) Type() string { return "veth" } +// Wireguard represent links of type "wireguard", see https://www.wireguard.com/ +type Wireguard struct { + LinkAttrs +} + +func (wg *Wireguard) Attrs() *LinkAttrs { + return &wg.LinkAttrs +} + +func (wg *Wireguard) Type() string { + return "wireguard" +} + // GenericLink links represent types that are not currently understood // by this netlink library. type GenericLink struct { @@ -428,6 +512,19 @@ func (ipvlan *IPVlan) Type() string { return "ipvlan" } +// IPVtap - IPVtap is a virtual interfaces based on ipvlan +type IPVtap struct { + IPVlan +} + +func (ipvtap *IPVtap) Attrs() *LinkAttrs { + return &ipvtap.LinkAttrs +} + +func (ipvtap IPVtap) Type() string { + return "ipvtap" +} + // VlanProtocol type type VlanProtocol int @@ -527,6 +624,27 @@ const ( BOND_ARP_VALIDATE_ALL ) +var bondArpValidateToString = map[BondArpValidate]string{ + BOND_ARP_VALIDATE_NONE: "none", + BOND_ARP_VALIDATE_ACTIVE: "active", + BOND_ARP_VALIDATE_BACKUP: "backup", + BOND_ARP_VALIDATE_ALL: "none", +} +var StringToBondArpValidateMap = map[string]BondArpValidate{ + "none": BOND_ARP_VALIDATE_NONE, + "active": BOND_ARP_VALIDATE_ACTIVE, + "backup": BOND_ARP_VALIDATE_BACKUP, + "all": BOND_ARP_VALIDATE_ALL, +} + +func (b BondArpValidate) String() string { + s, ok := bondArpValidateToString[b] + if !ok { + return fmt.Sprintf("BondArpValidate(%d)", b) + } + return s +} + // BondPrimaryReselect type type BondPrimaryReselect int @@ -537,6 +655,25 @@ const ( BOND_PRIMARY_RESELECT_FAILURE ) +var bondPrimaryReselectToString = map[BondPrimaryReselect]string{ + BOND_PRIMARY_RESELECT_ALWAYS: "always", + BOND_PRIMARY_RESELECT_BETTER: "better", + BOND_PRIMARY_RESELECT_FAILURE: "failure", +} +var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{ + "always": BOND_PRIMARY_RESELECT_ALWAYS, + "better": BOND_PRIMARY_RESELECT_BETTER, + "failure": BOND_PRIMARY_RESELECT_FAILURE, +} + +func (b BondPrimaryReselect) String() string { + s, ok := bondPrimaryReselectToString[b] + if !ok { + return fmt.Sprintf("BondPrimaryReselect(%d)", b) + } + return s +} + // BondArpAllTargets type type BondArpAllTargets int @@ -546,6 +683,23 @@ const ( BOND_ARP_ALL_TARGETS_ALL ) +var bondArpAllTargetsToString = map[BondArpAllTargets]string{ + BOND_ARP_ALL_TARGETS_ANY: "any", + BOND_ARP_ALL_TARGETS_ALL: "all", +} +var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{ + "any": BOND_ARP_ALL_TARGETS_ANY, + "all": BOND_ARP_ALL_TARGETS_ALL, +} + +func (b BondArpAllTargets) String() string { + s, ok := bondArpAllTargetsToString[b] + if !ok { + return fmt.Sprintf("BondArpAllTargets(%d)", b) + } + return s +} + // BondFailOverMac type type BondFailOverMac int @@ -556,6 +710,25 @@ const ( BOND_FAIL_OVER_MAC_FOLLOW ) +var bondFailOverMacToString = map[BondFailOverMac]string{ + BOND_FAIL_OVER_MAC_NONE: "none", + BOND_FAIL_OVER_MAC_ACTIVE: "active", + BOND_FAIL_OVER_MAC_FOLLOW: "follow", +} +var StringToBondFailOverMacMap = map[string]BondFailOverMac{ + "none": BOND_FAIL_OVER_MAC_NONE, + "active": BOND_FAIL_OVER_MAC_ACTIVE, + "follow": BOND_FAIL_OVER_MAC_FOLLOW, +} + +func (b BondFailOverMac) String() string { + s, ok := bondFailOverMacToString[b] + if !ok { + return fmt.Sprintf("BondFailOverMac(%d)", b) + } + return s +} + // BondXmitHashPolicy type type BondXmitHashPolicy int @@ -583,6 +756,7 @@ const ( BOND_XMIT_HASH_POLICY_LAYER2_3 BOND_XMIT_HASH_POLICY_ENCAP2_3 BOND_XMIT_HASH_POLICY_ENCAP3_4 + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC BOND_XMIT_HASH_POLICY_UNKNOWN ) @@ -592,6 +766,7 @@ var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{ BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3", BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3", BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4", + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC: "vlan+srcmac", } var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2": BOND_XMIT_HASH_POLICY_LAYER2, @@ -599,6 +774,7 @@ var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3, "encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3, "encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4, + "vlan+srcmac": BOND_XMIT_HASH_POLICY_VLAN_SRCMAC, } // BondLacpRate type @@ -647,6 +823,25 @@ const ( BOND_AD_SELECT_COUNT ) +var bondAdSelectToString = map[BondAdSelect]string{ + BOND_AD_SELECT_STABLE: "stable", + BOND_AD_SELECT_BANDWIDTH: "bandwidth", + BOND_AD_SELECT_COUNT: "count", +} +var StringToBondAdSelectMap = map[string]BondAdSelect{ + "stable": BOND_AD_SELECT_STABLE, + "bandwidth": BOND_AD_SELECT_BANDWIDTH, + "count": BOND_AD_SELECT_COUNT, +} + +func (b BondAdSelect) String() string { + s, ok := bondAdSelectToString[b] + if !ok { + return fmt.Sprintf("BondAdSelect(%d)", b) + } + return s +} + // BondAdInfo represents ad info for bond type BondAdInfo struct { AggregatorId int @@ -678,7 +873,7 @@ type Bond struct { AllSlavesActive int MinLinks int LpInterval int - PackersPerSlave int + PacketsPerSlave int LacpRate BondLacpRate AdSelect BondAdSelect // looking at iproute tool AdInfo can only be retrived. It can't be set. @@ -711,7 +906,7 @@ func NewLinkBond(atr LinkAttrs) *Bond { AllSlavesActive: -1, MinLinks: -1, LpInterval: -1, - PackersPerSlave: -1, + PacketsPerSlave: -1, LacpRate: -1, AdSelect: -1, AdActorSysPrio: -1, @@ -761,8 +956,10 @@ func (bond *Bond) Type() string { type BondSlaveState uint8 const ( - BondStateActive = iota // Link is active. - BondStateBackup // Link is backup. + //BondStateActive Link is active. + BondStateActive BondSlaveState = iota + //BondStateBackup Link is backup. + BondStateBackup ) func (s BondSlaveState) String() string { @@ -776,15 +973,19 @@ func (s BondSlaveState) String() string { } } -// BondSlaveState represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave +// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave // attribute, which contains the status of MII link monitoring type BondSlaveMiiStatus uint8 const ( - BondLinkUp = iota // link is up and running. - BondLinkFail // link has just gone down. - BondLinkDown // link has been down for too long time. - BondLinkBack // link is going back. + //BondLinkUp link is up and running. + BondLinkUp BondSlaveMiiStatus = iota + //BondLinkFail link has just gone down. + BondLinkFail + //BondLinkDown link has been down for too long time. + BondLinkDown + //BondLinkBack link is going back. + BondLinkBack ) func (s BondSlaveMiiStatus) String() string { @@ -817,6 +1018,49 @@ func (b *BondSlave) SlaveType() string { return "bond" } +type VrfSlave struct { + Table uint32 +} + +func (v *VrfSlave) SlaveType() string { + return "vrf" +} + +// Geneve devices must specify RemoteIP and ID (VNI) on create +// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223 +type Geneve struct { + LinkAttrs + ID uint32 // vni + Remote net.IP + Ttl uint8 + Tos uint8 + Dport uint16 + UdpCsum uint8 + UdpZeroCsum6Tx uint8 + UdpZeroCsum6Rx uint8 + Link uint32 + FlowBased bool + InnerProtoInherit bool + Df GeneveDf +} + +func (geneve *Geneve) Attrs() *LinkAttrs { + return &geneve.LinkAttrs +} + +func (geneve *Geneve) Type() string { + return "geneve" +} + +type GeneveDf uint8 + +const ( + GENEVE_DF_UNSET GeneveDf = iota + GENEVE_DF_SET + GENEVE_DF_INHERIT + GENEVE_DF_MAX +) + // Gretap devices must specify LocalIP and RemoteIP on create type Gretap struct { LinkAttrs @@ -861,6 +1105,7 @@ type Iptun struct { EncapType uint16 EncapFlags uint16 FlowBased bool + Proto uint8 } func (iptun *Iptun) Attrs() *LinkAttrs { @@ -878,10 +1123,15 @@ type Ip6tnl struct { Remote net.IP Ttl uint8 Tos uint8 - EncapLimit uint8 Flags uint32 Proto uint8 FlowInfo uint32 + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 + FlowBased bool } func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs { @@ -892,14 +1142,47 @@ func (ip6tnl *Ip6tnl) Type() string { return "ip6tnl" } +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84 +type TunnelEncapType uint16 + +const ( + None TunnelEncapType = iota + FOU + GUE +) + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91 +type TunnelEncapFlag uint16 + +const ( + CSum TunnelEncapFlag = 1 << 0 + CSum6 = 1 << 1 + RemCSum = 1 << 2 +) + +// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12 +type IP6TunnelFlag uint16 + +const ( + IP6_TNL_F_IGN_ENCAP_LIMIT IP6TunnelFlag = 1 // don't add encapsulation limit if one isn't present in inner packet + IP6_TNL_F_USE_ORIG_TCLASS = 2 // copy the traffic class field from the inner packet + IP6_TNL_F_USE_ORIG_FLOWLABEL = 4 // copy the flowlabel from the inner packet + IP6_TNL_F_MIP6_DEV = 8 // being used for Mobile IPv6 + IP6_TNL_F_RCV_DSCP_COPY = 10 // copy DSCP from the outer packet + IP6_TNL_F_USE_ORIG_FWMARK = 20 // copy fwmark from inner packet + IP6_TNL_F_ALLOW_LOCAL_REMOTE = 40 // allow remote endpoint on the local node +) + type Sittun struct { LinkAttrs Link uint32 - Local net.IP - Remote net.IP Ttl uint8 Tos uint8 PMtuDisc uint8 + Proto uint8 + Local net.IP + Remote net.IP + EncapLimit uint8 EncapType uint16 EncapFlags uint16 EncapSport uint16 @@ -950,6 +1233,7 @@ type Gretun struct { EncapFlags uint16 EncapSport uint16 EncapDport uint16 + FlowBased bool } func (gretun *Gretun) Attrs() *LinkAttrs { @@ -993,6 +1277,7 @@ func (gtp *GTP) Type() string { } // Virtual XFRM Interfaces +// // Named "xfrmi" to prevent confusion with XFRM objects type Xfrmi struct { LinkAttrs @@ -1034,6 +1319,58 @@ var StringToIPoIBMode = map[string]IPoIBMode{ "connected": IPOIB_MODE_CONNECTED, } +const ( + CAN_STATE_ERROR_ACTIVE = iota + CAN_STATE_ERROR_WARNING + CAN_STATE_ERROR_PASSIVE + CAN_STATE_BUS_OFF + CAN_STATE_STOPPED + CAN_STATE_SLEEPING +) + +type Can struct { + LinkAttrs + + BitRate uint32 + SamplePoint uint32 + TimeQuanta uint32 + PropagationSegment uint32 + PhaseSegment1 uint32 + PhaseSegment2 uint32 + SyncJumpWidth uint32 + BitRatePreScaler uint32 + + Name string + TimeSegment1Min uint32 + TimeSegment1Max uint32 + TimeSegment2Min uint32 + TimeSegment2Max uint32 + SyncJumpWidthMax uint32 + BitRatePreScalerMin uint32 + BitRatePreScalerMax uint32 + BitRatePreScalerInc uint32 + + ClockFrequency uint32 + + State uint32 + + Mask uint32 + Flags uint32 + + TxError uint16 + RxError uint16 + + RestartMs uint32 +} + +func (can *Can) Attrs() *LinkAttrs { + return &can.LinkAttrs +} + +func (can *Can) Type() string { + return "can" +} + type IPoIB struct { LinkAttrs Pkey uint16 @@ -1049,11 +1386,27 @@ func (ipoib *IPoIB) Type() string { return "ipoib" } +type BareUDP struct { + LinkAttrs + Port uint16 + EtherType uint16 + SrcPortMin uint16 + MultiProto bool +} + +func (bareudp *BareUDP) Attrs() *LinkAttrs { + return &bareudp.LinkAttrs +} + +func (bareudp *BareUDP) Type() string { + return "bareudp" +} + // iproute2 supported devices; // vlan | veth | vcan | dummy | ifb | macvlan | macvtap | // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | // gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon | -// bond_slave | ipvlan | xfrm +// bond_slave | ipvlan | xfrm | bareudp // LinkNotFoundError wraps the various not found errors when // getting/reading links. This is intended for better error diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index ec915a0b9..d713612a9 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -34,14 +34,27 @@ const ( TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI ) +var StringToTuntapModeMap = map[string]TuntapMode{ + "tun": TUNTAP_MODE_TUN, + "tap": TUNTAP_MODE_TAP, +} + +func (ttm TuntapMode) String() string { + switch ttm { + case TUNTAP_MODE_TUN: + return "tun" + case TUNTAP_MODE_TAP: + return "tap" + } + return "unknown" +} + const ( VF_LINK_STATE_AUTO uint32 = 0 VF_LINK_STATE_ENABLE uint32 = 1 VF_LINK_STATE_DISABLE uint32 = 2 ) -var lookupByDump = false - var macvlanModes = [...]uint32{ 0, nl.MACVLAN_MODE_PRIVATE, @@ -138,7 +151,6 @@ func (h *Handle) LinkSetAllmulticastOn(link Link) error { msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Change = unix.IFF_ALLMULTI msg.Flags = unix.IFF_ALLMULTI - msg.Index = int32(base.Index) req.AddData(msg) @@ -168,6 +180,51 @@ func (h *Handle) LinkSetAllmulticastOff(link Link) error { return err } +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func LinkSetMulticastOn(link Link) error { + return pkgHandle.LinkSetMulticastOn(link) +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func (h *Handle) LinkSetMulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Flags = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func LinkSetMulticastOff(link Link) error { + return pkgHandle.LinkSetMulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func (h *Handle) LinkSetMulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { return pkgHandle.MacvlanMACAddrAdd(link, addr) } @@ -237,6 +294,37 @@ func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode return err } +// LinkSetMacvlanMode sets the mode of a macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + return pkgHandle.LinkSetMacvlanMode(link, mode) +} + +// LinkSetMacvlanMode sets the mode of the macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func (h *Handle) LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[mode])) + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func BridgeSetMcastSnoop(link Link, on bool) error { return pkgHandle.BridgeSetMcastSnoop(link, on) } @@ -247,6 +335,26 @@ func (h *Handle) BridgeSetMcastSnoop(link Link, on bool) error { return h.linkModify(bridge, unix.NLM_F_ACK) } +func BridgeSetVlanFiltering(link Link, on bool) error { + return pkgHandle.BridgeSetVlanFiltering(link, on) +} + +func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { + bridge := link.(*Bridge) + bridge.VlanFiltering = &on + return h.linkModify(bridge, unix.NLM_F_ACK) +} + +func BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + return pkgHandle.BridgeSetVlanDefaultPVID(link, pvid) +} + +func (h *Handle) BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + bridge := link.(*Bridge) + bridge.VlanDefaultPVID = &pvid + return h.linkModify(bridge, unix.NLM_F_ACK) +} + func SetPromiscOn(link Link) error { return pkgHandle.SetPromiscOn(link) } @@ -389,6 +497,58 @@ func (h *Handle) LinkSetAlias(link Link, name string) error { return err } +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func LinkAddAltName(link Link, name string) error { + return pkgHandle.LinkAddAltName(link, name) +} + +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func (h *Handle) LinkAddAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func LinkDelAltName(link Link, name string) error { + return pkgHandle.LinkDelAltName(link, name) +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func (h *Handle) LinkDelAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_DELLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + // LinkSetHardwareAddr sets the hardware address of the link device. // Equivalent to: `ip link set $link address $hwaddr` func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { @@ -491,13 +651,50 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { req.AddData(msg) data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfVlan{ Vf: uint32(vf), Vlan: uint32(vlan), Qos: uint32(qos), } - nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return pkgHandle.LinkSetVfVlanQosProto(link, vf, vlan, qos, proto) +} + +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func (h *Handle) LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + vfInfo := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfVlanList := vfInfo.AddRtAttr(nl.IFLA_VF_VLAN_LIST, nil) + + vfmsg := nl.VfVlanInfo{ + VfVlan: nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + Qos: uint32(qos), + }, + VlanProto: (uint16(proto)>>8)&0xFF | (uint16(proto)&0xFF)<<8, + } + + vfVlanList.AddRtAttr(nl.IFLA_VF_VLAN_INFO, vfmsg.Serialize()) req.AddData(data) _, err := req.Execute(unix.NETLINK_ROUTE, 0) @@ -848,6 +1045,141 @@ func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { return err } +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func LinkSetGSOMaxSegs(link Link, maxSegs int) error { + return pkgHandle.LinkSetGSOMaxSegs(link, maxSegs) +} + +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func (h *Handle) LinkSetGSOMaxSegs(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOMaxSize(link, maxSize) +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func LinkSetGROMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROMaxSize(link, maxSize) +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOIPv4MaxSize(link, maxSize) +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROIPv4MaxSize(link, maxSize) +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func boolAttr(val bool) []byte { var v uint8 if val { @@ -1005,8 +1337,8 @@ func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) { if bond.LpInterval >= 0 { data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval))) } - if bond.PackersPerSlave >= 0 { - data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave))) + if bond.PacketsPerSlave >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave))) } if bond.LacpRate >= 0 { data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate))) @@ -1048,6 +1380,14 @@ func (h *Handle) LinkAdd(link Link) error { return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) } +func LinkModify(link Link) error { + return pkgHandle.LinkModify(link) +} + +func (h *Handle) LinkModify(link Link) error { + return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK) +} + func (h *Handle) linkModify(link Link, flags int) error { // TODO: support extra data for macvlan base := link.Attrs() @@ -1060,8 +1400,6 @@ func (h *Handle) linkModify(link Link, flags int) error { } if isTuntap { - // TODO: support user - // TODO: support group if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP { return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode) } @@ -1089,21 +1427,64 @@ func (h *Handle) linkModify(link Link, flags int) error { } req.Flags |= uint16(tuntap.Mode) - + const TUN = "/dev/net/tun" for i := 0; i < queues; i++ { localReq := req - file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0) + fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0) if err != nil { cleanupFds(fds) return err } - fds = append(fds, file) - _, _, errno := unix.Syscall(unix.SYS_IOCTL, file.Fd(), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) if errno != 0 { + // close the new fd + unix.Close(fd) + // and the already opened ones cleanupFds(fds) return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno) } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETOWNER, uintptr(tuntap.Owner)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETOWNER failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETGROUP, uintptr(tuntap.Group)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETGROUP failed [%d], errno %v", i, errno) + } + + // Set the tun device to non-blocking before use. The below comment + // taken from: + // + // https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea + // + // Note there is a complication because in go, if a device node is + // opened, go sets it to use nonblocking I/O. However a /dev/net/tun + // doesn't work with epoll until after the TUNSETIFF ioctl has been + // done. So we open the unix fd directly, do the ioctl, then put the + // fd in nonblocking mode, an then finally wrap it in a os.File, + // which will see the nonblocking mode and add the fd to the + // pollable set, so later on when we Read() from it blocked the + // calling thread in the kernel. + // + // See + // https://github.com/golang/go/issues/30426 + // which got exposed in go 1.13 by the fix to + // https://github.com/golang/go/issues/30624 + err = unix.SetNonblock(fd, true) + if err != nil { + cleanupFds(fds) + return fmt.Errorf("Tuntap set to non-blocking failed [%d], err %v", i, err) + } + + // create the file from the file descriptor and store it + file := os.NewFile(uintptr(fd), TUN) + fds = append(fds, file) + // 1) we only care for the name of the first tap in the multi queue set // 2) if the original name was empty, the localReq has now the actual name // @@ -1114,11 +1495,29 @@ func (h *Handle) linkModify(link Link, flags int) error { if i == 0 { link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00") } + + } + + control := func(file *os.File, f func(fd uintptr)) error { + name := file.Name() + conn, err := file.SyscallConn() + if err != nil { + return fmt.Errorf("SyscallConn() failed on %s: %v", name, err) + } + if err := conn.Control(f); err != nil { + return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err) + } + return nil } // only persist interface if NonPersist is NOT set if !tuntap.NonPersist { - _, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1) + var errno syscall.Errno + if err := control(fds[0], func(fd uintptr) { + _, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1) + }); err != nil { + return err + } if errno != 0 { cleanupFds(fds) return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) @@ -1135,7 +1534,10 @@ func (h *Handle) linkModify(link Link, flags int) error { // un-persist (e.g. allow the interface to be removed) the tuntap // should not hurt if not set prior, condition might be not needed if !tuntap.NonPersist { - _, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0) + // ignore error + _ = control(fds[0], func(fd uintptr) { + _, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0) + }) } cleanupFds(fds) return err @@ -1193,6 +1595,11 @@ func (h *Handle) linkModify(link Link, flags int) error { nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) req.AddData(nameData) + if base.Alias != "" { + alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(base.Alias)) + req.AddData(alias) + } + if base.MTU > 0 { mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) req.AddData(mtu) @@ -1228,6 +1635,21 @@ func (h *Handle) linkModify(link Link, flags int) error { req.AddData(gsoAttr) } + if base.GROMaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, nl.Uint32Attr(base.GROMaxSize)) + req.AddData(groAttr) + } + + if base.GSOIPv4MaxSize > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GSOIPv4MaxSize)) + req.AddData(gsoAttr) + } + + if base.GROIPv4MaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GROIPv4MaxSize)) + req.AddData(groAttr) + } + if base.Group > 0 { groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) req.AddData(groupAttr) @@ -1264,6 +1686,10 @@ func (h *Handle) linkModify(link Link, flags int) error { if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN { data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol))) } + case *Netkit: + if err := addNetkitAttrs(link, linkInfo, flags); err != nil { + return err + } case *Veth: data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil) @@ -1272,12 +1698,28 @@ func (h *Handle) linkModify(link Link, flags int) error { if base.TxQLen >= 0 { peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) } + if base.NumTxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + } + if base.NumRxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) + } if base.MTU > 0 { peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) } if link.PeerHardwareAddr != nil { peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr)) } + if link.PeerNamespace != nil { + switch ns := link.PeerNamespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_FD, val) + } + } case *Vxlan: addVxlanAttrs(link, linkInfo) case *Bond: @@ -1286,16 +1728,16 @@ func (h *Handle) linkModify(link Link, flags int) error { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *IPVtap: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) case *Macvlan: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvlanAttrs(link, linkInfo) case *Macvtap: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvtapAttrs(link, linkInfo) + case *Geneve: + addGeneveAttrs(link, linkInfo) case *Gretap: addGretapAttrs(link, linkInfo) case *Iptun: @@ -1318,6 +1760,8 @@ func (h *Handle) linkModify(link Link, flags int) error { addXfrmiAttrs(link, linkInfo) case *IPoIB: addIPoIBAttrs(link, linkInfo) + case *BareUDP: + addBareUDPAttrs(link, linkInfo) } req.AddData(linkInfo) @@ -1372,6 +1816,13 @@ func (h *Handle) linkByNameDump(name string) (Link, error) { if link.Attrs().Name == name { return link, nil } + + // support finding interfaces also via altnames + for _, altName := range link.Attrs().AltNames { + if altName == name { + return link, nil + } + } } return nil, LinkNotFoundError{fmt.Errorf("Link %s not found", name)} } @@ -1410,6 +1861,9 @@ func (h *Handle) LinkByName(name string) (Link, error) { req.AddData(attr) nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name)) + if len(name) > 15 { + nameData = nl.NewRtAttr(unix.IFLA_ALT_IFNAME, nl.ZeroTerminated(name)) + } req.AddData(nameData) link, err := execGetLink(req) @@ -1499,7 +1953,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) { } } -// linkDeserialize deserializes a raw message received from netlink into +// LinkDeserialize deserializes a raw message received from netlink into // a link object. func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { msg := nl.DeserializeIfInfomsg(m) @@ -1509,10 +1963,19 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } - base := LinkAttrs{Index: int(msg.Index), RawFlags: msg.Flags, Flags: linkFlags(msg.Flags), EncapType: msg.EncapType()} - if msg.Flags&unix.IFF_PROMISC != 0 { - base.Promisc = 1 + base := NewLinkAttrs() + base.Index = int(msg.Index) + base.RawFlags = msg.Flags + base.Flags = linkFlags(msg.Flags) + base.EncapType = msg.EncapType() + base.NetNsID = -1 + if msg.Flags&unix.IFF_ALLMULTI != 0 { + base.Allmulti = 1 } + if msg.Flags&unix.IFF_MULTICAST != 0 { + base.Multi = 1 + } + var ( link Link stats32 *LinkStatistics32 @@ -1541,18 +2004,26 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &Bridge{} case "vlan": link = &Vlan{} + case "netkit": + link = &Netkit{} case "veth": link = &Veth{} + case "wireguard": + link = &Wireguard{} case "vxlan": link = &Vxlan{} case "bond": link = &Bond{} case "ipvlan": link = &IPVlan{} + case "ipvtap": + link = &IPVtap{} case "macvlan": link = &Macvlan{} case "macvtap": link = &Macvtap{} + case "geneve": + link = &Geneve{} case "gretap": link = &Gretap{} case "ip6gretap": @@ -1579,6 +2050,10 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &Tuntap{} case "ipoib": link = &IPoIB{} + case "can": + link = &Can{} + case "bareudp": + link = &BareUDP{} default: link = &GenericLink{LinkType: linkType} } @@ -1588,6 +2063,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } switch linkType { + case "netkit": + parseNetkitData(link, data) case "vlan": parseVlanData(link, data) case "vxlan": @@ -1596,10 +2073,14 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { parseBondData(link, data) case "ipvlan": parseIPVlanData(link, data) + case "ipvtap": + parseIPVtapData(link, data) case "macvlan": parseMacvlanData(link, data) case "macvtap": parseMacvtapData(link, data) + case "geneve": + parseGeneveData(link, data) case "gretap": parseGretapData(link, data) case "ip6gretap": @@ -1628,13 +2109,21 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { parseTuntapData(link, data) case "ipoib": parseIPoIBData(link, data) + case "can": + parseCanData(link, data) + case "bareudp": + parseBareUDPData(link, data) } + case nl.IFLA_INFO_SLAVE_KIND: slaveType = string(info.Value[:len(info.Value)-1]) switch slaveType { case "bond": linkSlave = &BondSlave{} + case "vrf": + linkSlave = &VrfSlave{} } + case nl.IFLA_INFO_SLAVE_DATA: switch slaveType { case "bond": @@ -1643,6 +2132,12 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } parseBondSlaveData(linkSlave, data) + case "vrf": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseVrfSlaveData(linkSlave, data) } } } @@ -1660,6 +2155,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.Name = string(attr.Value[:len(attr.Value)-1]) case unix.IFLA_MTU: base.MTU = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_PROMISCUITY: + base.Promisc = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK: base.ParentIndex = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_MASTER: @@ -1694,14 +2191,38 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { protinfo := parseProtinfo(attrs) base.Protinfo = &protinfo } + case unix.IFLA_PROP_LIST | unix.NLA_F_NESTED: + attrs, err := nl.ParseRouteAttr(attr.Value[:]) + if err != nil { + return nil, err + } + + base.AltNames = []string{} + for _, attr := range attrs { + if attr.Attr.Type == unix.IFLA_ALT_IFNAME { + base.AltNames = append(base.AltNames, nl.BytesToString(attr.Value)) + } + } case unix.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) + case unix.IFLA_PHYS_SWITCH_ID: + base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK_NETNSID: base.NetNsID = int(native.Uint32(attr.Value[0:4])) - case unix.IFLA_GSO_MAX_SIZE: - base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SEGS: + base.TSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SIZE: + base.TSOMaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_GSO_MAX_SEGS: base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_MAX_SIZE: + base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_MAX_SIZE: + base.GROMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_IPV4_MAX_SIZE: + base.GSOIPv4MaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_IPV4_MAX_SIZE: + base.GROIPv4MaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_VFINFO_LIST: data, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -1718,6 +2239,13 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.NumRxQueues = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_GROUP: base.Group = native.Uint32(attr.Value[0:4]) + case unix.IFLA_PERM_ADDRESS: + for _, b := range attr.Value { + if b != 0 { + base.PermHWAddr = attr.Value[:] + break + } + } } } @@ -1830,21 +2358,24 @@ type LinkUpdate struct { // LinkSubscribe takes a chan down which notifications will be sent // when links change. Close the 'done' chan to stop subscription. func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeAt works like LinkSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(ns, netns.None(), ch, done, nil, false) + return linkSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeOptions contains a set of options to use with // LinkSubscribeWithOptions. type LinkSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // LinkSubscribeWithOptions work like LinkSubscribe but enable to @@ -1855,14 +2386,27 @@ func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_LINK) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -1884,7 +2428,8 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -1899,15 +2444,15 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } ifmsg := nl.DeserializeIfInfomsg(m.Data) header := unix.NlMsghdr(m.Header) @@ -1916,7 +2461,7 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c if cberr != nil { cberr(err) } - return + continue } ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link} } @@ -1942,6 +2487,16 @@ func (h *Handle) LinkSetGuard(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD) } +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return pkgHandle.LinkSetBRSlaveGroupFwdMask(link, mask) +} + +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func (h *Handle) LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return h.setProtinfoAttrRawVal(link, nl.Uint16Attr(mask), nl.IFLA_BRPORT_GROUP_FWD_MASK) +} + func LinkSetFastLeave(link Link, mode bool) error { return pkgHandle.LinkSetFastLeave(link, mode) } @@ -1974,6 +2529,14 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) } +func LinkSetIsolated(link Link, mode bool) error { + return pkgHandle.LinkSetIsolated(link, mode) +} + +func (h *Handle) LinkSetIsolated(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_ISOLATED) +} + func LinkSetBrProxyArp(link Link, mode bool) error { return pkgHandle.LinkSetBrProxyArp(link, mode) } @@ -1990,7 +2553,15 @@ func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP_WIFI) } -func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { +func LinkSetBrNeighSuppress(link Link, mode bool) error { + return pkgHandle.LinkSetBrNeighSuppress(link, mode) +} + +func (h *Handle) LinkSetBrNeighSuppress(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_NEIGH_SUPPRESS) +} + +func (h *Handle) setProtinfoAttrRawVal(link Link, val []byte, attr int) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) @@ -2000,7 +2571,7 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { req.AddData(msg) br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil) - br.AddRtAttr(attr, boolToByte(mode)) + br.AddRtAttr(attr, val) req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { @@ -2008,6 +2579,9 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { } return nil } +func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { + return h.setProtinfoAttrRawVal(link, boolToByte(mode), attr) +} // LinkSetTxQLen sets the transaction queue length for the link. // Equivalent to: `ip link set $link txqlen $qlen` @@ -2065,6 +2639,80 @@ func (h *Handle) LinkSetGroup(link Link, group int) error { return err } +func addNetkitAttrs(nk *Netkit, linkInfo *nl.RtAttr, flag int) error { + if nk.peerLinkAttrs.HardwareAddr != nil || nk.HardwareAddr != nil { + return fmt.Errorf("netkit doesn't support setting Ethernet") + } + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + // Kernel will return error if trying to change the mode of an existing netkit device + data.AddRtAttr(nl.IFLA_NETKIT_MODE, nl.Uint32Attr(uint32(nk.Mode))) + data.AddRtAttr(nl.IFLA_NETKIT_POLICY, nl.Uint32Attr(uint32(nk.Policy))) + data.AddRtAttr(nl.IFLA_NETKIT_PEER_POLICY, nl.Uint32Attr(uint32(nk.PeerPolicy))) + + if (flag & unix.NLM_F_EXCL) == 0 { + // Modifying peer link attributes will not take effect + return nil + } + + peer := data.AddRtAttr(nl.IFLA_NETKIT_PEER_INFO, nil) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + if nk.peerLinkAttrs.Flags&net.FlagUp != 0 { + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP + } + if nk.peerLinkAttrs.Index != 0 { + msg.Index = int32(nk.peerLinkAttrs.Index) + } + peer.AddChild(msg) + if nk.peerLinkAttrs.Name != "" { + peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(nk.peerLinkAttrs.Name)) + } + if nk.peerLinkAttrs.MTU > 0 { + peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(nk.peerLinkAttrs.MTU))) + } + if nk.peerLinkAttrs.GSOMaxSegs > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSegs)) + } + if nk.peerLinkAttrs.GSOMaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSize)) + } + if nk.peerLinkAttrs.GSOIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOIPv4MaxSize)) + } + if nk.peerLinkAttrs.GROIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GROIPv4MaxSize)) + } + if nk.peerLinkAttrs.Namespace != nil { + switch ns := nk.peerLinkAttrs.Namespace.(type) { + case NsPid: + peer.AddRtAttr(unix.IFLA_NET_NS_PID, nl.Uint32Attr(uint32(ns))) + case NsFd: + peer.AddRtAttr(unix.IFLA_NET_NS_FD, nl.Uint32Attr(uint32(ns))) + } + } + return nil +} + +func parseNetkitData(link Link, data []syscall.NetlinkRouteAttr) { + netkit := link.(*Netkit) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_NETKIT_PRIMARY: + isPrimary := datum.Value[0:1][0] + if isPrimary != 0 { + netkit.isPrimary = true + } + case nl.IFLA_NETKIT_MODE: + netkit.Mode = NetkitMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_POLICY: + netkit.Policy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_PEER_POLICY: + netkit.PeerPolicy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + } + } +} + func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { vlan := link.(*Vlan) for _, datum := range data { @@ -2080,6 +2728,13 @@ func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan := link.(*Vxlan) for _, datum := range data { + // NOTE(vish): Apparently some messages can be sent with no value. + // We special case GBP here to not change existing + // functionality. It appears that GBP sends a datum.Value + // of null. + if len(datum.Value) == 0 && datum.Attr.Type != nl.IFLA_VXLAN_GBP { + continue + } switch datum.Attr.Type { case nl.IFLA_VXLAN_ID: vxlan.VxlanId = int(native.Uint32(datum.Value[0:4])) @@ -2178,7 +2833,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BOND_LP_INTERVAL: bond.LpInterval = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_PACKETS_PER_SLAVE: - bond.PackersPerSlave = int(native.Uint32(data[i].Value[0:4])) + bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_AD_LACP_RATE: bond.LacpRate = BondLacpRate(data[i].Value[0]) case nl.IFLA_BOND_AD_SELECT: @@ -2258,6 +2913,16 @@ func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { } } +func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + vrfSlave := slave.(*VrfSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + vrfSlave.Table = native.Uint32(data[i].Value[0:4]) + } + } +} + func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { ipv := link.(*IPVlan) for _, datum := range data { @@ -2270,11 +2935,42 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVtap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) + } + } +} + +func addMacvtapAttrs(macvtap *Macvtap, linkInfo *nl.RtAttr) { + addMacvlanAttrs(&macvtap.Macvlan, linkInfo) +} + func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvtap) parseMacvlanData(&macv.Macvlan, data) } +func addMacvlanAttrs(macvlan *Macvlan, linkInfo *nl.RtAttr) { + var data *nl.RtAttr + + if macvlan.Mode != MACVLAN_MODE_DEFAULT || macvlan.BCQueueLen > 0 { + data = linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + } + + if macvlan.Mode != MACVLAN_MODE_DEFAULT { + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macvlan.Mode])) + } + if macvlan.BCQueueLen > 0 { + data.AddRtAttr(nl.IFLA_MACVLAN_BC_QUEUE_LEN, nl.Uint32Attr(macvlan.BCQueueLen)) + } +} + func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { @@ -2302,6 +2998,10 @@ func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { for _, macDatum := range macs { macv.MACAddrs = append(macv.MACAddrs, net.HardwareAddr(macDatum.Value[0:6])) } + case nl.IFLA_MACVLAN_BC_QUEUE_LEN: + macv.BCQueueLen = native.Uint32(datum.Value[0:4]) + case nl.IFLA_MACVLAN_BC_QUEUE_LEN_USED: + macv.UsedBCQueueLen = native.Uint32(datum.Value[0:4]) } } } @@ -2327,12 +3027,73 @@ func linkFlags(rawFlags uint32) net.Flags { return f } +func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if geneve.InnerProtoInherit { + data.AddRtAttr(nl.IFLA_GENEVE_INNER_PROTO_INHERIT, []byte{}) + } + + if geneve.FlowBased { + geneve.ID = 0 + data.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, []byte{}) + } + + if ip := geneve.Remote; ip != nil { + if ip4 := ip.To4(); ip4 != nil { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip.To4()) + } else { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip)) + } + } + + if geneve.ID != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID)) + } + + if geneve.Dport != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport)) + } + + if geneve.Ttl != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl)) + } + + if geneve.Tos != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos)) + } + + data.AddRtAttr(nl.IFLA_GENEVE_DF, nl.Uint8Attr(uint8(geneve.Df))) +} + +func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { + geneve := link.(*Geneve) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GENEVE_ID: + geneve.ID = native.Uint32(datum.Value[0:4]) + case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6: + geneve.Remote = datum.Value + case nl.IFLA_GENEVE_PORT: + geneve.Dport = ntohs(datum.Value[0:2]) + case nl.IFLA_GENEVE_TTL: + geneve.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_TOS: + geneve.Tos = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_COLLECT_METADATA: + geneve.FlowBased = true + case nl.IFLA_GENEVE_INNER_PROTO_INHERIT: + geneve.InnerProtoInherit = true + } + } +} + func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if gretap.FlowBased { // In flow based mode, no other attributes need to be configured - data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) return } @@ -2415,6 +3176,12 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if gre.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) + return + } + if ip := gre.Local; ip != nil { if ip.To4() != nil { ip = ip.To4() @@ -2485,6 +3252,8 @@ func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { gre.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_DPORT: gre.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = true } } } @@ -2513,7 +3282,8 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { case nl.IFLA_XDP_FD: xdp.Fd = int(native.Uint32(attr.Value[0:4])) case nl.IFLA_XDP_ATTACHED: - xdp.Attached = attr.Value[0] != 0 + xdp.AttachMode = uint32(attr.Value[0]) + xdp.Attached = xdp.AttachMode != 0 case nl.IFLA_XDP_FLAGS: xdp.Flags = native.Uint32(attr.Value[0:4]) case nl.IFLA_XDP_PROG_ID: @@ -2524,14 +3294,14 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { } func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if iptun.FlowBased { // In flow based mode, no other attributes need to be configured - linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased)) + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) return } - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - ip := iptun.Local.To4() if ip != nil { data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) @@ -2552,6 +3322,7 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto)) } func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2577,7 +3348,9 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_ENCAP_FLAGS: iptun.EncapFlags = native.Uint16(datum.Value[0:2]) case nl.IFLA_IPTUN_COLLECT_METADATA: - iptun.FlowBased = int8(datum.Value[0]) != 0 + iptun.FlowBased = true + case nl.IFLA_IPTUN_PROTO: + iptun.Proto = datum.Value[0] } } } @@ -2585,6 +3358,12 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if ip6tnl.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) + return + } + if ip6tnl.Link != 0 { data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link)) } @@ -2601,10 +3380,14 @@ func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl)) data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos)) - data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags)) data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto)) data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(ip6tnl.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(ip6tnl.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(ip6tnl.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(ip6tnl.EncapDport)) } func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2616,17 +3399,27 @@ func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_REMOTE: ip6tnl.Remote = net.IP(datum.Value[:16]) case nl.IFLA_IPTUN_TTL: - ip6tnl.Ttl = uint8(datum.Value[0]) + ip6tnl.Ttl = datum.Value[0] case nl.IFLA_IPTUN_TOS: - ip6tnl.Tos = uint8(datum.Value[0]) - case nl.IFLA_IPTUN_ENCAP_LIMIT: - ip6tnl.EncapLimit = uint8(datum.Value[0]) + ip6tnl.Tos = datum.Value[0] case nl.IFLA_IPTUN_FLAGS: ip6tnl.Flags = native.Uint32(datum.Value[:4]) case nl.IFLA_IPTUN_PROTO: - ip6tnl.Proto = uint8(datum.Value[0]) + ip6tnl.Proto = datum.Value[0] case nl.IFLA_IPTUN_FLOWINFO: ip6tnl.FlowInfo = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_ENCAP_LIMIT: + ip6tnl.EncapLimit = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + ip6tnl.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + ip6tnl.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + ip6tnl.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + ip6tnl.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_COLLECT_METADATA: + ip6tnl.FlowBased = true } } } @@ -2653,8 +3446,10 @@ func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl)) } + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(sittun.Proto)) data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos)) data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(sittun.EncapLimit)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport)) @@ -2670,11 +3465,13 @@ func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_REMOTE: sittun.Remote = net.IP(datum.Value[0:4]) case nl.IFLA_IPTUN_TTL: - sittun.Ttl = uint8(datum.Value[0]) + sittun.Ttl = datum.Value[0] case nl.IFLA_IPTUN_TOS: - sittun.Tos = uint8(datum.Value[0]) + sittun.Tos = datum.Value[0] case nl.IFLA_IPTUN_PMTUDISC: - sittun.PMtuDisc = uint8(datum.Value[0]) + sittun.PMtuDisc = datum.Value[0] + case nl.IFLA_IPTUN_PROTO: + sittun.Proto = datum.Value[0] case nl.IFLA_IPTUN_ENCAP_TYPE: sittun.EncapType = native.Uint16(datum.Value[0:2]) case nl.IFLA_IPTUN_ENCAP_FLAGS: @@ -2761,18 +3558,30 @@ func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { if bridge.MulticastSnooping != nil { data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping)) } + if bridge.AgeingTime != nil { + data.AddRtAttr(nl.IFLA_BR_AGEING_TIME, nl.Uint32Attr(*bridge.AgeingTime)) + } if bridge.HelloTime != nil { data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime)) } if bridge.VlanFiltering != nil { data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) } + if bridge.VlanDefaultPVID != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_DEFAULT_PVID, nl.Uint16Attr(*bridge.VlanDefaultPVID)) + } + if bridge.GroupFwdMask != nil { + data.AddRtAttr(nl.IFLA_BR_GROUP_FWD_MASK, nl.Uint16Attr(*bridge.GroupFwdMask)) + } } func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { br := bridge.(*Bridge) for _, datum := range data { switch datum.Attr.Type { + case nl.IFLA_BR_AGEING_TIME: + ageingTime := native.Uint32(datum.Value[0:4]) + br.AgeingTime = &ageingTime case nl.IFLA_BR_HELLO_TIME: helloTime := native.Uint32(datum.Value[0:4]) br.HelloTime = &helloTime @@ -2782,6 +3591,12 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BR_VLAN_FILTERING: vlanFiltering := datum.Value[0] == 1 br.VlanFiltering = &vlanFiltering + case nl.IFLA_BR_VLAN_DEFAULT_PVID: + vlanDefaultPVID := native.Uint16(datum.Value[0:2]) + br.VlanDefaultPVID = &vlanDefaultPVID + case nl.IFLA_BR_GROUP_FWD_MASK: + mask := native.Uint16(datum.Value[0:2]) + br.GroupFwdMask = &mask } } } @@ -2823,12 +3638,17 @@ func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) { if err != nil { return nil, err } - vfs = append(vfs, parseVfInfo(vfAttrs, i)) + + vf, err := parseVfInfo(vfAttrs, i) + if err != nil { + return nil, err + } + vfs = append(vfs, vf) } return vfs, nil } -func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { +func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) (VfInfo, error) { vf := VfInfo{ID: id} for _, element := range data { switch element.Attr.Type { @@ -2839,6 +3659,12 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vl := nl.DeserializeVfVlan(element.Value[:]) vf.Vlan = int(vl.Vlan) vf.Qos = int(vl.Qos) + case nl.IFLA_VF_VLAN_LIST: + vfVlanInfoList, err := nl.DeserializeVfVlanList(element.Value[:]) + if err != nil { + return vf, err + } + vf.VlanProto = int(vfVlanInfoList[0].VlanProto) case nl.IFLA_VF_TX_RATE: txr := nl.DeserializeVfTxRate(element.Value[:]) vf.TxRate = int(txr.Rate) @@ -2852,16 +3678,35 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vfr := nl.DeserializeVfRate(element.Value[:]) vf.MaxTxRate = vfr.MaxTxRate vf.MinTxRate = vfr.MinTxRate + case nl.IFLA_VF_STATS: + vfstats := nl.DeserializeVfStats(element.Value[:]) + vf.RxPackets = vfstats.RxPackets + vf.TxPackets = vfstats.TxPackets + vf.RxBytes = vfstats.RxBytes + vf.TxBytes = vfstats.TxBytes + vf.Multicast = vfstats.Multicast + vf.Broadcast = vfstats.Broadcast + vf.RxDropped = vfstats.RxDropped + vf.TxDropped = vfstats.TxDropped + + case nl.IFLA_VF_RSS_QUERY_EN: + result := nl.DeserializeVfRssQueryEn(element.Value) + vf.RssQuery = result.Setting + + case nl.IFLA_VF_TRUST: + result := nl.DeserializeVfTrust(element.Value) + vf.Trust = result.Setting } } - return vf + return vf, nil } func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex))) - data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) - + if xfrmi.Ifid != 0 { + data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) + } } func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2876,8 +3721,7 @@ func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { } } -// LinkSetBondSlave add slave to bond link via ioctl interface. -func LinkSetBondSlave(link Link, master *Bond) error { +func ioctlBondSlave(cmd uintptr, link Link, master *Bond) error { fd, err := getSocketUDP() if err != nil { return err @@ -2885,10 +3729,38 @@ func LinkSetBondSlave(link Link, master *Bond) error { defer syscall.Close(fd) ifreq := newIocltSlaveReq(link.Attrs().Name, master.Attrs().Name) - - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCBONDENSLAVE, uintptr(unsafe.Pointer(ifreq))) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), cmd, uintptr(unsafe.Pointer(ifreq))) if errno != 0 { - return fmt.Errorf("Failed to enslave %q to %q, errno=%v", link.Attrs().Name, master.Attrs().Name, errno) + return fmt.Errorf("errno=%v", errno) + } + return nil +} + +// LinkSetBondSlaveActive sets specified slave to ACTIVE in an `active-backup` bond link via ioctl interface. +// +// Multiple calls keeps the status unchanged(shown in the unit test). +func LinkSetBondSlaveActive(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDCHANGEACTIVE, link, master) + if err != nil { + return fmt.Errorf("Failed to set slave %q active in %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave add slave to bond link via ioctl interface. +func LinkSetBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDENSLAVE, link, master) + if err != nil { + return fmt.Errorf("Failed to enslave %q to %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave removes specified slave from bond link via ioctl interface. +func LinkDelBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDRELEASE, link, master) + if err != nil { + return fmt.Errorf("Failed to del slave %q from %q, %v", link.Attrs().Name, master.Attrs().Name, err) } return nil } @@ -3010,9 +3882,86 @@ func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseCanData(link Link, data []syscall.NetlinkRouteAttr) { + can := link.(*Can) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.IFLA_CAN_BITTIMING: + can.BitRate = native.Uint32(datum.Value) + can.SamplePoint = native.Uint32(datum.Value[4:]) + can.TimeQuanta = native.Uint32(datum.Value[8:]) + can.PropagationSegment = native.Uint32(datum.Value[12:]) + can.PhaseSegment1 = native.Uint32(datum.Value[16:]) + can.PhaseSegment2 = native.Uint32(datum.Value[20:]) + can.SyncJumpWidth = native.Uint32(datum.Value[24:]) + can.BitRatePreScaler = native.Uint32(datum.Value[28:]) + case nl.IFLA_CAN_BITTIMING_CONST: + can.Name = string(datum.Value[:16]) + can.TimeSegment1Min = native.Uint32(datum.Value[16:]) + can.TimeSegment1Max = native.Uint32(datum.Value[20:]) + can.TimeSegment2Min = native.Uint32(datum.Value[24:]) + can.TimeSegment2Max = native.Uint32(datum.Value[28:]) + can.SyncJumpWidthMax = native.Uint32(datum.Value[32:]) + can.BitRatePreScalerMin = native.Uint32(datum.Value[36:]) + can.BitRatePreScalerMax = native.Uint32(datum.Value[40:]) + can.BitRatePreScalerInc = native.Uint32(datum.Value[44:]) + case nl.IFLA_CAN_CLOCK: + can.ClockFrequency = native.Uint32(datum.Value) + case nl.IFLA_CAN_STATE: + can.State = native.Uint32(datum.Value) + case nl.IFLA_CAN_CTRLMODE: + can.Mask = native.Uint32(datum.Value) + can.Flags = native.Uint32(datum.Value[4:]) + case nl.IFLA_CAN_BERR_COUNTER: + can.TxError = native.Uint16(datum.Value) + can.RxError = native.Uint16(datum.Value[2:]) + case nl.IFLA_CAN_RESTART_MS: + can.RestartMs = native.Uint32(datum.Value) + case nl.IFLA_CAN_DATA_BITTIMING_CONST: + case nl.IFLA_CAN_RESTART: + case nl.IFLA_CAN_DATA_BITTIMING: + case nl.IFLA_CAN_TERMINATION: + case nl.IFLA_CAN_TERMINATION_CONST: + case nl.IFLA_CAN_BITRATE_CONST: + case nl.IFLA_CAN_DATA_BITRATE_CONST: + case nl.IFLA_CAN_BITRATE_MAX: + } + } +} + func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey))) data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode))) data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast))) } + +func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port))) + data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType))) + if bareudp.SrcPortMin != 0 { + data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin)) + } + if bareudp.MultiProto { + data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{}) + } +} + +func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) { + bareudp := link.(*BareUDP) + for _, attr := range data { + switch attr.Attr.Type { + case nl.IFLA_BAREUDP_PORT: + bareudp.Port = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_ETHERTYPE: + bareudp.EtherType = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_SRCPORT_MIN: + bareudp.SrcPortMin = native.Uint16(attr.Value) + case nl.IFLA_BAREUDP_MULTIPROTO_MODE: + bareudp.MultiProto = true + } + } +} diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go index 379e5655f..32d722e88 100644 --- a/vendor/github.com/vishvananda/netlink/neigh.go +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -12,6 +12,7 @@ type Neigh struct { State int Type int Flags int + FlagsExt int IP net.IP HardwareAddr net.HardwareAddr LLIPAddr net.IP //Used in the case of NHRP diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go index cb3b55d35..2d93044a6 100644 --- a/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -24,7 +24,11 @@ const ( NDA_MASTER NDA_LINK_NETNSID NDA_SRC_VNI - NDA_MAX = NDA_SRC_VNI + NDA_PROTOCOL + NDA_NH_ID + NDA_FDB_EXT_ATTRS + NDA_FLAGS_EXT + NDA_MAX = NDA_FLAGS_EXT ) // Neighbor Cache Entry States. @@ -42,11 +46,19 @@ const ( // Neighbor Flags const ( - NTF_USE = 0x01 - NTF_SELF = 0x02 - NTF_MASTER = 0x04 - NTF_PROXY = 0x08 - NTF_ROUTER = 0x80 + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_EXT_LEARNED = 0x10 + NTF_OFFLOADED = 0x20 + NTF_STICKY = 0x40 + NTF_ROUTER = 0x80 +) + +// Extended Neighbor Flags +const ( + NTF_EXT_MANAGED = 0x00000001 ) // Ndmsg is for adding, removing or receiving information about a neighbor table entry @@ -162,11 +174,16 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { if neigh.LLIPAddr != nil { llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) req.AddData(llIPData) - } else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { + } else if neigh.HardwareAddr != nil { hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) req.AddData(hwData) } + if neigh.FlagsExt != 0 { + flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt))) + req.AddData(flagsExtData) + } + if neigh.Vlan != 0 { vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan))) req.AddData(vlanData) @@ -243,6 +260,18 @@ func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) { // Ignore messages from other interfaces continue } + if msg.Family != 0 && ndm.Family != msg.Family { + continue + } + if msg.State != 0 && ndm.State != msg.State { + continue + } + if msg.Type != 0 && ndm.Type != msg.Type { + continue + } + if msg.Flags != 0 && ndm.Flags != msg.Flags { + continue + } neigh, err := NeighDeserialize(m) if err != nil { @@ -293,6 +322,8 @@ func NeighDeserialize(m []byte) (*Neigh, error) { } else { neigh.HardwareAddr = net.HardwareAddr(attr.Value) } + case NDA_FLAGS_EXT: + neigh.FlagsExt = int(native.Uint32(attr.Value[0:4])) case NDA_VLAN: neigh.Vlan = int(native.Uint16(attr.Value[0:2])) case NDA_VNI: @@ -308,13 +339,13 @@ func NeighDeserialize(m []byte) (*Neigh, error) { // NeighSubscribe takes a chan down which notifications will be sent // when neighbors are added or deleted. Close the 'done' chan to stop subscription. func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeAt works like NeighSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(ns, netns.None(), ch, done, nil, false) + return neighSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeOptions contains a set of options to use with @@ -323,6 +354,11 @@ type NeighSubscribeOptions struct { Namespace *netns.NsHandle ErrorCallback func(error) ListExisting bool + + // max size is based on value of /proc/sys/net/core/rmem_max + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // NeighSubscribeWithOptions work like NeighSubscribe but enable to @@ -333,16 +369,17 @@ func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) makeRequest := func(family int) error { - req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, - unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) + ndmsg := &Ndmsg{Family: uint8(family)} + req.AddData(ndmsg) if err := s.Send(req); err != nil { return err } @@ -351,6 +388,17 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -396,13 +444,12 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + nError := int32(native.Uint32(m.Data[0:4])) + if nError == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(syscall.Errno(-nError)) } return } diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 42d3acf91..da12c42a5 100644 --- a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -16,7 +16,7 @@ func LinkSetMTU(link Link, mtu int) error { return ErrNotImplemented } -func LinkSetMaster(link Link, master *Bridge) error { +func LinkSetMaster(link Link, master Link) error { return ErrNotImplemented } @@ -52,6 +52,10 @@ func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { return ErrNotImplemented } +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return ErrNotImplemented +} + func LinkSetVfTxRate(link Link, vf, rate int) error { return ErrNotImplemented } @@ -72,6 +76,10 @@ func LinkSetXdpFd(link Link, fd int) error { return ErrNotImplemented } +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { + return ErrNotImplemented +} + func LinkSetARPOff(link Link) error { return ErrNotImplemented } @@ -120,6 +128,22 @@ func LinkSetTxQLen(link Link, qlen int) error { return ErrNotImplemented } +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } @@ -176,14 +200,34 @@ func RouteAdd(route *Route) error { return ErrNotImplemented } +func RouteAppend(route *Route) error { + return ErrNotImplemented +} + +func RouteChange(route *Route) error { + return ErrNotImplemented +} + func RouteDel(route *Route) error { return ErrNotImplemented } +func RouteGet(destination net.IP) ([]Route, error) { + return nil, ErrNotImplemented +} + func RouteList(link Link, family int) ([]Route, error) { return nil, ErrNotImplemented } +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteReplace(route *Route) error { + return ErrNotImplemented +} + func XfrmPolicyAdd(policy *XfrmPolicy) error { return ErrNotImplemented } @@ -196,6 +240,10 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { return nil, ErrNotImplemented } +func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return nil, ErrNotImplemented +} + func XfrmStateAdd(policy *XfrmState) error { return ErrNotImplemented } @@ -235,3 +283,7 @@ func NeighDeserialize(m []byte) (*Neigh, error) { func SocketGet(local, remote net.Addr) (*Socket, error) { return nil, ErrNotImplemented } + +func SocketDestroy(local, remote net.Addr) (*Socket, error) { + return nil, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/netns_linux.go b/vendor/github.com/vishvananda/netlink/netns_linux.go index 77cf6f469..2eb29c7ce 100644 --- a/vendor/github.com/vishvananda/netlink/netns_linux.go +++ b/vendor/github.com/vishvananda/netlink/netns_linux.go @@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) @@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) - b1 := make([]byte, 4, 4) + b1 := make([]byte, 4) native.PutUint32(b1, newnsid) attr1 := nl.NewRtAttr(NETNSA_NSID, b1) req.AddData(attr1) diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go index 50db3b4cd..6bea4ed02 100644 --- a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -54,24 +54,18 @@ func (msg *IfAddrmsg) Len() int { // __u32 tstamp; /* updated timestamp, hundredths of seconds */ // }; -const IFA_CACHEINFO = 6 -const SizeofIfaCacheInfo = 0x10 - type IfaCacheInfo struct { - IfaPrefered uint32 - IfaValid uint32 - Cstamp uint32 - Tstamp uint32 + unix.IfaCacheinfo } func (msg *IfaCacheInfo) Len() int { - return SizeofIfaCacheInfo + return unix.SizeofIfaCacheinfo } func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo { - return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0])) + return (*IfaCacheInfo)(unsafe.Pointer(&b[0:unix.SizeofIfaCacheinfo][0])) } func (msg *IfaCacheInfo) Serialize() []byte { - return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofIfaCacheinfo]byte)(unsafe.Pointer(msg)))[:] } diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 79d2b6b89..6989d1edc 100644 --- a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -15,6 +15,38 @@ var L4ProtoMap = map[uint8]string{ 17: "udp", } +// From https://git.netfilter.org/libnetfilter_conntrack/tree/include/libnetfilter_conntrack/libnetfilter_conntrack_tcp.h +// enum tcp_state { +// TCP_CONNTRACK_NONE, +// TCP_CONNTRACK_SYN_SENT, +// TCP_CONNTRACK_SYN_RECV, +// TCP_CONNTRACK_ESTABLISHED, +// TCP_CONNTRACK_FIN_WAIT, +// TCP_CONNTRACK_CLOSE_WAIT, +// TCP_CONNTRACK_LAST_ACK, +// TCP_CONNTRACK_TIME_WAIT, +// TCP_CONNTRACK_CLOSE, +// TCP_CONNTRACK_LISTEN, /* obsolete */ +// #define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN +// TCP_CONNTRACK_MAX, +// TCP_CONNTRACK_IGNORE +// }; +const ( + TCP_CONNTRACK_NONE = 0 + TCP_CONNTRACK_SYN_SENT = 1 + TCP_CONNTRACK_SYN_RECV = 2 + TCP_CONNTRACK_ESTABLISHED = 3 + TCP_CONNTRACK_FIN_WAIT = 4 + TCP_CONNTRACK_CLOSE_WAIT = 5 + TCP_CONNTRACK_LAST_ACK = 6 + TCP_CONNTRACK_TIME_WAIT = 7 + TCP_CONNTRACK_CLOSE = 8 + TCP_CONNTRACK_LISTEN = 9 + TCP_CONNTRACK_SYN_SENT2 = 9 + TCP_CONNTRACK_MAX = 10 + TCP_CONNTRACK_IGNORE = 11 +) + // All the following constants are coming from: // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -31,6 +63,7 @@ var L4ProtoMap = map[uint8]string{ // IPCTNL_MSG_MAX // }; const ( + IPCTNL_MSG_CT_NEW = 0 IPCTNL_MSG_CT_GET = 1 IPCTNL_MSG_CT_DELETE = 2 ) @@ -40,9 +73,11 @@ const ( NFNETLINK_V0 = 0 ) -// #define NLA_F_NESTED (1 << 15) const ( - NLA_F_NESTED = (1 << 15) + NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15) + NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14) + NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4 ) // enum ctattr_type { @@ -86,7 +121,10 @@ const ( CTA_COUNTERS_REPLY = 10 CTA_USE = 11 CTA_ID = 12 + CTA_ZONE = 18 CTA_TIMESTAMP = 20 + CTA_LABELS = 22 + CTA_LABELS_MASK = 23 ) // enum ctattr_tuple { @@ -147,7 +185,10 @@ const ( // }; // #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) const ( + CTA_PROTOINFO_UNSPEC = 0 CTA_PROTOINFO_TCP = 1 + CTA_PROTOINFO_DCCP = 2 + CTA_PROTOINFO_SCTP = 3 ) // enum ctattr_protoinfo_tcp { diff --git a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go index db66faaad..956367b29 100644 --- a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -9,17 +9,56 @@ const ( ) const ( - DEVLINK_CMD_GET = 1 - DEVLINK_CMD_ESWITCH_GET = 29 - DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_GET = 1 + DEVLINK_CMD_PORT_GET = 5 + DEVLINK_CMD_PORT_SET = 6 + DEVLINK_CMD_PORT_NEW = 7 + DEVLINK_CMD_PORT_DEL = 8 + DEVLINK_CMD_ESWITCH_GET = 29 + DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_RESOURCE_DUMP = 36 + DEVLINK_CMD_PARAM_GET = 38 + DEVLINK_CMD_PARAM_SET = 39 + DEVLINK_CMD_INFO_GET = 51 ) const ( - DEVLINK_ATTR_BUS_NAME = 1 - DEVLINK_ATTR_DEV_NAME = 2 - DEVLINK_ATTR_ESWITCH_MODE = 25 - DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 - DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_BUS_NAME = 1 + DEVLINK_ATTR_DEV_NAME = 2 + DEVLINK_ATTR_PORT_INDEX = 3 + DEVLINK_ATTR_PORT_TYPE = 4 + DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 + DEVLINK_ATTR_PORT_NETDEV_NAME = 7 + DEVLINK_ATTR_PORT_IBDEV_NAME = 8 + DEVLINK_ATTR_ESWITCH_MODE = 25 + DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 + DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_RESOURCE_LIST = 63 /* nested */ + DEVLINK_ATTR_RESOURCE = 64 /* nested */ + DEVLINK_ATTR_RESOURCE_NAME = 65 /* string */ + DEVLINK_ATTR_RESOURCE_ID = 66 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE = 67 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_NEW = 68 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_VALID = 69 /* u8 */ + DEVLINK_ATTR_RESOURCE_SIZE_MIN = 70 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_MAX = 71 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_GRAN = 72 /* u64 */ + DEVLINK_ATTR_RESOURCE_UNIT = 73 /* u8 */ + DEVLINK_ATTR_RESOURCE_OCC = 74 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID = 75 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS = 76 /* u64 */ + DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 + DEVLINK_ATTR_PORT_FUNCTION = 145 + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 + DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 ) const ( @@ -38,3 +77,66 @@ const ( DEVLINK_ESWITCH_ENCAP_MODE_NONE = 0 DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1 ) + +const ( + DEVLINK_PORT_FLAVOUR_PHYSICAL = 0 + DEVLINK_PORT_FLAVOUR_CPU = 1 + DEVLINK_PORT_FLAVOUR_DSA = 2 + DEVLINK_PORT_FLAVOUR_PCI_PF = 3 + DEVLINK_PORT_FLAVOUR_PCI_VF = 4 + DEVLINK_PORT_FLAVOUR_VIRTUAL = 5 + DEVLINK_PORT_FLAVOUR_UNUSED = 6 + DEVLINK_PORT_FLAVOUR_PCI_SF = 7 +) + +const ( + DEVLINK_PORT_TYPE_NOTSET = 0 + DEVLINK_PORT_TYPE_AUTO = 1 + DEVLINK_PORT_TYPE_ETH = 2 + DEVLINK_PORT_TYPE_IB = 3 +) + +const ( + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1 + DEVLINK_PORT_FN_ATTR_STATE = 2 + DEVLINK_PORT_FN_ATTR_OPSTATE = 3 +) + +const ( + DEVLINK_PORT_FN_STATE_INACTIVE = 0 + DEVLINK_PORT_FN_STATE_ACTIVE = 1 +) + +const ( + DEVLINK_PORT_FN_OPSTATE_DETACHED = 0 + DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1 +) + +const ( + DEVLINK_RESOURCE_UNIT_ENTRY uint8 = 0 +) + +const ( + DEVLINK_ATTR_PARAM = iota + 80 /* nested */ + DEVLINK_ATTR_PARAM_NAME /* string */ + DEVLINK_ATTR_PARAM_GENERIC /* flag */ + DEVLINK_ATTR_PARAM_TYPE /* u8 */ + DEVLINK_ATTR_PARAM_VALUES_LIST /* nested */ + DEVLINK_ATTR_PARAM_VALUE /* nested */ + DEVLINK_ATTR_PARAM_VALUE_DATA /* dynamic */ + DEVLINK_ATTR_PARAM_VALUE_CMODE /* u8 */ +) + +const ( + DEVLINK_PARAM_TYPE_U8 = 1 + DEVLINK_PARAM_TYPE_U16 = 2 + DEVLINK_PARAM_TYPE_U32 = 3 + DEVLINK_PARAM_TYPE_STRING = 5 + DEVLINK_PARAM_TYPE_BOOL = 6 +) + +const ( + DEVLINK_PARAM_CMODE_RUNTIME = iota + DEVLINK_PARAM_CMODE_DRIVERINIT + DEVLINK_PARAM_CMODE_PERMANENT +) diff --git a/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go new file mode 100644 index 000000000..d5dd69e0c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go @@ -0,0 +1,21 @@ +package nl + +// id's of route attribute from https://elixir.bootlin.com/linux/v5.17.3/source/include/uapi/linux/lwtunnel.h#L38 +// the value's size are specified in https://elixir.bootlin.com/linux/v5.17.3/source/net/ipv4/ip_tunnel_core.c#L928 + +const ( + LWTUNNEL_IP6_UNSPEC = iota + LWTUNNEL_IP6_ID + LWTUNNEL_IP6_DST + LWTUNNEL_IP6_SRC + LWTUNNEL_IP6_HOPLIMIT + LWTUNNEL_IP6_TC + LWTUNNEL_IP6_FLAGS + LWTUNNEL_IP6_PAD // not implemented + LWTUNNEL_IP6_OPTS // not implemented + __LWTUNNEL_IP6_MAX +) + + + + diff --git a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go new file mode 100644 index 000000000..89dd009df --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -0,0 +1,227 @@ +package nl + +import ( + "strconv" + + "golang.org/x/sys/unix" +) + +const ( + /* The protocol version */ + IPSET_PROTOCOL = 6 + + /* The max length of strings including NUL: set and type identifiers */ + IPSET_MAXNAMELEN = 32 + + /* The maximum permissible comment length we will accept over netlink */ + IPSET_MAX_COMMENT_SIZE = 255 +) + +const ( + _ = iota + IPSET_CMD_PROTOCOL /* 1: Return protocol version */ + IPSET_CMD_CREATE /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME /* 5: Rename a set */ + IPSET_CMD_SWAP /* 6: Swap two sets */ + IPSET_CMD_LIST /* 7: List sets */ + IPSET_CMD_SAVE /* 8: Save sets */ + IPSET_CMD_ADD /* 9: Add an element to a set */ + IPSET_CMD_DEL /* 10: Delete an element from a set */ + IPSET_CMD_TEST /* 11: Test an element in a set */ + IPSET_CMD_HEADER /* 12: Get set header data only */ + IPSET_CMD_TYPE /* 13: Get set type */ +) + +/* Attributes at command level */ +const ( + _ = iota + IPSET_ATTR_PROTOCOL /* 1: Protocol version */ + IPSET_ATTR_SETNAME /* 2: Name of the set */ + IPSET_ATTR_TYPENAME /* 3: Typename */ + IPSET_ATTR_REVISION /* 4: Settype revision */ + IPSET_ATTR_FAMILY /* 5: Settype family */ + IPSET_ATTR_FLAGS /* 6: Flags at command level */ + IPSET_ATTR_DATA /* 7: Nested attributes */ + IPSET_ATTR_ADT /* 8: Multiple data containers */ + IPSET_ATTR_LINENO /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN /* 10: Minimal supported version number */ + + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME /* Setname at rename/swap */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN /* type rev min */ +) + +/* CADT specific attributes */ +const ( + IPSET_ATTR_IP = 1 + IPSET_ATTR_IP_FROM = 1 + IPSET_ATTR_IP_TO = 2 + IPSET_ATTR_CIDR = 3 + IPSET_ATTR_PORT = 4 + IPSET_ATTR_PORT_FROM = 4 + IPSET_ATTR_PORT_TO = 5 + IPSET_ATTR_TIMEOUT = 6 + IPSET_ATTR_PROTO = 7 + IPSET_ATTR_CADT_FLAGS = 8 + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO /* 9 */ + IPSET_ATTR_MARK = 10 + IPSET_ATTR_MARKMASK = 11 + + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16 + + /* Create-only specific attributes */ + IPSET_ATTR_GC = 3 + iota + IPSET_ATTR_HASHSIZE + IPSET_ATTR_MAXELEM + IPSET_ATTR_NETMASK + IPSET_ATTR_PROBES + IPSET_ATTR_RESIZE + IPSET_ATTR_SIZE + + /* Kernel-only */ + IPSET_ATTR_ELEMENTS + IPSET_ATTR_REFERENCES + IPSET_ATTR_MEMSIZE + + SET_ATTR_CREATE_MAX +) + +const ( + IPSET_ATTR_IPADDR_IPV4 = 1 + IPSET_ATTR_IPADDR_IPV6 = 2 +) + +/* ADT specific attributes */ +const ( + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 + IPSET_ATTR_NAME + IPSET_ATTR_NAMEREF + IPSET_ATTR_IP2 + IPSET_ATTR_CIDR2 + IPSET_ATTR_IP2_TO + IPSET_ATTR_IFACE + IPSET_ATTR_BYTES + IPSET_ATTR_PACKETS + IPSET_ATTR_COMMENT + IPSET_ATTR_SKBMARK + IPSET_ATTR_SKBPRIO + IPSET_ATTR_SKBQUEUE +) + +/* Flags at CADT attribute level, upper half of cmdattrs */ +const ( + IPSET_FLAG_BIT_BEFORE = 0 + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE) + IPSET_FLAG_BIT_PHYSDEV = 1 + IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV) + IPSET_FLAG_BIT_NOMATCH = 2 + IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH) + IPSET_FLAG_BIT_WITH_COUNTERS = 3 + IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS) + IPSET_FLAG_BIT_WITH_COMMENT = 4 + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT) + IPSET_FLAG_BIT_WITH_FORCEADD = 5 + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD) + IPSET_FLAG_BIT_WITH_SKBINFO = 6 + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO) + IPSET_FLAG_CADT_MAX = 15 +) + +const ( + IPSET_ERR_PRIVATE = 4096 + iota + IPSET_ERR_PROTOCOL + IPSET_ERR_FIND_TYPE + IPSET_ERR_MAX_SETS + IPSET_ERR_BUSY + IPSET_ERR_EXIST_SETNAME2 + IPSET_ERR_TYPE_MISMATCH + IPSET_ERR_EXIST + IPSET_ERR_INVALID_CIDR + IPSET_ERR_INVALID_NETMASK + IPSET_ERR_INVALID_FAMILY + IPSET_ERR_TIMEOUT + IPSET_ERR_REFERENCED + IPSET_ERR_IPADDR_IPV4 + IPSET_ERR_IPADDR_IPV6 + IPSET_ERR_COUNTER + IPSET_ERR_COMMENT + IPSET_ERR_INVALID_MARKMASK + IPSET_ERR_SKBINFO + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352 +) + +type IPSetError uintptr + +func (e IPSetError) Error() string { + switch int(e) { + case IPSET_ERR_PRIVATE: + return "private" + case IPSET_ERR_PROTOCOL: + return "invalid protocol" + case IPSET_ERR_FIND_TYPE: + return "invalid type" + case IPSET_ERR_MAX_SETS: + return "max sets reached" + case IPSET_ERR_BUSY: + return "busy" + case IPSET_ERR_EXIST_SETNAME2: + return "exist_setname2" + case IPSET_ERR_TYPE_MISMATCH: + return "type mismatch" + case IPSET_ERR_EXIST: + return "exist" + case IPSET_ERR_INVALID_CIDR: + return "invalid cidr" + case IPSET_ERR_INVALID_NETMASK: + return "invalid netmask" + case IPSET_ERR_INVALID_FAMILY: + return "invalid family" + case IPSET_ERR_TIMEOUT: + return "timeout" + case IPSET_ERR_REFERENCED: + return "referenced" + case IPSET_ERR_IPADDR_IPV4: + return "invalid ipv4 address" + case IPSET_ERR_IPADDR_IPV6: + return "invalid ipv6 address" + case IPSET_ERR_COUNTER: + return "invalid counter" + case IPSET_ERR_COMMENT: + return "invalid comment" + case IPSET_ERR_INVALID_MARKMASK: + return "invalid markmask" + case IPSET_ERR_SKBINFO: + return "skbinfo" + default: + return "errno " + strconv.Itoa(int(e)) + } +} + +func GetIpsetFlags(cmd int) int { + switch cmd { + case IPSET_CMD_CREATE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_CREATE + case IPSET_CMD_DESTROY, + IPSET_CMD_FLUSH, + IPSET_CMD_RENAME, + IPSET_CMD_SWAP, + IPSET_CMD_TEST: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_LIST, + IPSET_CMD_SAVE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_ROOT | unix.NLM_F_MATCH | unix.NLM_F_DUMP + case IPSET_CMD_ADD, + IPSET_CMD_DEL: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_HEADER, + IPSET_CMD_TYPE, + IPSET_CMD_PROTOCOL: + return unix.NLM_F_REQUEST + default: + return 0 + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index afb16a9c1..0b5be470c 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -1,6 +1,9 @@ package nl import ( + "bytes" + "encoding/binary" + "fmt" "unsafe" ) @@ -28,6 +31,16 @@ const ( IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL ) +const ( + IFLA_NETKIT_UNSPEC = iota + IFLA_NETKIT_PEER_INFO + IFLA_NETKIT_PRIMARY + IFLA_NETKIT_POLICY + IFLA_NETKIT_PEER_POLICY + IFLA_NETKIT_MODE + IFLA_NETKIT_MAX = IFLA_NETKIT_MODE +) + const ( VETH_INFO_UNSPEC = iota VETH_INFO_PEER @@ -83,7 +96,37 @@ const ( IFLA_BRPORT_PROXYARP IFLA_BRPORT_LEARNING_SYNC IFLA_BRPORT_PROXYARP_WIFI - IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI + IFLA_BRPORT_ROOT_ID + IFLA_BRPORT_BRIDGE_ID + IFLA_BRPORT_DESIGNATED_PORT + IFLA_BRPORT_DESIGNATED_COST + IFLA_BRPORT_ID + IFLA_BRPORT_NO + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + IFLA_BRPORT_CONFIG_PENDING + IFLA_BRPORT_MESSAGE_AGE_TIMER + IFLA_BRPORT_FORWARD_DELAY_TIMER + IFLA_BRPORT_HOLD_TIMER + IFLA_BRPORT_FLUSH + IFLA_BRPORT_MULTICAST_ROUTER + IFLA_BRPORT_PAD + IFLA_BRPORT_MCAST_FLOOD + IFLA_BRPORT_MCAST_TO_UCAST + IFLA_BRPORT_VLAN_TUNNEL + IFLA_BRPORT_BCAST_FLOOD + IFLA_BRPORT_GROUP_FWD_MASK + IFLA_BRPORT_NEIGH_SUPPRESS + IFLA_BRPORT_ISOLATED + IFLA_BRPORT_BACKUP_PORT + IFLA_BRPORT_MRP_RING_OPEN + IFLA_BRPORT_MRP_IN_OPEN + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + IFLA_BRPORT_LOCKED + IFLA_BRPORT_MAB + IFLA_BRPORT_MCAST_N_GROUPS + IFLA_BRPORT_MCAST_MAX_GROUPS + IFLA_BRPORT_MAX = IFLA_BRPORT_MCAST_MAX_GROUPS ) const ( @@ -101,7 +144,9 @@ const ( IFLA_MACVLAN_MACADDR IFLA_MACVLAN_MACADDR_DATA IFLA_MACVLAN_MACADDR_COUNT - IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_BC_QUEUE_LEN + IFLA_MACVLAN_BC_QUEUE_LEN_USED + IFLA_MACVLAN_MAX = IFLA_MACVLAN_BC_QUEUE_LEN_USED ) const ( @@ -171,6 +216,25 @@ const ( IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE ) +const ( + IFLA_GENEVE_UNSPEC = iota + IFLA_GENEVE_ID // vni + IFLA_GENEVE_REMOTE + IFLA_GENEVE_TTL + IFLA_GENEVE_TOS + IFLA_GENEVE_PORT // destination port + IFLA_GENEVE_COLLECT_METADATA + IFLA_GENEVE_REMOTE6 + IFLA_GENEVE_UDP_CSUM + IFLA_GENEVE_UDP_ZERO_CSUM6_TX + IFLA_GENEVE_UDP_ZERO_CSUM6_RX + IFLA_GENEVE_LABEL + IFLA_GENEVE_TTL_INHERIT + IFLA_GENEVE_DF + IFLA_GENEVE_INNER_PROTO_INHERIT + IFLA_GENEVE_MAX = IFLA_GENEVE_INNER_PROTO_INHERIT +) + const ( IFLA_GRE_UNSPEC = iota IFLA_GRE_LINK @@ -226,7 +290,15 @@ const ( IFLA_VF_TRUST /* Trust state of VF */ IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */ - IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID + IFLA_VF_VLAN_LIST /* nested list of vlans, option for QinQ */ + + IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID +) + +const ( + IFLA_VF_VLAN_INFO_UNSPEC = iota + IFLA_VF_VLAN_INFO /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX ) const ( @@ -243,12 +315,15 @@ const ( IFLA_VF_STATS_TX_BYTES IFLA_VF_STATS_BROADCAST IFLA_VF_STATS_MULTICAST - IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST + IFLA_VF_STATS_RX_DROPPED + IFLA_VF_STATS_TX_DROPPED + IFLA_VF_STATS_MAX = IFLA_VF_STATS_TX_DROPPED ) const ( SizeofVfMac = 0x24 SizeofVfVlan = 0x0c + SizeofVfVlanInfo = 0x10 SizeofVfTxRate = 0x08 SizeofVfRate = 0x0c SizeofVfSpoofchk = 0x08 @@ -304,6 +379,49 @@ func (msg *VfVlan) Serialize() []byte { return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:] } +func DeserializeVfVlanList(b []byte) ([]*VfVlanInfo, error) { + var vfVlanInfoList []*VfVlanInfo + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, element := range attrs { + if element.Attr.Type == IFLA_VF_VLAN_INFO { + vfVlanInfoList = append(vfVlanInfoList, DeserializeVfVlanInfo(element.Value)) + } + } + + if len(vfVlanInfoList) == 0 { + return nil, fmt.Errorf("VF vlan list is defined but no vf vlan info elements were found") + } + + return vfVlanInfoList, nil +} + +// struct ifla_vf_vlan_info { +// __u32 vf; +// __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ +// __u32 qos; +// __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +// }; + +type VfVlanInfo struct { + VfVlan + VlanProto uint16 +} + +func DeserializeVfVlanInfo(b []byte) *VfVlanInfo { + return &VfVlanInfo{ + *(*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0])), + binary.BigEndian.Uint16(b[SizeofVfVlan:SizeofVfVlanInfo]), + } +} + +func (msg *VfVlanInfo) Serialize() []byte { + return (*(*[SizeofVfVlanInfo]byte)(unsafe.Pointer(msg)))[:] +} + // struct ifla_vf_tx_rate { // __u32 vf; // __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ @@ -326,6 +444,59 @@ func (msg *VfTxRate) Serialize() []byte { return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:] } +//struct ifla_vf_stats { +// __u64 rx_packets; +// __u64 tx_packets; +// __u64 rx_bytes; +// __u64 tx_bytes; +// __u64 broadcast; +// __u64 multicast; +//}; + +type VfStats struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 +} + +func DeserializeVfStats(b []byte) VfStats { + var vfstat VfStats + stats, err := ParseRouteAttr(b) + if err != nil { + return vfstat + } + var valueVar uint64 + for _, stat := range stats { + if err := binary.Read(bytes.NewBuffer(stat.Value), NativeEndian(), &valueVar); err != nil { + break + } + switch stat.Attr.Type { + case IFLA_VF_STATS_RX_PACKETS: + vfstat.RxPackets = valueVar + case IFLA_VF_STATS_TX_PACKETS: + vfstat.TxPackets = valueVar + case IFLA_VF_STATS_RX_BYTES: + vfstat.RxBytes = valueVar + case IFLA_VF_STATS_TX_BYTES: + vfstat.TxBytes = valueVar + case IFLA_VF_STATS_MULTICAST: + vfstat.Multicast = valueVar + case IFLA_VF_STATS_BROADCAST: + vfstat.Broadcast = valueVar + case IFLA_VF_STATS_RX_DROPPED: + vfstat.RxDropped = valueVar + case IFLA_VF_STATS_TX_DROPPED: + vfstat.TxDropped = valueVar + } + } + return vfstat +} + // struct ifla_vf_rate { // __u32 vf; // __u32 min_tx_rate; /* Min Bandwidth in Mbps */ @@ -478,6 +649,14 @@ const ( IFLA_XDP_MAX = IFLA_XDP_PROG_ID ) +// XDP program attach mode (used as dump value for IFLA_XDP_ATTACHED) +const ( + XDP_ATTACHED_NONE = iota + XDP_ATTACHED_DRV + XDP_ATTACHED_SKB + XDP_ATTACHED_HW +) + const ( IFLA_IPTUN_UNSPEC = iota IFLA_IPTUN_LINK @@ -608,3 +787,32 @@ const ( IFLA_IPOIB_UMCAST IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST ) + +const ( + IFLA_CAN_UNSPEC = iota + IFLA_CAN_BITTIMING + IFLA_CAN_BITTIMING_CONST + IFLA_CAN_CLOCK + IFLA_CAN_STATE + IFLA_CAN_CTRLMODE + IFLA_CAN_RESTART_MS + IFLA_CAN_RESTART + IFLA_CAN_BERR_COUNTER + IFLA_CAN_DATA_BITTIMING + IFLA_CAN_DATA_BITTIMING_CONST + IFLA_CAN_TERMINATION + IFLA_CAN_TERMINATION_CONST + IFLA_CAN_BITRATE_CONST + IFLA_CAN_DATA_BITRATE_CONST + IFLA_CAN_BITRATE_MAX + IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX +) + +const ( + IFLA_BAREUDP_UNSPEC = iota + IFLA_BAREUDP_PORT + IFLA_BAREUDP_ETHERTYPE + IFLA_BAREUDP_SRCPORT_MIN + IFLA_BAREUDP_MULTIPROTO_MODE + IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE +) diff --git a/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go new file mode 100644 index 000000000..bafd593c4 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go @@ -0,0 +1,29 @@ +package nl + +const ( + LWT_BPF_PROG_UNSPEC = iota + LWT_BPF_PROG_FD + LWT_BPF_PROG_NAME + __LWT_BPF_PROG_MAX +) + +const ( + LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1 +) + +const ( + LWT_BPF_UNSPEC = iota + LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT + LWT_BPF_XMIT_HEADROOM + __LWT_BPF_MAX +) + +const ( + LWT_BPF_MAX = __LWT_BPF_MAX - 1 +) + +const ( + LWT_BPF_MAX_HEADROOM = 256 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index aaf56c671..6cecc4517 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "fmt" "net" + "os" "runtime" "sync" "sync/atomic" @@ -27,7 +28,8 @@ const ( // tc rules or filters, or other more memory requiring data. RECEIVE_BUFFER_SIZE = 65536 // Kernel netlink pid - PidKernel uint32 = 0 + PidKernel uint32 = 0 + SizeofCnMsgOp = 0x18 ) // SupportedNlFamilies contains the list of netlink families this netlink package supports @@ -35,6 +37,12 @@ var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETL var nextSeqNr uint32 +// Default netlink socket timeout, 60s +var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0} + +// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets +var EnableErrorMessageReporting bool = false + // GetIPFamily returns the family type of a net.IP. func GetIPFamily(ip net.IP) int { if len(ip) <= net.IPv4len { @@ -77,11 +85,69 @@ func Swap32(i uint32) uint32 { return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 } +const ( + NLMSGERR_ATTR_UNUSED = 0 + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 +) + type NetlinkRequestData interface { Len() int Serialize() []byte } +const ( + PROC_CN_MCAST_LISTEN = 1 + PROC_CN_MCAST_IGNORE +) + +type CbID struct { + Idx uint32 + Val uint32 +} + +type CnMsg struct { + ID CbID + Seq uint32 + Ack uint32 + Length uint16 + Flags uint16 +} + +type CnMsgOp struct { + CnMsg + // here we differ from the C header + Op uint32 +} + +func NewCnMsg(idx, val, op uint32) *CnMsgOp { + var cm CnMsgOp + + cm.ID.Idx = idx + cm.ID.Val = val + + cm.Ack = 0 + cm.Seq = 1 + cm.Length = uint16(binary.Size(op)) + cm.Op = op + + return &cm +} + +func (msg *CnMsgOp) Serialize() []byte { + return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:] +} + +func DeserializeCnMsgOp(b []byte) *CnMsgOp { + return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0])) +} + +func (msg *CnMsgOp) Len() int { + return SizeofCnMsgOp +} + // IfInfomsg is related to links, but it is used for list requests as well type IfInfomsg struct { unix.IfInfomsg @@ -249,6 +315,12 @@ func (msg *IfInfomsg) EncapType() string { return fmt.Sprintf("unknown%d", msg.Type) } +// Round the length of a netlink message up to align it properly. +// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license. +func nlmAlignOf(msglen int) int { + return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1) +} + func rtaAlignOf(attrlen int) int { return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1) } @@ -259,6 +331,42 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { return msg } +type Uint32Bitfield struct { + Value uint32 + Selector uint32 +} + +func (a *Uint32Bitfield) Serialize() []byte { + return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:] +} + +func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield { + return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0])) +} + +type Uint32Attribute struct { + Type uint16 + Value uint32 +} + +func (a *Uint32Attribute) Serialize() []byte { + native := NativeEndian() + buf := make([]byte, rtaAlignOf(8)) + native.PutUint16(buf[0:2], 8) + native.PutUint16(buf[2:4], a.Type) + + if a.Type&NLA_F_NET_BYTEORDER != 0 { + binary.BigEndian.PutUint32(buf[4:], a.Value) + } else { + native.PutUint32(buf[4:], a.Value) + } + return buf +} + +func (a *Uint32Attribute) Len() int { + return 8 +} + // Extend RtAttr to handle data and children type RtAttr struct { unix.RtAttr @@ -381,10 +489,30 @@ func (req *NetlinkRequest) AddRawData(data []byte) { req.RawData = append(req.RawData, data...) } -// Execute the request against a the given sockType. +// Execute the request against the given sockType. // Returns a list of netlink messages in serialized format, optionally filtered // by resType. func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) { + var res [][]byte + err := req.ExecuteIter(sockType, resType, func(msg []byte) bool { + res = append(res, msg) + return true + }) + if err != nil { + return nil, err + } + return res, nil +} + +// ExecuteIter executes the request against the given sockType. +// Calls the provided callback func once for each netlink message. +// If the callback returns false, it is not called again, but +// the remaining messages are consumed/discarded. +// +// Thread safety: ExecuteIter holds a lock on the socket until +// it finishes iteration so the callback must not call back into +// the netlink API. +func (req *NetlinkRequest) ExecuteIter(sockType int, resType uint16, f func(msg []byte) bool) error { var ( s *NetlinkSocket err error @@ -401,8 +529,21 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro if s == nil { s, err = getNetlinkSocket(sockType) if err != nil { - return nil, err + return err } + + if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil { + return err + } + if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { + return err + } + if EnableErrorMessageReporting { + if err := s.SetExtAck(true); err != nil { + return err + } + } + defer s.Close() } else { s.Lock() @@ -410,56 +551,94 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro } if err := s.Send(req); err != nil { - return nil, err + return err } pid, err := s.GetPid() if err != nil { - return nil, err + return err } - var res [][]byte - done: for { msgs, from, err := s.Receive() if err != nil { - return nil, err + return err } if from.Pid != PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) } for _, m := range msgs { if m.Header.Seq != req.Seq { if sharedSocket { continue } - return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) + return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) } if m.Header.Pid != pid { continue } - if m.Header.Type == unix.NLMSG_DONE { - break done + + if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 { + return syscall.Errno(unix.EINTR) } - if m.Header.Type == unix.NLMSG_ERROR { + + if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { + // NLMSG_DONE might have no payload, if so assume no error. + if m.Header.Type == unix.NLMSG_DONE && len(m.Data) == 0 { + break done + } + native := NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + errno := int32(native.Uint32(m.Data[0:4])) + if errno == 0 { break done } - return nil, syscall.Errno(-error) + var err error + err = syscall.Errno(-errno) + + unreadData := m.Data[4:] + if m.Header.Flags&unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + // Skip the echoed request message. + echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) + unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] + + // Annotate `err` using nlmsgerr attributes. + for len(unreadData) >= syscall.SizeofRtAttr { + attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0])) + attrData := unreadData[syscall.SizeofRtAttr:attr.Len] + + switch attr.Type { + case NLMSGERR_ATTR_MSG: + err = fmt.Errorf("%w: %s", err, unix.ByteSliceToString(attrData)) + default: + // TODO: handle other NLMSGERR_ATTR types + } + + unreadData = unreadData[rtaAlignOf(int(attr.Len)):] + } + } + + return err } if resType != 0 && m.Header.Type != resType { continue } - res = append(res, m.Data) + if cont := f(m.Data); !cont { + // Drain the rest of the messages from the kernel but don't + // pass them to the iterator func. + f = dummyMsgIterFunc + } if m.Header.Flags&unix.NLM_F_MULTI == 0 { break done } } } - return res, nil + return nil +} + +func dummyMsgIterFunc(msg []byte) bool { + return true } // Create a new netlink request from proto and flags @@ -477,8 +656,9 @@ func NewNetlinkRequest(proto, flags int) *NetlinkRequest { } type NetlinkSocket struct { - fd int32 - lsa unix.SockaddrNetlink + fd int32 + file *os.File + lsa unix.SockaddrNetlink sync.Mutex } @@ -487,8 +667,13 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { if err != nil { return nil, err } + err = unix.SetNonblock(fd, true) + if err != nil { + return nil, err + } s := &NetlinkSocket{ - fd: int32(fd), + fd: int32(fd), + file: os.NewFile(uintptr(fd), "netlink"), } s.lsa.Family = unix.AF_NETLINK if err := unix.Bind(fd, &s.lsa); err != nil { @@ -519,12 +704,14 @@ func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSock // In case of success, the caller is expected to execute the returned function // at the end of the code that needs to be executed in the network namespace. // Example: -// func jobAt(...) error { -// d, err := executeInNetns(...) -// if err != nil { return err} -// defer d() -// < code which needs to be executed in specific netns> -// } +// +// func jobAt(...) error { +// d, err := executeInNetns(...) +// if err != nil { return err} +// defer d() +// < code which needs to be executed in specific netns> +// } +// // TODO: his function probably belongs to netns pkg. func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) { var ( @@ -573,8 +760,13 @@ func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { if err != nil { return nil, err } + err = unix.SetNonblock(fd, true) + if err != nil { + return nil, err + } s := &NetlinkSocket{ - fd: int32(fd), + fd: int32(fd), + file: os.NewFile(uintptr(fd), "netlink"), } s.lsa.Family = unix.AF_NETLINK @@ -603,33 +795,36 @@ func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*Ne } func (s *NetlinkSocket) Close() { - fd := int(atomic.SwapInt32(&s.fd, -1)) - unix.Close(fd) + s.file.Close() } func (s *NetlinkSocket) GetFd() int { - return int(atomic.LoadInt32(&s.fd)) + return int(s.fd) } func (s *NetlinkSocket) Send(request *NetlinkRequest) error { - fd := int(atomic.LoadInt32(&s.fd)) - if fd < 0 { - return fmt.Errorf("Send called on a closed socket") - } - if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil { - return err - } - return nil + return unix.Sendto(int(s.fd), request.Serialize(), 0, &s.lsa) } func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) { - fd := int(atomic.LoadInt32(&s.fd)) - if fd < 0 { - return nil, nil, fmt.Errorf("Receive called on a closed socket") + rawConn, err := s.file.SyscallConn() + if err != nil { + return nil, nil, err + } + var ( + fromAddr *unix.SockaddrNetlink + rb [RECEIVE_BUFFER_SIZE]byte + nr int + from unix.Sockaddr + innerErr error + ) + err = rawConn.Read(func(fd uintptr) (done bool) { + nr, from, innerErr = unix.Recvfrom(int(fd), rb[:], 0) + return innerErr != unix.EWOULDBLOCK + }) + if innerErr != nil { + err = innerErr } - var fromAddr *unix.SockaddrNetlink - var rb [RECEIVE_BUFFER_SIZE]byte - nr, from, err := unix.Recvfrom(fd, rb[:], 0) if err != nil { return nil, nil, err } @@ -640,8 +835,9 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetli if nr < unix.NLMSG_HDRLEN { return nil, nil, fmt.Errorf("Got short response from netlink") } - rb2 := make([]byte, nr) - copy(rb2, rb[:nr]) + msgLen := nlmAlignOf(nr) + rb2 := make([]byte, msgLen) + copy(rb2, rb[:msgLen]) nl, err := syscall.ParseNetlinkMessage(rb2) if err != nil { return nil, nil, err @@ -663,9 +859,27 @@ func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) } +// SetReceiveBufferSize allows to set a receive buffer size on the socket +func (s *NetlinkSocket) SetReceiveBufferSize(size int, force bool) error { + opt := unix.SO_RCVBUF + if force { + opt = unix.SO_RCVBUFFORCE + } + return unix.SetsockoptInt(int(s.fd), unix.SOL_SOCKET, opt, size) +} + +// SetExtAck requests error messages to be reported on the socket +func (s *NetlinkSocket) SetExtAck(enable bool) error { + var enableN int + if enable { + enableN = 1 + } + + return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN) +} + func (s *NetlinkSocket) GetPid() (uint32, error) { - fd := int(atomic.LoadInt32(&s.fd)) - lsa, err := unix.Getsockname(fd) + lsa, err := unix.Getsockname(int(s.fd)) if err != nil { return 0, err } @@ -709,6 +923,12 @@ func Uint16Attr(v uint16) []byte { return bytes } +func BEUint16Attr(v uint16) []byte { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, v) + return bytes +} + func Uint32Attr(v uint32) []byte { native := NativeEndian() bytes := make([]byte, 4) @@ -716,6 +936,12 @@ func Uint32Attr(v uint32) []byte { return bytes } +func BEUint32Attr(v uint32) []byte { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, v) + return bytes +} + func Uint64Attr(v uint64) []byte { native := NativeEndian() bytes := make([]byte, 8) @@ -723,6 +949,12 @@ func Uint64Attr(v uint64) []byte { return bytes } +func BEUint64Attr(v uint64) []byte { + bytes := make([]byte, 8) + binary.BigEndian.PutUint64(bytes, v) + return bytes +} + func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { var attrs []syscall.NetlinkRouteAttr for len(b) >= unix.SizeofRtAttr { @@ -737,6 +969,22 @@ func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { return attrs, nil } +// ParseRouteAttrAsMap parses provided buffer that contains raw RtAttrs and returns a map of parsed +// atttributes indexed by attribute type or error if occured. +func ParseRouteAttrAsMap(b []byte) (map[uint16]syscall.NetlinkRouteAttr, error) { + attrMap := make(map[uint16]syscall.NetlinkRouteAttr) + + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + attrMap[attr.Attr.Type] = attr + } + return attrMap, nil +} + func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) { a := (*unix.RtAttr)(unsafe.Pointer(&b[0])) if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) { diff --git a/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go new file mode 100644 index 000000000..7f49125cf --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go @@ -0,0 +1,79 @@ +package nl + +import ( + "encoding/binary" + "fmt" + "log" +) + +type Attribute struct { + Type uint16 + Value []byte +} + +func ParseAttributes(data []byte) <-chan Attribute { + native := NativeEndian() + result := make(chan Attribute) + + go func() { + i := 0 + for i+4 < len(data) { + length := int(native.Uint16(data[i : i+2])) + attrType := native.Uint16(data[i+2 : i+4]) + + if length < 4 { + log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length) + break + } + + if len(data) < i+length { + log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i) + break + } + + result <- Attribute{ + Type: attrType, + Value: data[i+4 : i+length], + } + i += rtaAlignOf(length) + } + close(result) + }() + + return result +} + +func PrintAttributes(data []byte) { + printAttributes(data, 0) +} + +func printAttributes(data []byte, level int) { + for attr := range ParseAttributes(data) { + for i := 0; i < level; i++ { + print("> ") + } + nested := attr.Type&NLA_F_NESTED != 0 + fmt.Printf("type=%d nested=%v len=%v %v\n", attr.Type&NLA_TYPE_MASK, nested, len(attr.Value), attr.Value) + if nested { + printAttributes(attr.Value, level+1) + } + } +} + +// Uint32 returns the uint32 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint32() uint32 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint32(attr.Value) + } else { + return NativeEndian().Uint32(attr.Value) + } +} + +// Uint64 returns the uint64 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint64() uint64 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint64(attr.Value) + } else { + return NativeEndian().Uint64(attr.Value) + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go index 1224b747d..ce43ee155 100644 --- a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go @@ -11,6 +11,8 @@ const ( const ( RDMA_NLDEV_CMD_GET = 1 RDMA_NLDEV_CMD_SET = 2 + RDMA_NLDEV_CMD_NEWLINK = 3 + RDMA_NLDEV_CMD_DELLINK = 4 RDMA_NLDEV_CMD_SYS_GET = 6 RDMA_NLDEV_CMD_SYS_SET = 7 ) @@ -30,6 +32,8 @@ const ( RDMA_NLDEV_ATTR_PORT_STATE = 12 RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13 RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14 + RDMA_NLDEV_ATTR_NDEV_NAME = 51 + RDMA_NLDEV_ATTR_LINK_TYPE = 65 RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66 RDMA_NLDEV_NET_NS_FD = 68 ) diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go index 03c1900ff..c26f3bf91 100644 --- a/vendor/github.com/vishvananda/netlink/nl/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -48,7 +48,9 @@ type RtNexthop struct { } func DeserializeRtNexthop(b []byte) *RtNexthop { - return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) + return &RtNexthop{ + RtNexthop: *((*unix.RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))), + } } func (msg *RtNexthop) Len() int { diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go index 5774cbb15..fe88285f2 100644 --- a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go @@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool { return false } for i := range s1.Segments { - if s1.Segments[i].Equal(s2.Segments[i]) != true { + if !s1.Segments[i].Equal(s2.Segments[i]) { return false } } @@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) { } buf = buf[12:] if len(buf)%16 != 0 { - err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf)) + err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf)) return mode, nil, err } for len(buf) > 0 { diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go index 150017726..8172b8471 100644 --- a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go @@ -12,6 +12,7 @@ const ( SEG6_LOCAL_NH6 SEG6_LOCAL_IIF SEG6_LOCAL_OIF + SEG6_LOCAL_BPF __SEG6_LOCAL_MAX ) const ( @@ -34,6 +35,7 @@ const ( SEG6_LOCAL_ACTION_END_S // 12 SEG6_LOCAL_ACTION_END_AS // 13 SEG6_LOCAL_ACTION_END_AM // 14 + SEG6_LOCAL_ACTION_END_BPF // 15 __SEG6_LOCAL_ACTION_MAX ) const ( @@ -71,6 +73,8 @@ func SEG6LocalActionString(action int) string { return "End.AS" case SEG6_LOCAL_ACTION_END_AM: return "End.AM" + case SEG6_LOCAL_ACTION_END_BPF: + return "End.BPF" } return "unknown" } diff --git a/vendor/github.com/vishvananda/netlink/nl/syscall.go b/vendor/github.com/vishvananda/netlink/nl/syscall.go index f7f7f92e6..b5ba039ac 100644 --- a/vendor/github.com/vishvananda/netlink/nl/syscall.go +++ b/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -1,6 +1,6 @@ package nl -// syscall package lack of rule atributes type. +// syscall package lack of rule attributes type. // Thus there are defined below const ( FRA_UNSPEC = iota @@ -21,6 +21,13 @@ const ( FRA_TABLE /* Extended table id */ FRA_FWMASK /* mask for netfilter mark */ FRA_OIFNAME + FRA_PAD + FRA_L3MDEV /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE /* UID range */ + FRA_PROTOCOL /* Originator of the rule */ + FRA_IP_PROTO /* ip proto */ + FRA_SPORT_RANGE /* sport */ + FRA_DPORT_RANGE /* dport */ ) // ip rule netlink request types @@ -39,6 +46,7 @@ const ( // socket diags related const ( SOCK_DIAG_BY_FAMILY = 20 /* linux.sock_diag.h */ + SOCK_DESTROY = 21 TCPDIAG_NOCOOKIE = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/ ) diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go index 501f554b2..0720729a9 100644 --- a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -1,8 +1,13 @@ package nl import ( + "bytes" "encoding/binary" + "fmt" + "net" "unsafe" + + "golang.org/x/sys/unix" ) // LinkLayer @@ -42,7 +47,14 @@ const ( TCA_FCNT TCA_STATS2 TCA_STAB - TCA_MAX = TCA_STAB + TCA_PAD + TCA_DUMP_INVISIBLE + TCA_CHAIN + TCA_HW_OFFLOAD + TCA_INGRESS_BLOCK + TCA_EGRESS_BLOCK + TCA_DUMP_FLAGS + TCA_MAX = TCA_DUMP_FLAGS ) const ( @@ -56,6 +68,12 @@ const ( TCA_ACT_OPTIONS TCA_ACT_INDEX TCA_ACT_STATS + TCA_ACT_PAD + TCA_ACT_COOKIE + TCA_ACT_FLAGS + TCA_ACT_HW_STATS + TCA_ACT_USED_HW_STATS + TCA_ACT_IN_HW_COUNT TCA_ACT_MAX ) @@ -71,7 +89,11 @@ const ( TCA_STATS_RATE_EST TCA_STATS_QUEUE TCA_STATS_APP - TCA_STATS_MAX = TCA_STATS_APP + TCA_STATS_RATE_EST64 + TCA_STATS_PAD + TCA_STATS_BASIC_HW + TCA_STATS_PKT64 + TCA_STATS_MAX = TCA_STATS_PKT64 ) const ( @@ -83,17 +105,23 @@ const ( SizeofTcNetemCorr = 0x0c SizeofTcNetemReorder = 0x08 SizeofTcNetemCorrupt = 0x08 + SizeOfTcNetemRate = 0x10 SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 SizeofTcHtbGlob = 0x14 SizeofTcU32Key = 0x10 SizeofTcU32Sel = 0x10 // without keys - SizeofTcGen = 0x14 + SizeofTcGen = 0x16 SizeofTcConnmark = SizeofTcGen + 0x04 + SizeofTcCsum = SizeofTcGen + 0x04 SizeofTcMirred = SizeofTcGen + 0x08 SizeofTcTunnelKey = SizeofTcGen + 0x04 SizeofTcSkbEdit = SizeofTcGen SizeofTcPolice = 2*SizeofTcRateSpec + 0x20 + SizeofTcSfqQopt = 0x0b + SizeofTcSfqRedStats = 0x18 + SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c + SizeofUint32Bitfield = 0x8 ) // struct tcmsg { @@ -127,6 +155,18 @@ func (x *TcMsg) Serialize() []byte { return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] } +type Tcf struct { + Install uint64 + LastUse uint64 + Expires uint64 + FirstUse uint64 +} + +func DeserializeTcf(b []byte) *Tcf { + const size = int(unsafe.Sizeof(Tcf{})) + return (*Tcf)(unsafe.Pointer(&b[0:size][0])) +} + // struct tcamsg { // unsigned char tca_family; // unsigned char tca__pad1; @@ -333,6 +373,26 @@ func (x *TcNetemCorrupt) Serialize() []byte { return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:] } +// TcNetemRate is a struct that represents the rate of a netem qdisc +type TcNetemRate struct { + Rate uint32 + PacketOverhead int32 + CellSize uint32 + CellOverhead int32 +} + +func (msg *TcNetemRate) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcNetemRate(b []byte) *TcNetemRate { + return (*TcNetemRate)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (msg *TcNetemRate) Serialize() []byte { + return (*(*[SizeOfTcNetemRate]byte)(unsafe.Pointer(msg)))[:] +} + // struct tc_tbf_qopt { // struct tc_ratespec rate; // struct tc_ratespec peakrate; @@ -691,6 +751,36 @@ func (x *TcConnmark) Serialize() []byte { return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:] } +const ( + TCA_CSUM_UNSPEC = iota + TCA_CSUM_PARMS + TCA_CSUM_TM + TCA_CSUM_PAD + TCA_CSUM_MAX = TCA_CSUM_PAD +) + +// struct tc_csum { +// tc_gen; +// __u32 update_flags; +// } + +type TcCsum struct { + TcGen + UpdateFlags uint32 +} + +func (msg *TcCsum) Len() int { + return SizeofTcCsum +} + +func DeserializeTcCsum(b []byte) *TcCsum { + return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0])) +} + +func (x *TcCsum) Serialize() []byte { + return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:] +} + const ( TCA_ACT_MIRRED = 8 ) @@ -735,7 +825,13 @@ const ( TCA_TUNNEL_KEY_ENC_IPV6_SRC TCA_TUNNEL_KEY_ENC_IPV6_DST TCA_TUNNEL_KEY_ENC_KEY_ID - TCA_TUNNEL_KEY_MAX = TCA_TUNNEL_KEY_ENC_KEY_ID + TCA_TUNNEL_KEY_PAD + TCA_TUNNEL_KEY_ENC_DST_PORT + TCA_TUNNEL_KEY_NO_CSUM + TCA_TUNNEL_KEY_ENC_OPTS + TCA_TUNNEL_KEY_ENC_TOS + TCA_TUNNEL_KEY_ENC_TTL + TCA_TUNNEL_KEY_MAX ) type TcTunnelKey struct { @@ -764,7 +860,8 @@ const ( TCA_SKBEDIT_MARK TCA_SKBEDIT_PAD TCA_SKBEDIT_PTYPE - TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK + TCA_SKBEDIT_MASK + TCA_SKBEDIT_MAX ) type TcSkbEdit struct { @@ -851,6 +948,10 @@ const ( TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate + TCA_FQ_CE_THRESHOLD // DCTCP-like CE-marking threshold + TCA_FQ_TIMER_SLACK // timer slack + TCA_FQ_HORIZON // time horizon in us + TCA_FQ_HORIZON_DROP // drop packets beyond horizon, or cap their EDT ) const ( @@ -872,3 +973,639 @@ const ( TCA_HFSC_FSC TCA_HFSC_USC ) + +const ( + TCA_FLOWER_UNSPEC = iota + TCA_FLOWER_CLASSID + TCA_FLOWER_INDEV + TCA_FLOWER_ACT + TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE /* be16 */ + TCA_FLOWER_KEY_IP_PROTO /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC /* be16 */ + TCA_FLOWER_KEY_TCP_DST /* be16 */ + TCA_FLOWER_KEY_UDP_SRC /* be16 */ + TCA_FLOWER_KEY_UDP_DST /* be16 */ + + TCA_FLOWER_FLAGS + TCA_FLOWER_KEY_VLAN_ID /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC /* be16 */ + TCA_FLOWER_KEY_SCTP_DST /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */ + + TCA_FLOWER_KEY_FLAGS /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */ + + TCA_FLOWER_KEY_ARP_SIP /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_TIP /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_OP /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */ + TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */ + + TCA_FLOWER_KEY_IP_TOS /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_IP_TTL /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS + TCA_FLOWER_KEY_ENC_OPTS_MASK + + __TCA_FLOWER_MAX +) + +const TCA_CLS_FLAGS_SKIP_HW = 1 << 0 /* don't offload filter to HW */ +const TCA_CLS_FLAGS_SKIP_SW = 1 << 1 /* don't use filter in SW */ + +// struct tc_sfq_qopt { +// unsigned quantum; /* Bytes per round allocated to flow */ +// int perturb_period; /* Period of hash perturbation */ +// __u32 limit; /* Maximal packets in queue */ +// unsigned divisor; /* Hash divisor */ +// unsigned flows; /* Maximal number of flows */ +// }; + +type TcSfqQopt struct { + Quantum uint8 + Perturb int32 + Limit uint32 + Divisor uint8 + Flows uint8 +} + +func (x *TcSfqQopt) Len() int { + return SizeofTcSfqQopt +} + +func DeserializeTcSfqQopt(b []byte) *TcSfqQopt { + return (*TcSfqQopt)(unsafe.Pointer(&b[0:SizeofTcSfqQopt][0])) +} + +func (x *TcSfqQopt) Serialize() []byte { + return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfqred_stats { +// __u32 prob_drop; /* Early drops, below max threshold */ +// __u32 forced_drop; /* Early drops, after max threshold */ +// __u32 prob_mark; /* Marked packets, below max threshold */ +// __u32 forced_mark; /* Marked packets, after max threshold */ +// __u32 prob_mark_head; /* Marked packets, below max threshold */ +// __u32 forced_mark_head;/* Marked packets, after max threshold */ +// }; +type TcSfqRedStats struct { + ProbDrop uint32 + ForcedDrop uint32 + ProbMark uint32 + ForcedMark uint32 + ProbMarkHead uint32 + ForcedMarkHead uint32 +} + +func (x *TcSfqRedStats) Len() int { + return SizeofTcSfqRedStats +} + +func DeserializeTcSfqRedStats(b []byte) *TcSfqRedStats { + return (*TcSfqRedStats)(unsafe.Pointer(&b[0:SizeofTcSfqRedStats][0])) +} + +func (x *TcSfqRedStats) Serialize() []byte { + return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfq_qopt_v1 { +// struct tc_sfq_qopt v0; +// unsigned int depth; /* max number of packets per flow */ +// unsigned int headdrop; +// +// /* SFQRED parameters */ +// +// __u32 limit; /* HARD maximal flow queue length (bytes) */ +// __u32 qth_min; /* Min average length threshold (bytes) */ +// __u32 qth_max; /* Max average length threshold (bytes) */ +// unsigned char Wlog; /* log(W) */ +// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ +// unsigned char Scell_log; /* cell size for idle damping */ +// unsigned char flags; +// __u32 max_P; /* probability, high resolution */ +// +// /* SFQRED stats */ +// +// struct tc_sfqred_stats stats; +// }; +type TcSfqQoptV1 struct { + TcSfqQopt + Depth uint32 + HeadDrop uint32 + Limit uint32 + QthMin uint32 + QthMax uint32 + Wlog byte + Plog byte + ScellLog byte + Flags byte + MaxP uint32 + TcSfqRedStats +} + +func (x *TcSfqQoptV1) Len() int { + return SizeofTcSfqQoptV1 +} + +func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { + return (*TcSfqQoptV1)(unsafe.Pointer(&b[0:SizeofTcSfqQoptV1][0])) +} + +func (x *TcSfqQoptV1) Serialize() []byte { + return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] +} + +// IPProto represents Flower ip_proto attribute +type IPProto uint8 + +const ( + IPPROTO_TCP IPProto = unix.IPPROTO_TCP + IPPROTO_UDP IPProto = unix.IPPROTO_UDP + IPPROTO_SCTP IPProto = unix.IPPROTO_SCTP + IPPROTO_ICMP IPProto = unix.IPPROTO_ICMP + IPPROTO_ICMPV6 IPProto = unix.IPPROTO_ICMPV6 +) + +func (i IPProto) Serialize() []byte { + arr := make([]byte, 1) + arr[0] = byte(i) + return arr +} + +func (i IPProto) String() string { + switch i { + case IPPROTO_TCP: + return "tcp" + case IPPROTO_UDP: + return "udp" + case IPPROTO_SCTP: + return "sctp" + case IPPROTO_ICMP: + return "icmp" + case IPPROTO_ICMPV6: + return "icmpv6" + } + return fmt.Sprintf("%d", i) +} + +const ( + MaxOffs = 128 + SizeOfPeditSel = 24 + SizeOfPeditKey = 24 + + TCA_PEDIT_KEY_EX_HTYPE = 1 + TCA_PEDIT_KEY_EX_CMD = 2 +) + +const ( + TCA_PEDIT_UNSPEC = iota + TCA_PEDIT_TM + TCA_PEDIT_PARMS + TCA_PEDIT_PAD + TCA_PEDIT_PARMS_EX + TCA_PEDIT_KEYS_EX + TCA_PEDIT_KEY_EX +) + +// /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. It +// * means no specific header type - offset is relative to the network layer +// */ +type PeditHeaderType uint16 + +const ( + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = iota + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + __PEDIT_HDR_TYPE_MAX +) + +type PeditCmd uint16 + +const ( + TCA_PEDIT_KEY_EX_CMD_SET = 0 + TCA_PEDIT_KEY_EX_CMD_ADD = 1 +) + +type TcPeditSel struct { + TcGen + NKeys uint8 + Flags uint8 +} + +func DeserializeTcPeditKey(b []byte) *TcPeditKey { + return (*TcPeditKey)(unsafe.Pointer(&b[0:SizeOfPeditKey][0])) +} + +func DeserializeTcPedit(b []byte) (*TcPeditSel, []TcPeditKey) { + x := &TcPeditSel{} + copy((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(x)))[:SizeOfPeditSel], b) + + var keys []TcPeditKey + + next := SizeOfPeditKey + var i uint8 + for i = 0; i < x.NKeys; i++ { + keys = append(keys, *DeserializeTcPeditKey(b[next:])) + next += SizeOfPeditKey + } + + return x, keys +} + +type TcPeditKey struct { + Mask uint32 + Val uint32 + Off uint32 + At uint32 + OffMask uint32 + Shift uint32 +} + +type TcPeditKeyEx struct { + HeaderType PeditHeaderType + Cmd PeditCmd +} + +type TcPedit struct { + Sel TcPeditSel + Keys []TcPeditKey + KeysEx []TcPeditKeyEx + Extend uint8 +} + +func (p *TcPedit) Encode(parent *RtAttr) { + parent.AddRtAttr(TCA_ACT_KIND, ZeroTerminated("pedit")) + actOpts := parent.AddRtAttr(TCA_ACT_OPTIONS, nil) + + bbuf := bytes.NewBuffer(make([]byte, 0, int(unsafe.Sizeof(p.Sel)+unsafe.Sizeof(p.Keys)))) + + bbuf.Write((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(&p.Sel)))[:]) + + for i := uint8(0); i < p.Sel.NKeys; i++ { + bbuf.Write((*(*[SizeOfPeditKey]byte)(unsafe.Pointer(&p.Keys[i])))[:]) + } + actOpts.AddRtAttr(TCA_PEDIT_PARMS_EX, bbuf.Bytes()) + + exAttrs := actOpts.AddRtAttr(int(TCA_PEDIT_KEYS_EX|NLA_F_NESTED), nil) + for i := uint8(0); i < p.Sel.NKeys; i++ { + keyAttr := exAttrs.AddRtAttr(int(TCA_PEDIT_KEY_EX|NLA_F_NESTED), nil) + + htypeBuf := make([]byte, 2) + cmdBuf := make([]byte, 2) + + NativeEndian().PutUint16(htypeBuf, uint16(p.KeysEx[i].HeaderType)) + NativeEndian().PutUint16(cmdBuf, uint16(p.KeysEx[i].Cmd)) + + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_HTYPE, htypeBuf) + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_CMD, cmdBuf) + } +} + +func (p *TcPedit) SetEthDst(mac net.HardwareAddr) { + u32 := NativeEndian().Uint32(mac) + u16 := NativeEndian().Uint16(mac[4:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = uint32(u16) + tKey.Mask = 0xffff0000 + tKey.Off = 4 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetEthSrc(mac net.HardwareAddr) { + u16 := NativeEndian().Uint16(mac) + u32 := NativeEndian().Uint32(mac[2:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = uint32(u16) << 16 + tKey.Mask = 0x0000ffff + tKey.Off = 4 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Mask = 0 + tKey.Off = 8 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv6Src(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 8 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 20 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetDstIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Dst(ip) + } else { + p.SetIPv6Dst(ip) + } +} + +func (p *TcPedit) SetSrcIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Src(ip) + } else { + p.SetIPv6Src(ip) + } +} + +func (p *TcPedit) SetIPv6Dst(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 24 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 28 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 32 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 36 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Src(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Dst(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetDstPort only tcp and udp are supported to set port +func (p *TcPedit) SetDstPort(dstPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(dstPort)) << 16 + tKey.Mask = 0x0000ffff + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetSrcPort only tcp and udp are supported to set port +func (p *TcPedit) SetSrcPort(srcPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(srcPort)) + tKey.Mask = 0xffff0000 + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} diff --git a/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go new file mode 100644 index 000000000..f209125df --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go @@ -0,0 +1,41 @@ +package nl + +const ( + VDPA_GENL_NAME = "vdpa" + VDPA_GENL_VERSION = 0x1 +) + +const ( + VDPA_CMD_UNSPEC = iota + VDPA_CMD_MGMTDEV_NEW + VDPA_CMD_MGMTDEV_GET /* can dump */ + VDPA_CMD_DEV_NEW + VDPA_CMD_DEV_DEL + VDPA_CMD_DEV_GET /* can dump */ + VDPA_CMD_DEV_CONFIG_GET /* can dump */ + VDPA_CMD_DEV_VSTATS_GET +) + +const ( + VDPA_ATTR_UNSPEC = iota + VDPA_ATTR_MGMTDEV_BUS_NAME + VDPA_ATTR_MGMTDEV_DEV_NAME + VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES + VDPA_ATTR_DEV_NAME + VDPA_ATTR_DEV_ID + VDPA_ATTR_DEV_VENDOR_ID + VDPA_ATTR_DEV_MAX_VQS + VDPA_ATTR_DEV_MAX_VQ_SIZE + VDPA_ATTR_DEV_MIN_VQ_SIZE + VDPA_ATTR_DEV_NET_CFG_MACADDR + VDPA_ATTR_DEV_NET_STATUS + VDPA_ATTR_DEV_NET_CFG_MAX_VQP + VDPA_ATTR_DEV_NET_CFG_MTU + VDPA_ATTR_DEV_NEGOTIATED_FEATURES + VDPA_ATTR_DEV_MGMTDEV_MAX_VQS + VDPA_ATTR_DEV_SUPPORTED_FEATURES + VDPA_ATTR_DEV_QUEUE_INDEX + VDPA_ATTR_DEV_VENDOR_ATTR_NAME + VDPA_ATTR_DEV_VENDOR_ATTR_VALUE + VDPA_ATTR_DEV_FEATURES +) diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go index dce9073f7..cdb318ba5 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -131,7 +131,15 @@ func (x *XfrmAddress) ToIP() net.IP { return ip } -func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { +// family is only used when x and prefixlen are both 0 +func (x *XfrmAddress) ToIPNet(prefixlen uint8, family uint16) *net.IPNet { + empty := [SizeofXfrmAddress]byte{} + if bytes.Equal(x[:], empty[:]) && prefixlen == 0 { + if family == FAMILY_V6 { + return &net.IPNet{IP: net.ParseIP("::"), Mask: net.CIDRMask(int(prefixlen), 128)} + } + return &net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: net.CIDRMask(int(prefixlen), 32)} + } ip := x.ToIP() if GetIPFamily(ip) == FAMILY_V4 { return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go index b6290fd54..e8920b9a6 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -13,8 +13,9 @@ const ( SizeofXfrmAlgoAuth = 0x48 SizeofXfrmAlgoAEAD = 0x48 SizeofXfrmEncapTmpl = 0x18 - SizeofXfrmUsersaFlush = 0x8 + SizeofXfrmUsersaFlush = 0x1 SizeofXfrmReplayStateEsn = 0x18 + SizeofXfrmReplayState = 0x0c ) const ( @@ -28,6 +29,11 @@ const ( XFRM_STATE_ESN = 128 ) +const ( + XFRM_SA_XFLAG_DONT_ENCAP_DSCP = 1 + XFRM_SA_XFLAG_OSEQ_MAY_WRAP = 2 +) + // struct xfrm_usersa_id { // xfrm_address_t daddr; // __be32 spi; @@ -103,6 +109,7 @@ func (msg *XfrmStats) Serialize() []byte { // }; // // #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// #define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 // type XfrmUsersaInfo struct { @@ -332,3 +339,23 @@ func (msg *XfrmReplayStateEsn) Serialize() []byte { // We deliberately do not pass Bmp, as it gets set by the kernel. return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:] } + +// struct xfrm_replay_state { +// __u32 oseq; +// __u32 seq; +// __u32 bitmap; +// }; + +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func DeserializeXfrmReplayState(b []byte) *XfrmReplayState { + return (*XfrmReplayState)(unsafe.Pointer(&b[0:SizeofXfrmReplayState][0])) +} + +func (msg *XfrmReplayState) Serialize() []byte { + return (*(*[SizeofXfrmReplayState]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/vendor/github.com/vishvananda/netlink/proc_event_linux.go new file mode 100644 index 000000000..ac8762bd8 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -0,0 +1,208 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const CN_IDX_PROC = 0x1 + +const ( + PROC_EVENT_NONE = 0x00000000 + PROC_EVENT_FORK = 0x00000001 + PROC_EVENT_EXEC = 0x00000002 + PROC_EVENT_UID = 0x00000004 + PROC_EVENT_GID = 0x00000040 + PROC_EVENT_SID = 0x00000080 + PROC_EVENT_PTRACE = 0x00000100 + PROC_EVENT_COMM = 0x00000200 + PROC_EVENT_COREDUMP = 0x40000000 + PROC_EVENT_EXIT = 0x80000000 +) + +const ( + CN_VAL_PROC = 0x1 + PROC_CN_MCAST_LISTEN = 0x1 +) + +type ProcEventMsg interface { + Pid() uint32 + Tgid() uint32 +} + +type ProcEventHeader struct { + What uint32 + CPU uint32 + Timestamp uint64 +} + +type ProcEvent struct { + ProcEventHeader + Msg ProcEventMsg +} + +func (pe *ProcEvent) setHeader(h ProcEventHeader) { + pe.What = h.What + pe.CPU = h.CPU + pe.Timestamp = h.Timestamp +} + +type ExitProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +func (e *ExitProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExitProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ExecProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 +} + +func (e *ExecProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExecProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ForkProcEvent struct { + ParentPid uint32 + ParentTgid uint32 + ChildPid uint32 + ChildTgid uint32 +} + +func (e *ForkProcEvent) Pid() uint32 { + return e.ParentPid +} + +func (e *ForkProcEvent) Tgid() uint32 { + return e.ParentTgid +} + +type CommProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + Comm [16]byte +} + +func (e *CommProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *CommProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error { + h, err := NewHandle() + if err != nil { + return err + } + defer h.Delete() + + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC) + if err != nil { + return err + } + + var nlmsg nl.NetlinkRequest + + nlmsg.Pid = uint32(os.Getpid()) + nlmsg.Type = unix.NLMSG_DONE + nlmsg.Len = uint32(unix.SizeofNlMsghdr) + + cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN) + nlmsg.AddData(cm) + + s.Send(&nlmsg) + + if done != nil { + go func() { + <-done + s.Close() + }() + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + + for _, m := range msgs { + e := parseNetlinkMessage(m) + if e != nil { + ch <- *e + } + } + + } + }() + + return nil +} + +func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent { + if m.Header.Type == unix.NLMSG_DONE { + buf := bytes.NewBuffer(m.Data) + msg := &nl.CnMsg{} + hdr := &ProcEventHeader{} + binary.Read(buf, nl.NativeEndian(), msg) + binary.Read(buf, nl.NativeEndian(), hdr) + + pe := &ProcEvent{} + pe.setHeader(*hdr) + switch hdr.What { + case PROC_EVENT_EXIT: + event := &ExitProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_FORK: + event := &ForkProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_EXEC: + event := &ExecProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_COMM: + event := &CommProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + } + return nil + } + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go index 60b23b374..0163cba3a 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo.go +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -6,14 +6,16 @@ import ( // Protinfo represents bridge flags from netlink. type Protinfo struct { - Hairpin bool - Guard bool - FastLeave bool - RootBlock bool - Learning bool - Flood bool - ProxyArp bool - ProxyArpWiFi bool + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool + ProxyArp bool + ProxyArpWiFi bool + Isolated bool + NeighSuppress bool } // String returns a list of enabled flags @@ -47,6 +49,12 @@ func (prot *Protinfo) String() string { if prot.ProxyArpWiFi { boolStrings = append(boolStrings, "ProxyArpWiFi") } + if prot.Isolated { + boolStrings = append(boolStrings, "Isolated") + } + if prot.NeighSuppress { + boolStrings = append(boolStrings, "NeighSuppress") + } return strings.Join(boolStrings, " ") } diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go index 15b65123c..1ba25d3cd 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo_linux.go +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -68,6 +68,10 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) { pi.ProxyArp = byteToBool(info.Value[0]) case nl.IFLA_BRPORT_PROXYARP_WIFI: pi.ProxyArpWiFi = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_ISOLATED: + pi.Isolated = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_NEIGH_SUPPRESS: + pi.NeighSuppress = byteToBool(info.Value[0]) } } return diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go index af78305ac..067743d39 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc.go +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -17,19 +17,29 @@ const ( HANDLE_MIN_EGRESS = 0xFFFFFFF3 ) +const ( + HORIZON_DROP_POLICY_CAP = 0 + HORIZON_DROP_POLICY_DROP = 1 + HORIZON_DROP_POLICY_DEFAULT = 255 +) + type Qdisc interface { Attrs() *QdiscAttrs Type() string } +type QdiscStatistics ClassStatistics + // QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link, // has a handle, a parent and a refcnt. The root qdisc of a device should // have parent == HANDLE_ROOT. type QdiscAttrs struct { - LinkIndex int - Handle uint32 - Parent uint32 - Refcnt uint32 // read only + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only + IngressBlock *uint32 + Statistics *QdiscStatistics } func (q QdiscAttrs) String() string { @@ -113,6 +123,7 @@ type Htb struct { Defcls uint32 Debug uint32 DirectPkts uint32 + DirectQlen *uint32 } func NewHtb(attrs QdiscAttrs) *Htb { @@ -123,6 +134,7 @@ func NewHtb(attrs QdiscAttrs) *Htb { Rate2Quantum: 10, Debug: 0, DirectPkts: 0, + DirectQlen: nil, } } @@ -150,6 +162,7 @@ type NetemQdiscAttrs struct { ReorderCorr float32 // in % CorruptProb float32 // in % CorruptCorr float32 // in % + Rate64 uint64 } func (q NetemQdiscAttrs) String() string { @@ -174,6 +187,7 @@ type Netem struct { ReorderCorr uint32 CorruptProb uint32 CorruptCorr uint32 + Rate64 uint64 } func (netem *Netem) String() string { @@ -210,6 +224,19 @@ func (qdisc *Tbf) Type() string { return "tbf" } +// Clsact is a qdisc for adding filters +type Clsact struct { + QdiscAttrs +} + +func (qdisc *Clsact) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Clsact) Type() string { + return "clsact" +} + // Ingress is a qdisc for adding ingress filters type Ingress struct { QdiscAttrs @@ -278,22 +305,25 @@ type Fq struct { FlowDefaultRate uint32 FlowMaxRate uint32 // called BucketsLog under the hood - Buckets uint32 - FlowRefillDelay uint32 - LowRateThreshold uint32 + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 + Horizon uint32 + HorizonDropPolicy uint8 } func (fq *Fq) String() string { return fmt.Sprintf( - "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", - fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v, Horizon: %v, HorizonDropPolicy: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, fq.Horizon, fq.HorizonDropPolicy, ) } func NewFq(attrs QdiscAttrs) *Fq { return &Fq{ - QdiscAttrs: attrs, - Pacing: 1, + QdiscAttrs: attrs, + Pacing: 1, + HorizonDropPolicy: HORIZON_DROP_POLICY_DEFAULT, } } @@ -308,13 +338,15 @@ func (qdisc *Fq) Type() string { // FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme. type FqCodel struct { QdiscAttrs - Target uint32 - Limit uint32 - Interval uint32 - ECN uint32 - Flows uint32 - Quantum uint32 - // There are some more attributes here, but support for them seems not ubiquitous + Target uint32 + Limit uint32 + Interval uint32 + ECN uint32 + Flows uint32 + Quantum uint32 + CEThreshold uint32 + DropBatchSize uint32 + MemoryLimit uint32 } func (fqcodel *FqCodel) String() string { @@ -338,3 +370,27 @@ func (qdisc *FqCodel) Attrs() *QdiscAttrs { func (qdisc *FqCodel) Type() string { return "fq_codel" } + +type Sfq struct { + QdiscAttrs + // TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later. + Quantum uint8 + Perturb uint8 + Limit uint32 + Divisor uint8 +} + +func (sfq *Sfq) String() string { + return fmt.Sprintf( + "{%v -- Quantum: %v, Perturb: %v, Limit: %v, Divisor: %v}", + sfq.Attrs(), sfq.Quantum, sfq.Perturb, sfq.Limit, sfq.Divisor, + ) +} + +func (qdisc *Sfq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Sfq) Type() string { + return "sfq" +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go index e9eee5908..e732ae3bd 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "strconv" "strings" + "sync" "syscall" "github.com/vishvananda/netlink/nl" @@ -17,6 +18,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { var lossCorr, delayCorr, duplicateCorr uint32 var reorderProb, reorderCorr uint32 var corruptProb, corruptCorr uint32 + var rate64 uint64 latency := nattrs.Latency loss := Percentage2u32(nattrs.Loss) @@ -57,6 +59,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { corruptProb = Percentage2u32(nattrs.CorruptProb) corruptCorr = Percentage2u32(nattrs.CorruptCorr) + rate64 = nattrs.Rate64 return &Netem{ QdiscAttrs: attrs, @@ -73,6 +76,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { ReorderCorr: reorderCorr, CorruptProb: corruptProb, CorruptCorr: corruptCorr, + Rate64: rate64, } } @@ -159,6 +163,9 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + if qdisc.Attrs().IngressBlock != nil { + req.AddData(nl.NewRtAttr(nl.TCA_INGRESS_BLOCK, nl.Uint32Attr(*qdisc.Attrs().IngressBlock))) + } options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -194,7 +201,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { opt.Debug = qdisc.Debug opt.DirectPkts = qdisc.DirectPkts options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize()) - // options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + if qdisc.DirectQlen != nil { + options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, nl.Uint32Attr(*qdisc.DirectQlen)) + } case *Hfsc: opt := nl.TcHfscOpt{} opt.Defcls = qdisc.Defcls @@ -231,6 +240,19 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if reorder.Probability > 0 { options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize()) } + // Rate + if qdisc.Rate64 > 0 { + rate := nl.TcNetemRate{} + if qdisc.Rate64 >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_NETEM_RATE64, nl.Uint64Attr(qdisc.Rate64)) + rate.Rate = ^uint32(0) + } else { + rate.Rate = uint32(qdisc.Rate64) + } + options.AddRtAttr(nl.TCA_NETEM_RATE, rate.Serialize()) + } + case *Clsact: + options = nil case *Ingress: // ingress filters must use the proper handle if qdisc.Attrs().Parent != HANDLE_INGRESS { @@ -250,13 +272,24 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.Quantum > 0 { options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) } - + if qdisc.CEThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold)) + } + if qdisc.DropBatchSize > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize)) + } + if qdisc.MemoryLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit)) + } case *Fq: options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing)))) if qdisc.Buckets > 0 { options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets)))) } + if qdisc.PacketLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_PLIMIT, nl.Uint32Attr((uint32(qdisc.PacketLimit)))) + } if qdisc.LowRateThreshold > 0 { options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold)))) } @@ -278,6 +311,20 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.FlowDefaultRate > 0 { options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) } + if qdisc.Horizon > 0 { + options.AddRtAttr(nl.TCA_FQ_HORIZON, nl.Uint32Attr(qdisc.Horizon)) + } + if qdisc.HorizonDropPolicy != HORIZON_DROP_POLICY_DEFAULT { + options.AddRtAttr(nl.TCA_FQ_HORIZON_DROP, nl.Uint8Attr(qdisc.HorizonDropPolicy)) + } + case *Sfq: + opt := nl.TcSfqQoptV1{} + opt.TcSfqQopt.Quantum = qdisc.Quantum + opt.TcSfqQopt.Perturb = int32(qdisc.Perturb) + opt.TcSfqQopt.Limit = qdisc.Limit + opt.TcSfqQopt.Divisor = qdisc.Divisor + + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) default: options = nil } @@ -362,6 +409,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { qdisc = &FqCodel{} case "netem": qdisc = &Netem{} + case "sfq": + qdisc = &Sfq{} + case "clsact": + qdisc = &Clsact{} default: qdisc = &GenericQdisc{QdiscType: qdiscType} } @@ -417,9 +468,29 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { if err := parseNetemData(qdisc, attr.Value); err != nil { return nil, err } + case "sfq": + if err := parseSfqData(qdisc, attr.Value); err != nil { + return nil, err + } // no options for ingress } + case nl.TCA_INGRESS_BLOCK: + ingressBlock := new(uint32) + *ingressBlock = native.Uint32(attr.Value) + base.IngressBlock = ingressBlock + case nl.TCA_STATS: + s, err := parseTcStats(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) + case nl.TCA_STATS2: + s, err := parseTcStats2(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) } } *qdisc.Attrs() = base @@ -446,7 +517,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error { } func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() htb := qdisc.(*Htb) for _, datum := range data { switch datum.Attr.Type { @@ -458,15 +528,14 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { htb.Debug = opt.Debug htb.DirectPkts = opt.DirectPkts case nl.TCA_HTB_DIRECT_QLEN: - // TODO - //htb.DirectQlen = native.uint32(datum.Value) + directQlen := native.Uint32(datum.Value) + htb.DirectQlen = &directQlen } } return nil } func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() fqCodel := qdisc.(*FqCodel) for _, datum := range data { @@ -483,6 +552,12 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fqCodel.Flows = native.Uint32(datum.Value) case nl.TCA_FQ_CODEL_QUANTUM: fqCodel.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_CE_THRESHOLD: + fqCodel.CEThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE: + fqCodel.DropBatchSize = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_MEMORY_LIMIT: + fqCodel.MemoryLimit = native.Uint32(datum.Value) } } return nil @@ -490,13 +565,11 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { func parseHfscData(qdisc Qdisc, data []byte) error { Hfsc := qdisc.(*Hfsc) - native = nl.NativeEndian() Hfsc.Defcls = native.Uint16(data) return nil } func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() fq := qdisc.(*Fq) for _, datum := range data { switch datum.Attr.Type { @@ -522,6 +595,11 @@ func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fq.FlowMaxRate = native.Uint32(datum.Value) case nl.TCA_FQ_FLOW_DEFAULT_RATE: fq.FlowDefaultRate = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON: + fq.Horizon = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON_DROP: + fq.HorizonDropPolicy = datum.Value[0] + } } return nil @@ -540,6 +618,8 @@ func parseNetemData(qdisc Qdisc, value []byte) error { if err != nil { return err } + var rate *nl.TcNetemRate + var rate64 uint64 for _, datum := range data { switch datum.Attr.Type { case nl.TCA_NETEM_CORR: @@ -555,13 +635,23 @@ func parseNetemData(qdisc Qdisc, value []byte) error { opt := nl.DeserializeTcNetemReorder(datum.Value) netem.ReorderProb = opt.Probability netem.ReorderCorr = opt.Correlation + case nl.TCA_NETEM_RATE: + rate = nl.DeserializeTcNetemRate(datum.Value) + case nl.TCA_NETEM_RATE64: + rate64 = native.Uint64(datum.Value) } } + if rate != nil { + netem.Rate64 = uint64(rate.Rate) + if rate64 > 0 { + netem.Rate64 = rate64 + } + } + return nil } func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() tbf := qdisc.(*Tbf) for _, datum := range data { switch datum.Attr.Type { @@ -582,6 +672,17 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { return nil } +func parseSfqData(qdisc Qdisc, value []byte) error { + sfq := qdisc.(*Sfq) + opt := nl.DeserializeTcSfqQoptV1(value) + sfq.Quantum = opt.TcSfqQopt.Quantum + sfq.Perturb = uint8(opt.TcSfqQopt.Perturb) + sfq.Limit = opt.TcSfqQopt.Limit + sfq.Divisor = opt.TcSfqQopt.Divisor + + return nil +} + const ( TIME_UNITS_PER_SEC = 1000000 ) @@ -590,6 +691,9 @@ var ( tickInUsec float64 clockFactor float64 hz float64 + + // Without this, the go race detector may report races. + initClockMutex sync.Mutex ) func initClock() { @@ -598,10 +702,10 @@ func initClock() { return } parts := strings.Split(strings.TrimSpace(string(data)), " ") - if len(parts) < 3 { + if len(parts) < 4 { return } - var vals [3]uint64 + var vals [4]uint64 for i := range vals { val, err := strconv.ParseUint(parts[i], 16, 32) if err != nil { @@ -615,10 +719,17 @@ func initClock() { } clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor - hz = float64(vals[0]) + if vals[2] == 1000000 { + // ref https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/lib/utils.c#n963 + hz = float64(vals[3]) + } else { + hz = 100 + } } func TickInUsec() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if tickInUsec == 0.0 { initClock() } @@ -626,6 +737,8 @@ func TickInUsec() float64 { } func ClockFactor() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if clockFactor == 0.0 { initClock() } @@ -633,6 +746,8 @@ func ClockFactor() float64 { } func Hz() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if hz == 0.0 { initClock() } @@ -663,6 +778,11 @@ func latency(rate uint64, limit, buffer uint32) float64 { return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) } -func Xmittime(rate uint64, size uint32) float64 { - return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +func Xmittime(rate uint64, size uint32) uint32 { + // https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62 + return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate)))) +} + +func Xmitsize(rate uint64, ticks uint32) uint32 { + return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC) } diff --git a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go index 2d0bdc8c3..036399db6 100644 --- a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go +++ b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go @@ -77,28 +77,39 @@ func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) { return &link, nil } -func execRdmaGetLink(req *nl.NetlinkRequest, name string) (*RdmaLink, error) { +func execRdmaSetLink(req *nl.NetlinkRequest) error { + + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func RdmaLinkList() ([]*RdmaLink, error) { + return pkgHandle.RdmaLinkList() +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) msgs, err := req.Execute(unix.NETLINK_RDMA, 0) if err != nil { return nil, err } + + var res []*RdmaLink for _, m := range msgs { link, err := executeOneGetRdmaLink(m) if err != nil { return nil, err } - if link.Attrs.Name == name { - return link, nil - } + res = append(res, link) } - return nil, fmt.Errorf("Rdma device %v not found", name) -} - -func execRdmaSetLink(req *nl.NetlinkRequest) error { - _, err := req.Execute(unix.NETLINK_RDMA, 0) - return err + return res, nil } // RdmaLinkByName finds a link by name and returns a pointer to the object if @@ -110,11 +121,16 @@ func RdmaLinkByName(name string) (*RdmaLink, error) { // RdmaLinkByName finds a link by name and returns a pointer to the object if // found and nil error, otherwise returns error code. func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) { - - proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) - req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) - - return execRdmaGetLink(req, name) + links, err := h.RdmaLinkList() + if err != nil { + return nil, err + } + for _, link := range links { + if link.Attrs.Name == name { + return link, nil + } + } + return nil, fmt.Errorf("Rdma device %v not found", name) } // RdmaLinkSetName sets the name of the rdma link device. Return nil on success @@ -262,3 +278,54 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { return execRdmaSetLink(req) } + +// RdmaLinkDel deletes an rdma link +// +// Similar to: rdma link delete NAME +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkDel(name string) error { + return pkgHandle.RdmaLinkDel(name) +} + +// RdmaLinkDel deletes an rdma link. +func (h *Handle) RdmaLinkDel(name string) error { + link, err := h.RdmaLinkByName(name) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, link.Attrs.Index) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +// Similar to: rdma link add NAME type TYPE netdev NETDEV +// NAME - specifies the new name of the rdma link to add +// TYPE - specifies which rdma type to use. Link types: +// rxe - Soft RoCE driver +// siw - Soft iWARP driver +// NETDEV - specifies the network device to which the link is bound +// +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkAdd(linkName, linkType, netdev string) error { + return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev) +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))) + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go index 58ff1af60..1b4555d5c 100644 --- a/vendor/github.com/vishvananda/netlink/route.go +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -11,6 +11,24 @@ type Scope uint8 type NextHopFlag int +const ( + RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) + RT_FILTER_SCOPE + RT_FILTER_TYPE + RT_FILTER_TOS + RT_FILTER_IIF + RT_FILTER_OIF + RT_FILTER_DST + RT_FILTER_SRC + RT_FILTER_GW + RT_FILTER_TABLE + RT_FILTER_HOPLIMIT + RT_FILTER_PRIORITY + RT_FILTER_MARK + RT_FILTER_MASK + RT_FILTER_REALM +) + type Destination interface { Family() int Decode([]byte) error @@ -27,27 +45,46 @@ type Encap interface { Equal(Encap) bool } +//Protocol describe what was the originator of the route +type RouteProtocol int + // Route represents a netlink route. type Route struct { - LinkIndex int - ILinkIndex int - Scope Scope - Dst *net.IPNet - Src net.IP - Gw net.IP - MultiPath []*NexthopInfo - Protocol int - Priority int - Table int - Type int - Tos int - Flags int - MPLSDst *int - NewDst Destination - Encap Encap - MTU int - AdvMSS int - Hoplimit int + LinkIndex int + ILinkIndex int + Scope Scope + Dst *net.IPNet + Src net.IP + Gw net.IP + MultiPath []*NexthopInfo + Protocol RouteProtocol + Priority int + Family int + Table int + Type int + Tos int + Flags int + MPLSDst *int + NewDst Destination + Encap Encap + Via Destination + Realm int + MTU int + Window int + Rtt int + RttVar int + Ssthresh int + Cwnd int + AdvMSS int + Reordering int + Hoplimit int + InitCwnd int + Features int + RtoMin int + InitRwnd int + QuickACK int + Congctl string + FastOpenNoCookie int } func (r Route) String() string { @@ -66,6 +103,9 @@ func (r Route) String() string { if r.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap)) } + if r.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", r.Via)) + } elems = append(elems, fmt.Sprintf("Src: %s", r.Src)) if len(r.MultiPath) > 0 { elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath)) @@ -74,6 +114,7 @@ func (r Route) String() string { } elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags())) elems = append(elems, fmt.Sprintf("Table: %d", r.Table)) + elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm)) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } @@ -87,6 +128,7 @@ func (r Route) Equal(x Route) bool { nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && r.Protocol == x.Protocol && r.Priority == x.Priority && + r.Realm == x.Realm && r.Table == x.Table && r.Type == x.Type && r.Tos == x.Tos && @@ -94,6 +136,7 @@ func (r Route) Equal(x Route) bool { r.Flags == x.Flags && (r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) && (r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) && + (r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) && (r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap))) } @@ -111,8 +154,15 @@ type flagString struct { } // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE + +// NlFlags is only non-zero for RTM_NEWROUTE, the following flags can be set: +// - unix.NLM_F_REPLACE - Replace existing matching config object with this request +// - unix.NLM_F_EXCL - Don't replace the config object if it already exists +// - unix.NLM_F_CREATE - Create config object if it doesn't already exist +// - unix.NLM_F_APPEND - Add to the end of the object list type RouteUpdate struct { - Type uint16 + Type uint16 + NlFlags uint16 Route } @@ -123,6 +173,7 @@ type NexthopInfo struct { Flags int NewDst Destination Encap Encap + Via Destination } func (n *NexthopInfo) String() string { @@ -134,6 +185,9 @@ func (n *NexthopInfo) String() string { if n.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap)) } + if n.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", n.Via)) + } elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1)) elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags())) diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go index c69c595ed..0cd4f8363 100644 --- a/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -1,8 +1,11 @@ package netlink import ( + "bytes" + "encoding/binary" "fmt" "net" + "strconv" "strings" "syscall" @@ -21,19 +24,22 @@ const ( SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE ) -const ( - RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) - RT_FILTER_SCOPE - RT_FILTER_TYPE - RT_FILTER_TOS - RT_FILTER_IIF - RT_FILTER_OIF - RT_FILTER_DST - RT_FILTER_SRC - RT_FILTER_GW - RT_FILTER_TABLE - RT_FILTER_HOPLIMIT -) +func (s Scope) String() string { + switch s { + case SCOPE_UNIVERSE: + return "universe" + case SCOPE_SITE: + return "site" + case SCOPE_LINK: + return "link" + case SCOPE_HOST: + return "host" + case SCOPE_NOWHERE: + return "nowhere" + default: + return "unknown" + } +} const ( FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK @@ -128,7 +134,6 @@ func (e *MPLSEncap) Decode(buf []byte) error { if len(buf) < 4 { return fmt.Errorf("lack of bytes") } - native := nl.NativeEndian() l := native.Uint16(buf) if len(buf) < int(l) { return fmt.Errorf("lack of bytes") @@ -144,7 +149,6 @@ func (e *MPLSEncap) Decode(buf []byte) error { func (e *MPLSEncap) Encode() ([]byte, error) { s := nl.EncodeMPLSStack(e.Labels...) - native := nl.NativeEndian() hdr := make([]byte, 4) native.PutUint16(hdr, uint16(len(s)+4)) native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST) @@ -200,7 +204,6 @@ func (e *SEG6Encap) Decode(buf []byte) error { if len(buf) < 4 { return fmt.Errorf("lack of bytes") } - native := nl.NativeEndian() // Get Length(l) & Type(typ) : 2 + 2 bytes l := native.Uint16(buf) if len(buf) < int(l) { @@ -220,7 +223,6 @@ func (e *SEG6Encap) Decode(buf []byte) error { } func (e *SEG6Encap) Encode() ([]byte, error) { s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments) - native := nl.NativeEndian() hdr := make([]byte, 4) native.PutUint16(hdr, uint16(len(s)+4)) native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH) @@ -230,7 +232,7 @@ func (e *SEG6Encap) String() string { segs := make([]string, 0, len(e.Segments)) // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode), len(e.Segments), strings.Join(segs, " ")) @@ -271,6 +273,16 @@ type SEG6LocalEncap struct { In6Addr net.IP Iif int Oif int + bpf bpfObj +} + +func (e *SEG6LocalEncap) SetProg(progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("seg6local bpf SetProg: invalid fd") + } + e.bpf.progFd = progFd + e.bpf.progName = progName + return nil } func (e *SEG6LocalEncap) Type() int { @@ -281,7 +293,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error { if err != nil { return err } - native := nl.NativeEndian() for _, attr := range attrs { switch attr.Attr.Type { case nl.SEG6_LOCAL_ACTION: @@ -305,13 +316,28 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error { case nl.SEG6_LOCAL_OIF: e.Oif = int(native.Uint32(attr.Value[0:4])) e.Flags[nl.SEG6_LOCAL_OIF] = true + case nl.SEG6_LOCAL_BPF: + var bpfAttrs []syscall.NetlinkRouteAttr + bpfAttrs, err = nl.ParseRouteAttr(attr.Value) + bpfobj := bpfObj{} + for _, bpfAttr := range bpfAttrs { + switch bpfAttr.Attr.Type { + case nl.LWT_BPF_PROG_FD: + bpfobj.progFd = int(native.Uint32(bpfAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfobj.progName = string(bpfAttr.Value) + default: + err = fmt.Errorf("seg6local bpf decode: unknown attribute: Type %d", bpfAttr.Attr) + } + } + e.bpf = bpfobj + e.Flags[nl.SEG6_LOCAL_BPF] = true } } return err } func (e *SEG6LocalEncap) Encode() ([]byte, error) { var err error - native := nl.NativeEndian() res := make([]byte, 8) native.PutUint16(res, 8) // length native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION) @@ -367,6 +393,16 @@ func (e *SEG6LocalEncap) Encode() ([]byte, error) { native.PutUint32(attr[4:], uint32(e.Oif)) res = append(res, attr...) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + attr := nl.NewRtAttr(nl.SEG6_LOCAL_BPF, []byte{}) + if e.bpf.progFd != 0 { + attr.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(e.bpf.progFd))) + } + if e.bpf.progName != "" { + attr.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(e.bpf.progName)) + } + res = append(res, attr.Serialize()...) + } return res, err } func (e *SEG6LocalEncap) String() string { @@ -400,12 +436,15 @@ func (e *SEG6LocalEncap) String() string { } if e.Flags[nl.SEG6_LOCAL_SRH] { segs := make([]string, 0, len(e.Segments)) - //append segment backwards (from n to 0) since seg#0 is the last segment. + // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + strs = append(strs, fmt.Sprintf("bpf %s[%d]", e.bpf.progName, e.bpf.progFd)) + } return strings.Join(strs, " ") } func (e *SEG6LocalEncap) Equal(x Encap) bool { @@ -437,12 +476,316 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool { if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) { return false } - if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif { + if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif || e.bpf != o.bpf { + return false + } + return true +} + +// Encap BPF definitions +type bpfObj struct { + progFd int + progName string +} +type BpfEncap struct { + progs [nl.LWT_BPF_MAX]bpfObj + headroom int +} + +// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should +// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). +func (e *BpfEncap) SetProg(mode, progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("lwt bpf SetProg: invalid fd") + } + if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM { + return fmt.Errorf("lwt bpf SetProg:invalid mode") + } + e.progs[mode].progFd = progFd + e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd) + return nil +} + +// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP. +// maximum headroom is LWT_BPF_MAX_HEADROOM +func (e *BpfEncap) SetXmitHeadroom(headroom int) error { + if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 { + return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM) + } + e.headroom = headroom + return nil +} + +func (e *BpfEncap) Type() int { + return nl.LWTUNNEL_ENCAP_BPF +} +func (e *BpfEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lwt bpf decode: lack of bytes") + } + native := nl.NativeEndian() + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err) + } + for _, attr := range attrs { + if int(attr.Attr.Type) < 1 { + // nl.LWT_BPF_UNSPEC + continue + } + if int(attr.Attr.Type) > nl.LWT_BPF_MAX { + return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type) + } + switch int(attr.Attr.Type) { + case nl.LWT_BPF_MAX_HEADROOM: + e.headroom = int(native.Uint32(attr.Value)) + default: + bpfO := bpfObj{} + parsedAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing route attribute") + } + for _, parsedAttr := range parsedAttrs { + switch int(parsedAttr.Attr.Type) { + case nl.LWT_BPF_PROG_FD: + bpfO.progFd = int(native.Uint32(parsedAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfO.progName = string(parsedAttr.Value) + default: + return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len) + } + } + e.progs[attr.Attr.Type] = bpfO + } + } + return nil +} + +func (e *BpfEncap) Encode() ([]byte, error) { + buf := make([]byte, 0) + native = nl.NativeEndian() + for index, attr := range e.progs { + nlMsg := nl.NewRtAttr(index, []byte{}) + if attr.progFd != 0 { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd))) + } + if attr.progName != "" { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName)) + } + if nlMsg.Len() > 4 { + buf = append(buf, nlMsg.Serialize()...) + } + } + if len(buf) <= 4 { + return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer") + } + if e.headroom > 0 { + hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom))) + buf = append(buf, hRoom.Serialize()...) + } + return buf, nil +} + +func (e *BpfEncap) String() string { + progs := make([]string, 0) + for index, obj := range e.progs { + empty := bpfObj{} + switch index { + case nl.LWT_BPF_IN: + if obj != empty { + progs = append(progs, fmt.Sprintf("in: %s", obj.progName)) + } + case nl.LWT_BPF_OUT: + if obj != empty { + progs = append(progs, fmt.Sprintf("out: %s", obj.progName)) + } + case nl.LWT_BPF_XMIT: + if obj != empty { + progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName)) + } + } + } + if e.headroom > 0 { + progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom)) + } + return strings.Join(progs, " ") +} + +func (e *BpfEncap) Equal(x Encap) bool { + o, ok := x.(*BpfEncap) + if !ok { + return false + } + if e.headroom != o.headroom { + return false + } + for i := range o.progs { + if o.progs[i] != e.progs[i] { + return false + } + } + return true +} + +// IP6tnlEncap definition +type IP6tnlEncap struct { + ID uint64 + Dst net.IP + Src net.IP + Hoplimit uint8 + TC uint8 + Flags uint16 +} + +func (e *IP6tnlEncap) Type() int { + return nl.LWTUNNEL_ENCAP_IP6 +} + +func (e *IP6tnlEncap) Decode(buf []byte) error { + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.LWTUNNEL_IP6_ID: + e.ID = uint64(native.Uint64(attr.Value[0:4])) + case nl.LWTUNNEL_IP6_DST: + e.Dst = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_SRC: + e.Src = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_HOPLIMIT: + e.Hoplimit = attr.Value[0] + case nl.LWTUNNEL_IP6_TC: + // e.TC = attr.Value[0] + err = fmt.Errorf("decoding TC in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_FLAGS: + // e.Flags = uint16(native.Uint16(attr.Value[0:2])) + err = fmt.Errorf("decoding FLAG in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_PAD: + err = fmt.Errorf("decoding PAD in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_OPTS: + err = fmt.Errorf("decoding OPTS in IP6tnlEncap is not supported") + } + } + return err +} + +func (e *IP6tnlEncap) Encode() ([]byte, error) { + + final := []byte{} + + resID := make([]byte, 12) + native.PutUint16(resID, 12) // 2+2+8 + native.PutUint16(resID[2:], nl.LWTUNNEL_IP6_ID) + native.PutUint64(resID[4:], 0) + final = append(final, resID...) + + resDst := make([]byte, 4) + native.PutUint16(resDst, 20) // 2+2+16 + native.PutUint16(resDst[2:], nl.LWTUNNEL_IP6_DST) + resDst = append(resDst, e.Dst...) + final = append(final, resDst...) + + resSrc := make([]byte, 4) + native.PutUint16(resSrc, 20) + native.PutUint16(resSrc[2:], nl.LWTUNNEL_IP6_SRC) + resSrc = append(resSrc, e.Src...) + final = append(final, resSrc...) + + // resTc := make([]byte, 5) + // native.PutUint16(resTc, 5) + // native.PutUint16(resTc[2:], nl.LWTUNNEL_IP6_TC) + // resTc[4] = e.TC + // final = append(final,resTc...) + + resHops := make([]byte, 5) + native.PutUint16(resHops, 5) + native.PutUint16(resHops[2:], nl.LWTUNNEL_IP6_HOPLIMIT) + resHops[4] = e.Hoplimit + final = append(final, resHops...) + + // resFlags := make([]byte, 6) + // native.PutUint16(resFlags, 6) + // native.PutUint16(resFlags[2:], nl.LWTUNNEL_IP6_FLAGS) + // native.PutUint16(resFlags[4:], e.Flags) + // final = append(final,resFlags...) + + return final, nil +} + +func (e *IP6tnlEncap) String() string { + return fmt.Sprintf("id %d src %s dst %s hoplimit %d tc %d flags 0x%.4x", e.ID, e.Src, e.Dst, e.Hoplimit, e.TC, e.Flags) +} + +func (e *IP6tnlEncap) Equal(x Encap) bool { + o, ok := x.(*IP6tnlEncap) + if !ok { + return false + } + + if e.ID != o.ID || e.Flags != o.Flags || e.Hoplimit != o.Hoplimit || e.Src.Equal(o.Src) || e.Dst.Equal(o.Dst) || e.TC != o.TC { return false } return true } +type Via struct { + AddrFamily int + Addr net.IP +} + +func (v *Via) Equal(x Destination) bool { + o, ok := x.(*Via) + if !ok { + return false + } + if v.AddrFamily == x.Family() && v.Addr.Equal(o.Addr) { + return true + } + return false +} + +func (v *Via) String() string { + return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String()) +} + +func (v *Via) Family() int { + return v.AddrFamily +} + +func (v *Via) Encode() ([]byte, error) { + buf := &bytes.Buffer{} + err := binary.Write(buf, native, uint16(v.AddrFamily)) + if err != nil { + return nil, err + } + err = binary.Write(buf, native, v.Addr) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (v *Via) Decode(b []byte) error { + if len(b) < 6 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.AddrFamily = int(native.Uint16(b[0:2])) + if v.AddrFamily == nl.FAMILY_V4 { + v.Addr = net.IP(b[2:6]) + return nil + } else if v.AddrFamily == nl.FAMILY_V6 { + if len(b) < 18 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.Addr = net.IP(b[2:]) + return nil + } + return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily) +} + // RouteAdd will add a route to the system. // Equivalent to: `ip route add $route` func RouteAdd(route *Route) error { @@ -454,7 +797,51 @@ func RouteAdd(route *Route) error { func (h *Handle) RouteAdd(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func RouteAppend(route *Route) error { + return pkgHandle.RouteAppend(route) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func (h *Handle) RouteAppend(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteAddEcmp will add a route to the system. +func RouteAddEcmp(route *Route) error { + return pkgHandle.RouteAddEcmp(route) +} + +// RouteAddEcmp will add a route to the system. +func (h *Handle) RouteAddEcmp(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func RouteChange(route *Route) error { + return pkgHandle.RouteChange(route) +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func (h *Handle) RouteChange(route *Route) error { + flags := unix.NLM_F_REPLACE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteReplace will add a route to the system. @@ -468,7 +855,8 @@ func RouteReplace(route *Route) error { func (h *Handle) RouteReplace(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteDel will delete a route from the system. @@ -481,12 +869,27 @@ func RouteDel(route *Route) error { // Equivalent to: `ip route del $route` func (h *Handle) RouteDel(route *Route) error { req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) - return h.routeHandle(route, req, nl.NewRtDelMsg()) + _, err := h.routeHandle(route, req, nl.NewRtDelMsg()) + return err } -func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { - if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { - return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") +func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) ([][]byte, error) { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return nil, err + } + return req.Execute(unix.NETLINK_ROUTE, 0) +} + +func (h *Handle) routeHandleIter(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg, f func(msg []byte) bool) error { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return err + } + return req.ExecuteIter(unix.NETLINK_ROUTE, 0, f) +} + +func (h *Handle) prepareRouteReq(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { + if req.NlMsghdr.Type != unix.RTM_GETROUTE && (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { + return fmt.Errorf("either Dst.IP, Src.IP or Gw must be set") } family := -1 @@ -530,7 +933,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if err != nil { return err } - rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + switch route.Encap.Type() { + case nl.LWTUNNEL_ENCAP_BPF: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf)) + default: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + } if route.Src != nil { @@ -564,6 +973,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData)) } + if route.Via != nil { + buf, err := route.Via.Encode() + if err != nil { + return fmt.Errorf("failed to encode RTA_VIA: %v", err) + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf)) + } + if len(route.MultiPath) > 0 { buf := []byte{} for _, nh := range route.MultiPath { @@ -606,6 +1023,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf)) } + if nh.Via != nil { + buf, err := nh.Via.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf)) + } rtnh.Children = children buf = append(buf, rtnh.Serialize()...) } @@ -628,6 +1052,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg native.PutUint32(b, uint32(route.Priority)) rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b)) } + if route.Realm > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Realm)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b)) + } if route.Tos > 0 { msg.Tos = uint8(route.Tos) } @@ -639,19 +1068,70 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } var metrics []*nl.RtAttr - // TODO: support other rta_metric values if route.MTU > 0 { b := nl.Uint32Attr(uint32(route.MTU)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b)) } + if route.Window > 0 { + b := nl.Uint32Attr(uint32(route.Window)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_WINDOW, b)) + } + if route.Rtt > 0 { + b := nl.Uint32Attr(uint32(route.Rtt)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTT, b)) + } + if route.RttVar > 0 { + b := nl.Uint32Attr(uint32(route.RttVar)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTTVAR, b)) + } + if route.Ssthresh > 0 { + b := nl.Uint32Attr(uint32(route.Ssthresh)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_SSTHRESH, b)) + } + if route.Cwnd > 0 { + b := nl.Uint32Attr(uint32(route.Cwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CWND, b)) + } if route.AdvMSS > 0 { b := nl.Uint32Attr(uint32(route.AdvMSS)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b)) } + if route.Reordering > 0 { + b := nl.Uint32Attr(uint32(route.Reordering)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_REORDERING, b)) + } if route.Hoplimit > 0 { b := nl.Uint32Attr(uint32(route.Hoplimit)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b)) } + if route.InitCwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitCwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITCWND, b)) + } + if route.Features > 0 { + b := nl.Uint32Attr(uint32(route.Features)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FEATURES, b)) + } + if route.RtoMin > 0 { + b := nl.Uint32Attr(uint32(route.RtoMin)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTO_MIN, b)) + } + if route.InitRwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitRwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITRWND, b)) + } + if route.QuickACK > 0 { + b := nl.Uint32Attr(uint32(route.QuickACK)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_QUICKACK, b)) + } + if route.Congctl != "" { + b := nl.ZeroTerminated(route.Congctl) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CC_ALGO, b)) + } + if route.FastOpenNoCookie > 0 { + b := nl.Uint32Attr(uint32(route.FastOpenNoCookie)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FASTOPEN_NO_COOKIE, b)) + } if metrics != nil { attr := nl.NewRtAttr(unix.RTA_METRICS, nil) @@ -663,22 +1143,21 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg msg.Flags = uint32(route.Flags) msg.Scope = uint8(route.Scope) - msg.Family = uint8(family) + // only overwrite family if it was not set in msg + if msg.Family == 0 { + msg.Family = uint8(family) + } req.AddData(msg) for _, attr := range rtAttrs { req.AddData(attr) } - var ( - b = make([]byte, 4) - native = nl.NativeEndian() - ) - native.PutUint32(b, uint32(route.LinkIndex)) - - req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + if (req.NlMsghdr.Type != unix.RTM_GETROUTE) || (req.NlMsghdr.Type == unix.RTM_GETROUTE && route.LinkIndex > 0) { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.LinkIndex)) + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + return nil } // RouteList gets a list of routes in the system. @@ -692,13 +1171,13 @@ func RouteList(link Link, family int) ([]Route, error) { // Equivalent to: `ip route show`. // The list can be filtered by link and ip family. func (h *Handle) RouteList(link Link, family int) ([]Route, error) { - var routeFilter *Route + routeFilter := &Route{} if link != nil { - routeFilter = &Route{ - LinkIndex: link.Attrs().Index, - } + routeFilter.LinkIndex = link.Attrs().Index + + return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) } - return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) + return h.RouteListFiltered(family, routeFilter, 0) } // RouteListFiltered gets a list of routes in the system filtered with specified rules. @@ -710,65 +1189,94 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e // RouteListFiltered gets a list of routes in the system filtered with specified rules. // All rules must be defined in RouteFilter struct func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { - req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) - - msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + var res []Route + err := h.RouteListFilteredIter(family, filter, filterMask, func(route Route) (cont bool) { + res = append(res, route) + return true + }) if err != nil { return nil, err } + return res, nil +} - var res []Route - for _, m := range msgs { +// RouteListFilteredIter passes each route that matches the filter to the given iterator func. Iteration continues +// until all routes are loaded or the func returns false. +func RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + return pkgHandle.RouteListFilteredIter(family, filter, filterMask, f) +} + +func (h *Handle) RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) + rtmsg := &nl.RtMsg{} + rtmsg.Family = uint8(family) + + var parseErr error + err := h.routeHandleIter(filter, req, rtmsg, func(m []byte) bool { msg := nl.DeserializeRtMsg(m) + if family != FAMILY_ALL && msg.Family != uint8(family) { + // Ignore routes not matching requested family + return true + } if msg.Flags&unix.RTM_F_CLONED != 0 { // Ignore cloned routes - continue + return true } if msg.Table != unix.RT_TABLE_MAIN { - if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 { + if filter == nil || filterMask&RT_FILTER_TABLE == 0 { // Ignore non-main tables - continue + return true } } route, err := deserializeRoute(m) if err != nil { - return nil, err + parseErr = err + return false } if filter != nil { switch { case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table: - continue + return true case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol: - continue + return true case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope: - continue + return true case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type: - continue + return true case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: - continue + return true + case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: + return true case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: - continue + return true case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: - continue + return true case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw): - continue + return true case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src): - continue + return true case filterMask&RT_FILTER_DST != 0: if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { + if filter.Dst == nil { + filter.Dst = genZeroIPNet(family) + } if !ipNetEqual(route.Dst, filter.Dst) { - continue + return true } } case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit: - continue + return true } } - res = append(res, route) + return f(route) + }) + if err != nil { + return err } - return res, nil + if parseErr != nil { + return parseErr + } + return nil } // deserializeRoute decodes a binary netlink message into a Route struct @@ -780,14 +1288,14 @@ func deserializeRoute(m []byte) (Route, error) { } route := Route{ Scope: Scope(msg.Scope), - Protocol: int(msg.Protocol), + Protocol: RouteProtocol(int(msg.Protocol)), Table: int(msg.Table), Type: int(msg.Type), Tos: int(msg.Tos), Flags: int(msg.Flags), + Family: int(msg.Family), } - native := nl.NativeEndian() var encap, encapType syscall.NetlinkRouteAttr for _, attr := range attrs { switch attr.Attr.Type { @@ -814,6 +1322,8 @@ func deserializeRoute(m []byte) (Route, error) { route.ILinkIndex = int(native.Uint32(attr.Value[0:4])) case unix.RTA_PRIORITY: route.Priority = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_FLOW: + route.Realm = int(native.Uint32(attr.Value[0:4])) case unix.RTA_TABLE: route.Table = int(native.Uint32(attr.Value[0:4])) case unix.RTA_MULTIPATH: @@ -853,6 +1363,12 @@ func deserializeRoute(m []byte) (Route, error) { encapType = attr case unix.RTA_ENCAP: encap = attr + case unix.RTA_VIA: + d := &Via{} + if err := d.Decode(attr.Value); err != nil { + return nil, nil, err + } + info.Via = d } } @@ -890,6 +1406,12 @@ func deserializeRoute(m []byte) (Route, error) { return route, err } route.NewDst = d + case unix.RTA_VIA: + v := &Via{} + if err := v.Decode(attr.Value); err != nil { + return route, err + } + route.Via = v case unix.RTA_ENCAP_TYPE: encapType = attr case unix.RTA_ENCAP: @@ -903,15 +1425,62 @@ func deserializeRoute(m []byte) (Route, error) { switch metric.Attr.Type { case unix.RTAX_MTU: route.MTU = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_WINDOW: + route.Window = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTT: + route.Rtt = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTTVAR: + route.RttVar = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_SSTHRESH: + route.Ssthresh = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CWND: + route.Cwnd = int(native.Uint32(metric.Value[0:4])) case unix.RTAX_ADVMSS: route.AdvMSS = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_REORDERING: + route.Reordering = int(native.Uint32(metric.Value[0:4])) case unix.RTAX_HOPLIMIT: route.Hoplimit = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITCWND: + route.InitCwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_FEATURES: + route.Features = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTO_MIN: + route.RtoMin = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITRWND: + route.InitRwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_QUICKACK: + route.QuickACK = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CC_ALGO: + route.Congctl = nl.BytesToString(metric.Value) + case unix.RTAX_FASTOPEN_NO_COOKIE: + route.FastOpenNoCookie = int(native.Uint32(metric.Value[0:4])) } } } } + // Same logic to generate "default" dst with iproute2 implementation + if route.Dst == nil { + var addLen int + var ip net.IP + switch msg.Family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + + if addLen != 0 { + route.Dst = &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(int(msg.Dst_len), 8*addLen), + } + } + } + if len(encap.Value) != 0 && len(encapType.Value) != 0 { typ := int(native.Uint16(encapType.Value[0:2])) var e Encap @@ -931,6 +1500,11 @@ func deserializeRoute(m []byte) (Route, error) { if err := e.Decode(encap.Value); err != nil { return route, err } + case nl.LWTUNNEL_ENCAP_BPF: + e = &BpfEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } } route.Encap = e } @@ -938,15 +1512,34 @@ func deserializeRoute(m []byte) (Route, error) { return route, nil } +// RouteGetOptions contains a set of options to use with +// RouteGetWithOptions +type RouteGetOptions struct { + Iif string + IifIndex int + Oif string + VrfName string + SrcAddr net.IP + UID *uint32 + Mark uint32 + FIBMatch bool +} + +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + return pkgHandle.RouteGetWithOptions(destination, options) +} + // RouteGet gets a route to a specific destination from the host system. // Equivalent to: 'ip route get'. func RouteGet(destination net.IP) ([]Route, error) { return pkgHandle.RouteGet(destination) } -// RouteGet gets a route to a specific destination from the host system. -// Equivalent to: 'ip route get'. -func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST) family := nl.GetIPFamily(destination) var destinationData []byte @@ -961,11 +1554,88 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { msg := &nl.RtMsg{} msg.Family = uint8(family) msg.Dst_len = bitlen + if options != nil && options.SrcAddr != nil { + msg.Src_len = bitlen + } + msg.Flags = unix.RTM_F_LOOKUP_TABLE + if options != nil && options.FIBMatch { + msg.Flags |= unix.RTM_F_FIB_MATCH + } req.AddData(msg) rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData) req.AddData(rtaDst) + if options != nil { + if options.VrfName != "" { + link, err := h.LinkByName(options.VrfName) + if err != nil { + return nil, err + } + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + iifIndex := 0 + if len(options.Iif) > 0 { + link, err := h.LinkByName(options.Iif) + if err != nil { + return nil, err + } + + iifIndex = link.Attrs().Index + } else if options.IifIndex > 0 { + iifIndex = options.IifIndex + } + + if iifIndex > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(iifIndex)) + + req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) + } + + if len(options.Oif) > 0 { + link, err := h.LinkByName(options.Oif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if options.SrcAddr != nil { + var srcAddr []byte + if family == FAMILY_V4 { + srcAddr = options.SrcAddr.To4() + } else { + srcAddr = options.SrcAddr.To16() + } + + req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) + } + + if options.UID != nil { + uid := *options.UID + b := make([]byte, 4) + native.PutUint32(b, uid) + + req.AddData(nl.NewRtAttr(unix.RTA_UID, b)) + } + + if options.Mark > 0 { + b := make([]byte, 4) + native.PutUint32(b, options.Mark) + + req.AddData(nl.NewRtAttr(unix.RTA_MARK, b)) + } + } + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) if err != nil { return nil, err @@ -980,27 +1650,35 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { res = append(res, route) } return res, nil +} +// RouteGet gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get'. +func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { + return h.RouteGetWithOptions(destination, nil) } // RouteSubscribe takes a chan down which notifications will be sent // when routes are added or deleted. Close the 'done' chan to stop subscription. func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeAt works like RouteSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(ns, netns.None(), ch, done, nil, false) + return routeSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeOptions contains a set of options to use with // RouteSubscribeWithOptions. type RouteSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // RouteSubscribeWithOptions work like RouteSubscribe but enable to @@ -1011,14 +1689,27 @@ func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -1040,7 +1731,8 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -1055,27 +1747,103 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } route, err := deserializeRoute(m.Data) if err != nil { if cberr != nil { cberr(err) } - return + continue + } + ch <- RouteUpdate{ + Type: m.Header.Type, + NlFlags: m.Header.Flags & (unix.NLM_F_REPLACE | unix.NLM_F_EXCL | unix.NLM_F_CREATE | unix.NLM_F_APPEND), + Route: route, } - ch <- RouteUpdate{Type: m.Header.Type, Route: route} } } }() return nil } + +func (p RouteProtocol) String() string { + switch int(p) { + case unix.RTPROT_BABEL: + return "babel" + case unix.RTPROT_BGP: + return "bgp" + case unix.RTPROT_BIRD: + return "bird" + case unix.RTPROT_BOOT: + return "boot" + case unix.RTPROT_DHCP: + return "dhcp" + case unix.RTPROT_DNROUTED: + return "dnrouted" + case unix.RTPROT_EIGRP: + return "eigrp" + case unix.RTPROT_GATED: + return "gated" + case unix.RTPROT_ISIS: + return "isis" + // case unix.RTPROT_KEEPALIVED: + // return "keepalived" + case unix.RTPROT_KERNEL: + return "kernel" + case unix.RTPROT_MROUTED: + return "mrouted" + case unix.RTPROT_MRT: + return "mrt" + case unix.RTPROT_NTK: + return "ntk" + case unix.RTPROT_OSPF: + return "ospf" + case unix.RTPROT_RA: + return "ra" + case unix.RTPROT_REDIRECT: + return "redirect" + case unix.RTPROT_RIP: + return "rip" + case unix.RTPROT_STATIC: + return "static" + case unix.RTPROT_UNSPEC: + return "unspec" + case unix.RTPROT_XORP: + return "xorp" + case unix.RTPROT_ZEBRA: + return "zebra" + default: + return strconv.Itoa(int(p)) + } +} + +// genZeroIPNet returns 0.0.0.0/0 or ::/0 for IPv4 or IPv6, otherwise nil +func genZeroIPNet(family int) *net.IPNet { + var addLen int + var ip net.IP + switch family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + if addLen != 0 { + return &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(0, 8*addLen), + } + } + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/route_unspecified.go b/vendor/github.com/vishvananda/netlink/route_unspecified.go index 2701862b4..db7372689 100644 --- a/vendor/github.com/vishvananda/netlink/route_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/route_unspecified.go @@ -2,6 +2,8 @@ package netlink +import "strconv" + func (r *Route) ListFlags() []string { return []string{} } @@ -9,3 +11,11 @@ func (r *Route) ListFlags() []string { func (n *NexthopInfo) ListFlags() []string { return []string{} } + +func (s Scope) String() string { + return "unknown" +} + +func (p RouteProtocol) String() string { + return strconv.Itoa(int(p)) +} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go index 7fc8ae5df..9d74c7cd8 100644 --- a/vendor/github.com/vishvananda/netlink/rule.go +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -10,8 +10,9 @@ type Rule struct { Priority int Family int Table int - Mark int - Mask int + Mark uint32 + Mask *uint32 + Tos uint TunID uint Goto int Src *net.IPNet @@ -22,10 +23,27 @@ type Rule struct { SuppressIfgroup int SuppressPrefixlen int Invert bool + Dport *RulePortRange + Sport *RulePortRange + IPProto int + UIDRange *RuleUIDRange + Protocol uint8 + Type uint8 } func (r Rule) String() string { - return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table) + from := "all" + if r.Src != nil && r.Src.String() != "" { + from = r.Src.String() + } + + to := "all" + if r.Dst != nil && r.Dst.String() != "" { + to = r.Dst.String() + } + + return fmt.Sprintf("ip rule %d: from %s to %s table %d %s", + r.Priority, from, to, r.Table, r.typeString()) } // NewRule return empty rules. @@ -34,9 +52,31 @@ func NewRule() *Rule { SuppressIfgroup: -1, SuppressPrefixlen: -1, Priority: -1, - Mark: -1, - Mask: -1, + Mark: 0, + Mask: nil, Goto: -1, Flow: -1, } } + +// NewRulePortRange creates rule sport/dport range. +func NewRulePortRange(start, end uint16) *RulePortRange { + return &RulePortRange{Start: start, End: end} +} + +// RulePortRange represents rule sport/dport range. +type RulePortRange struct { + Start uint16 + End uint16 +} + +// NewRuleUIDRange creates rule uid range. +func NewRuleUIDRange(start, end uint32) *RuleUIDRange { + return &RuleUIDRange{Start: start, End: end} +} + +// RuleUIDRange represents rule uid range. +type RuleUIDRange struct { + Start uint32 + End uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go index e12569fe4..ddff99cfa 100644 --- a/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -1,6 +1,7 @@ package netlink import ( + "bytes" "fmt" "net" @@ -42,8 +43,8 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { msg.Protocol = unix.RTPROT_BOOT msg.Scope = unix.RT_SCOPE_UNIVERSE msg.Table = unix.RT_TABLE_UNSPEC - msg.Type = unix.RTN_UNSPEC - if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { + msg.Type = rule.Type // usually 0, same as unix.RTN_UNSPEC + if msg.Type == 0 && req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { msg.Type = unix.RTN_UNICAST } if rule.Invert { @@ -55,6 +56,9 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { if rule.Table >= 0 && rule.Table < 256 { msg.Table = uint8(rule.Table) } + if rule.Tos != 0 { + msg.Tos = uint8(rule.Tos) + } var dstFamily uint8 var rtAttrs []*nl.RtAttr @@ -93,21 +97,19 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(rtAttrs[i]) } - native := nl.NativeEndian() - if rule.Priority >= 0 { b := make([]byte, 4) native.PutUint32(b, uint32(rule.Priority)) req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b)) } - if rule.Mark >= 0 { + if rule.Mark != 0 || rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mark)) + native.PutUint32(b, rule.Mark) req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b)) } - if rule.Mask >= 0 { + if rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mask)) + native.PutUint32(b, *rule.Mask) req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b)) } if rule.Flow >= 0 { @@ -138,10 +140,10 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { } } if rule.IifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName))) + req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00"))) } if rule.OifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName))) + req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00"))) } if rule.Goto >= 0 { msg.Type = nl.FR_ACT_GOTO @@ -150,6 +152,31 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b)) } + if rule.IPProto > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.IPProto)) + req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b)) + } + + if rule.Dport != nil { + b := rule.Dport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b)) + } + + if rule.Sport != nil { + b := rule.Sport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) + } + + if rule.UIDRange != nil { + b := rule.UIDRange.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b)) + } + + if rule.Protocol > 0 { + req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol))) + } + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -163,6 +190,19 @@ func RuleList(family int) ([]Rule, error) { // RuleList lists rules in the system. // Equivalent to: ip rule list func (h *Handle) RuleList(family int) ([]Rule, error) { + return h.RuleListFiltered(family, nil, 0) +} + +// RuleListFiltered gets a list of rules in the system filtered by the +// specified rule template `filter`. +// Equivalent to: ip rule list +func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + return pkgHandle.RuleListFiltered(family, filter, filterMask) +} + +// RuleListFiltered lists rules in the system. +// Equivalent to: ip rule list +func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST) msg := nl.NewIfInfomsg(family) req.AddData(msg) @@ -172,7 +212,6 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { return nil, err } - native := nl.NativeEndian() var res = make([]Rule, 0) for i := range msgs { msg := nl.DeserializeRtMsg(msgs[i]) @@ -182,8 +221,11 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { } rule := NewRule() + rule.Priority = 0 // The default priority from kernel rule.Invert = msg.Flags&FibRuleInvert > 0 + rule.Family = int(msg.Family) + rule.Tos = uint(msg.Tos) for j := range attrs { switch attrs[j].Attr.Type { @@ -200,11 +242,12 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)), } case nl.FRA_FWMARK: - rule.Mark = int(native.Uint32(attrs[j].Value[0:4])) + rule.Mark = native.Uint32(attrs[j].Value[0:4]) case nl.FRA_FWMASK: - rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) + mask := native.Uint32(attrs[j].Value[0:4]) + rule.Mask = &mask case nl.FRA_TUN_ID: - rule.TunID = uint(native.Uint64(attrs[j].Value[0:4])) + rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) case nl.FRA_IIFNAME: rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) case nl.FRA_OIFNAME: @@ -225,10 +268,98 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { rule.Goto = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_PRIORITY: rule.Priority = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_IP_PROTO: + rule.IPProto = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_DPORT_RANGE: + rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_SPORT_RANGE: + rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_UID_RANGE: + rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8])) + case nl.FRA_PROTOCOL: + rule.Protocol = uint8(attrs[j].Value[0]) + } + } + + if filter != nil { + switch { + case filterMask&RT_FILTER_SRC != 0 && + (rule.Src == nil || rule.Src.String() != filter.Src.String()): + continue + case filterMask&RT_FILTER_DST != 0 && + (rule.Dst == nil || rule.Dst.String() != filter.Dst.String()): + continue + case filterMask&RT_FILTER_TABLE != 0 && + filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table: + continue + case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos: + continue + case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority: + continue + case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark: + continue + case filterMask&RT_FILTER_MASK != 0 && !ptrEqual(rule.Mask, filter.Mask): + continue } } + res = append(res, *rule) } return res, nil } + +func (pr *RulePortRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 2), make([]byte, 2)} + native.PutUint16(b[0], pr.Start) + native.PutUint16(b[1], pr.End) + return bytes.Join(b, []byte{}) +} + +func (pr *RuleUIDRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 4), make([]byte, 4)} + native.PutUint32(b[0], pr.Start) + native.PutUint32(b[1], pr.End) + return bytes.Join(b, []byte{}) +} + +func ptrEqual(a, b *uint32) bool { + if a == b { + return true + } + if (a == nil) || (b == nil) { + return false + } + return *a == *b +} + +func (r Rule) typeString() string { + switch r.Type { + case unix.RTN_UNSPEC: // zero + return "" + case unix.RTN_UNICAST: + return "" + case unix.RTN_LOCAL: + return "local" + case unix.RTN_BROADCAST: + return "broadcast" + case unix.RTN_ANYCAST: + return "anycast" + case unix.RTN_MULTICAST: + return "multicast" + case unix.RTN_BLACKHOLE: + return "blackhole" + case unix.RTN_UNREACHABLE: + return "unreachable" + case unix.RTN_PROHIBIT: + return "prohibit" + case unix.RTN_THROW: + return "throw" + case unix.RTN_NAT: + return "nat" + case unix.RTN_XRESOLVE: + return "xresolve" + default: + return fmt.Sprintf("type(0x%x)", r.Type) + } +} diff --git a/vendor/github.com/vishvananda/netlink/rule_nonlinux.go b/vendor/github.com/vishvananda/netlink/rule_nonlinux.go new file mode 100644 index 000000000..2b19aa64c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rule_nonlinux.go @@ -0,0 +1,8 @@ +//go:build !linux +// +build !linux + +package netlink + +func (r Rule) typeString() string { + return "" +} diff --git a/vendor/github.com/vishvananda/netlink/socket.go b/vendor/github.com/vishvananda/netlink/socket.go index 41aa72624..e65efb130 100644 --- a/vendor/github.com/vishvananda/netlink/socket.go +++ b/vendor/github.com/vishvananda/netlink/socket.go @@ -25,3 +25,80 @@ type Socket struct { UID uint32 INode uint32 } + +// UnixSocket represents a netlink unix socket. +type UnixSocket struct { + Type uint8 + Family uint8 + State uint8 + pad uint8 + INode uint32 + Cookie [2]uint32 +} + +// XDPSocket represents an XDP socket (and the common diagnosis part in +// particular). Please note that in contrast to [UnixSocket] the XDPSocket type +// does not feature “State” information. +type XDPSocket struct { + // xdp_diag_msg + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 + Family uint8 + Type uint8 + pad uint16 + Ino uint32 + Cookie [2]uint32 +} + +type XDPInfo struct { + // XDP_DIAG_INFO/xdp_diag_info + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L51 + Ifindex uint32 + QueueID uint32 + + // XDP_DIAG_UID + UID uint32 + + // XDP_RX_RING + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L56 + RxRingEntries uint32 + TxRingEntries uint32 + UmemFillRingEntries uint32 + UmemCompletionRingEntries uint32 + + // XDR_DIAG_UMEM + Umem *XDPDiagUmem + + // XDR_DIAG_STATS + Stats *XDPDiagStats +} + +const ( + XDP_DU_F_ZEROCOPY = 1 << iota +) + +// XDPDiagUmem describes the umem attached to an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L62 +type XDPDiagUmem struct { + Size uint64 + ID uint32 + NumPages uint32 + ChunkSize uint32 + Headroom uint32 + Ifindex uint32 + QueueID uint32 + Flags uint32 + Refs uint32 +} + +// XDPDiagStats contains ring statistics for an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L74 +type XDPDiagStats struct { + RxDropped uint64 + RxInvalid uint64 + RxFull uint64 + FillRingEmpty uint64 + TxInvalid uint64 + TxRingEmpty uint64 +} diff --git a/vendor/github.com/vishvananda/netlink/socket_linux.go b/vendor/github.com/vishvananda/netlink/socket_linux.go index c4d89c17e..4eb4aeafb 100644 --- a/vendor/github.com/vishvananda/netlink/socket_linux.go +++ b/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -4,15 +4,18 @@ import ( "errors" "fmt" "net" + "syscall" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) const ( - sizeofSocketID = 0x30 - sizeofSocketRequest = sizeofSocketID + 0x8 - sizeofSocket = sizeofSocketID + 0x18 + sizeofSocketID = 0x30 + sizeofSocketRequest = sizeofSocketID + 0x8 + sizeofSocket = sizeofSocketID + 0x18 + sizeofUnixSocketRequest = 0x18 // 24 byte + sizeofUnixSocket = 0x10 // 16 byte ) type socketRequest struct { @@ -49,10 +52,13 @@ func (r *socketRequest) Serialize() []byte { native.PutUint32(b.Next(4), r.States) networkOrder.PutUint16(b.Next(2), r.ID.SourcePort) networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort) - copy(b.Next(4), r.ID.Source.To4()) - b.Next(12) - copy(b.Next(4), r.ID.Destination.To4()) - b.Next(12) + if r.Family == unix.AF_INET6 { + copy(b.Next(16), r.ID.Source) + copy(b.Next(16), r.ID.Destination) + } else { + copy(b.Next(16), r.ID.Source.To4()) + copy(b.Next(16), r.ID.Destination.To4()) + } native.PutUint32(b.Next(4), r.ID.Interface) native.PutUint32(b.Next(4), r.ID.Cookie[0]) native.PutUint32(b.Next(4), r.ID.Cookie[1]) @@ -61,6 +67,32 @@ func (r *socketRequest) Serialize() []byte { func (r *socketRequest) Len() int { return sizeofSocketRequest } +// According to linux/include/uapi/linux/unix_diag.h +type unixSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + States uint32 + INode uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *unixSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofUnixSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.States) + native.PutUint32(b.Next(4), r.INode) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *unixSocketRequest) Len() int { return sizeofUnixSocketRequest } + type readBuffer struct { Bytes []byte pos int @@ -89,10 +121,15 @@ func (s *Socket) deserialize(b []byte) error { s.Retrans = rb.Read() s.ID.SourcePort = networkOrder.Uint16(rb.Next(2)) s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2)) - s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) - s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) + if s.Family == unix.AF_INET6 { + s.ID.Source = net.IP(rb.Next(16)) + s.ID.Destination = net.IP(rb.Next(16)) + } else { + s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + } s.ID.Interface = native.Uint32(rb.Next(4)) s.ID.Cookie[0] = native.Uint32(rb.Next(4)) s.ID.Cookie[1] = native.Uint32(rb.Next(4)) @@ -104,31 +141,126 @@ func (s *Socket) deserialize(b []byte) error { return nil } +func (u *UnixSocket) deserialize(b []byte) error { + if len(b) < sizeofUnixSocket { + return fmt.Errorf("unix diag data short read (%d); want %d", len(b), sizeofUnixSocket) + } + rb := readBuffer{Bytes: b} + u.Type = rb.Read() + u.Family = rb.Read() + u.State = rb.Read() + u.pad = rb.Read() + u.INode = native.Uint32(rb.Next(4)) + u.Cookie[0] = native.Uint32(rb.Next(4)) + u.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// SocketGet returns the Socket identified by its local and remote addresses. +func (h *Handle) SocketGet(local, remote net.Addr) (*Socket, error) { + var protocol uint8 + var localIP, remoteIP net.IP + var localPort, remotePort uint16 + switch l := local.(type) { + case *net.TCPAddr: + r, ok := remote.(*net.TCPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_TCP + case *net.UDPAddr: + r, ok := remote.(*net.UDPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_UDP + default: + return nil, ErrNotImplemented + } + + var family uint8 + if localIP.To4() != nil && remoteIP.To4() != nil { + family = unix.AF_INET + } + + if family == 0 && localIP.To16() != nil && remoteIP.To16() != nil { + family = unix.AF_INET6 + } + + if family == 0 { + return nil, ErrNotImplemented + } + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: protocol, + States: 0xffffffff, + ID: SocketID{ + SourcePort: localPort, + DestinationPort: remotePort, + Source: localIP, + Destination: remoteIP, + Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, + }, + }) + + msgs, err := req.Execute(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY) + if err != nil { + return nil, err + } + if len(msgs) == 0 { + return nil, errors.New("no message nor error from netlink") + } + if len(msgs) > 2 { + return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + } + + sock := &Socket{} + if err := sock.deserialize(msgs[0]); err != nil { + return nil, err + } + return sock, nil +} + // SocketGet returns the Socket identified by its local and remote addresses. func SocketGet(local, remote net.Addr) (*Socket, error) { + return pkgHandle.SocketGet(local, remote) +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func (h *Handle) SocketDestroy(local, remote net.Addr) error { localTCP, ok := local.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteTCP, ok := remote.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } localIP := localTCP.IP.To4() if localIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteIP := remoteTCP.IP.To4() if remoteIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) if err != nil { - return nil, err + return err } defer s.Close() - req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0) + req := h.newNetlinkRequest(nl.SOCK_DESTROY, unix.NLM_F_ACK) req.AddData(&socketRequest{ Family: unix.AF_INET, Protocol: unix.IPPROTO_TCP, @@ -140,23 +272,319 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, }, }) - s.Send(req) - msgs, from, err := s.Receive() + + _, err = req.Execute(unix.NETLINK_INET_DIAG, 0) + return err +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func SocketDestroy(local, remote net.Addr) error { + return pkgHandle.SocketDestroy(local, remote) +} + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func (h *Handle) SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*InetDiagTCPInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *InetDiagTCPInfoResp + if res, err = attrsToInetDiagTCPInfoResp(attrs, sockInfo); err != nil { + return false + } + + result = append(result, res) + return true + }) + + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + return pkgHandle.SocketDiagTCPInfo(family) +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func (h *Handle) SocketDiagTCP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*Socket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + result = append(result, sockInfo) + return true + }) if err != nil { return nil, err } - if from.Pid != nl.PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return result, nil +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagTCP(family) +} + +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func (h *Handle) SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << (INET_DIAG_VEGASINFO - 1) + extensions |= 1 << (INET_DIAG_INFO - 1) + extensions |= 1 << (INET_DIAG_MEMINFO - 1) + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_UDP, + Ext: extensions, + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*InetDiagUDPInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *InetDiagUDPInfoResp + if res, err = attrsToInetDiagUDPInfoResp(attrs, sockInfo); err != nil { + return false + } + + result = append(result, res) + return true + }) + if err != nil { + return nil, err } - if len(msgs) == 0 { - return nil, errors.New("no message nor error from netlink") + return result, nil +} + +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + return pkgHandle.SocketDiagUDPInfo(family) +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func (h *Handle) SocketDiagUDP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_UDP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*Socket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + result = append(result, sockInfo) + return true + }) + if err != nil { + return nil, err } - if len(msgs) > 2 { - return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + return result, nil +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func SocketDiagUDP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagUDP(family) +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func (h *Handle) UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << UNIX_DIAG_NAME + extensions |= 1 << UNIX_DIAG_PEER + extensions |= 1 << UNIX_DIAG_RQLEN + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + Show: uint32(extensions), + }) + + var result []*UnixDiagInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family != unix.AF_UNIX { + return false + } + + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *UnixDiagInfoResp + if res, err = attrsToUnixDiagInfoResp(attrs, sockInfo); err != nil { + return false + } + result = append(result, res) + return true + }) + if err != nil { + return nil, err } - sock := &Socket{} - if err := sock.deserialize(msgs[0].Data); err != nil { + return result, nil +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + return pkgHandle.UnixSocketDiagInfo() +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func (h *Handle) UnixSocketDiag() ([]*UnixSocket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + }) + + var result []*UnixSocket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family == unix.AF_UNIX { + result = append(result, sockInfo) + } + return true + }) + if err != nil { return nil, err } - return sock, nil + return result, nil +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func UnixSocketDiag() ([]*UnixSocket, error) { + return pkgHandle.UnixSocketDiag() +} + +func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { + info := &InetDiagTCPInfoResp{ + InetDiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case INET_DIAG_INFO: + info.TCPInfo = &TCPInfo{} + if err := info.TCPInfo.deserialize(a.Value); err != nil { + return nil, err + } + case INET_DIAG_BBRINFO: + info.TCPBBRInfo = &TCPBBRInfo{} + if err := info.TCPBBRInfo.deserialize(a.Value); err != nil { + return nil, err + } + } + } + + return info, nil +} + +func attrsToInetDiagUDPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagUDPInfoResp, error) { + info := &InetDiagUDPInfoResp{ + InetDiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case INET_DIAG_MEMINFO: + info.Memory = &MemInfo{} + if err := info.Memory.deserialize(a.Value); err != nil { + return nil, err + } + } + } + + return info, nil +} + +func attrsToUnixDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *UnixSocket) (*UnixDiagInfoResp, error) { + info := &UnixDiagInfoResp{ + DiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case UNIX_DIAG_NAME: + name := string(a.Value[:a.Attr.Len]) + info.Name = &name + case UNIX_DIAG_PEER: + peer := native.Uint32(a.Value) + info.Peer = &peer + case UNIX_DIAG_RQLEN: + info.Queue = &QueueInfo{ + RQueue: native.Uint32(a.Value[:4]), + WQueue: native.Uint32(a.Value[4:]), + } + // default: + // fmt.Println("unknown unix attribute type", a.Attr.Type, "with data", a.Value) + } + } + + return info, nil } diff --git a/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go new file mode 100644 index 000000000..20c82f9c7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go @@ -0,0 +1,195 @@ +package netlink + +import ( + "errors" + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const ( + sizeofXDPSocketRequest = 1 + 1 + 2 + 4 + 4 + 2*4 + sizeofXDPSocket = 0x10 +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L12 +type xdpSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + Ino uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *xdpSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.Ino) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *xdpSocketRequest) Len() int { return sizeofXDPSocketRequest } + +func (s *XDPSocket) deserialize(b []byte) error { + if len(b) < sizeofXDPSocket { + return fmt.Errorf("XDP socket data short read (%d); want %d", len(b), sizeofXDPSocket) + } + rb := readBuffer{Bytes: b} + s.Family = rb.Read() + s.Type = rb.Read() + s.pad = native.Uint16(rb.Next(2)) + s.Ino = native.Uint32(rb.Next(4)) + s.Cookie[0] = native.Uint32(rb.Next(4)) + s.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// XDPSocketGet returns the XDP socket identified by its inode number and/or +// socket cookie. Specify the cookie as SOCK_ANY_COOKIE if +func SocketXDPGetInfo(ino uint32, cookie uint64) (*XDPDiagInfoResp, error) { + // We have a problem here: dumping AF_XDP sockets currently does not support + // filtering. We thus need to dump all XSKs and then only filter afterwards + // :( + xsks, err := SocketDiagXDP() + if err != nil { + return nil, err + } + checkCookie := cookie != SOCK_ANY_COOKIE && cookie != 0 + crumblingCookie := [2]uint32{uint32(cookie), uint32(cookie >> 32)} + checkIno := ino != 0 + var xskinfo *XDPDiagInfoResp + for _, xsk := range xsks { + if checkIno && xsk.XDPDiagMsg.Ino != ino { + continue + } + if checkCookie && xsk.XDPDiagMsg.Cookie != crumblingCookie { + continue + } + if xskinfo != nil { + return nil, errors.New("multiple matching XDP sockets") + } + xskinfo = xsk + } + if xskinfo == nil { + return nil, errors.New("no matching XDP socket") + } + return xskinfo, nil +} + +// SocketDiagXDP requests XDP_DIAG_INFO for XDP family sockets. +func SocketDiagXDP() ([]*XDPDiagInfoResp, error) { + var result []*XDPDiagInfoResp + err := socketDiagXDPExecutor(func(m syscall.NetlinkMessage) error { + sockInfo := &XDPSocket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofXDPSocket:]) + if err != nil { + return err + } + + res, err := attrsToXDPDiagInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// socketDiagXDPExecutor requests XDP_DIAG_INFO for XDP family sockets. +func socketDiagXDPExecutor(receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } + defer s.Close() + + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&xdpSocketRequest{ + Family: unix.AF_XDP, + Show: XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | XDP_SHOW_STATS, + }) + if err := s.Send(req); err != nil { + return err + } + +loop: + for { + msgs, from, err := s.Receive() + if err != nil { + return err + } + if from.Pid != nl.PidKernel { + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return errors.New("no message nor error from netlink") + } + + for _, m := range msgs { + switch m.Header.Type { + case unix.NLMSG_DONE: + break loop + case unix.NLMSG_ERROR: + error := int32(native.Uint32(m.Data[0:4])) + return syscall.Errno(-error) + } + if err := receiver(m); err != nil { + return err + } + } + } + return nil +} + +func attrsToXDPDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *XDPSocket) (*XDPDiagInfoResp, error) { + resp := &XDPDiagInfoResp{ + XDPDiagMsg: sockInfo, + XDPInfo: &XDPInfo{}, + } + for _, a := range attrs { + switch a.Attr.Type { + case XDP_DIAG_INFO: + resp.XDPInfo.Ifindex = native.Uint32(a.Value[0:4]) + resp.XDPInfo.QueueID = native.Uint32(a.Value[4:8]) + case XDP_DIAG_UID: + resp.XDPInfo.UID = native.Uint32(a.Value[0:4]) + case XDP_DIAG_RX_RING: + resp.XDPInfo.RxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_TX_RING: + resp.XDPInfo.TxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_FILL_RING: + resp.XDPInfo.UmemFillRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_COMPLETION_RING: + resp.XDPInfo.UmemCompletionRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM: + umem := &XDPDiagUmem{} + if err := umem.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Umem = umem + case XDP_DIAG_STATS: + stats := &XDPDiagStats{} + if err := stats.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Stats = stats + } + } + return resp, nil +} diff --git a/vendor/github.com/vishvananda/netlink/tcp.go b/vendor/github.com/vishvananda/netlink/tcp.go new file mode 100644 index 000000000..43f80a0fc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp.go @@ -0,0 +1,92 @@ +package netlink + +// TCP States +const ( + TCP_ESTABLISHED = iota + 0x01 + TCP_SYN_SENT + TCP_SYN_RECV + TCP_FIN_WAIT1 + TCP_FIN_WAIT2 + TCP_TIME_WAIT + TCP_CLOSE + TCP_CLOSE_WAIT + TCP_LAST_ACK + TCP_LISTEN + TCP_CLOSING + TCP_NEW_SYN_REC + TCP_MAX_STATES +) + +type TCPInfo struct { + State uint8 + Ca_state uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + Snd_wscale uint8 // no uint4 + Rcv_wscale uint8 + Delivery_rate_app_limited uint8 + Fastopen_client_fail uint8 + Rto uint32 + Ato uint32 + Snd_mss uint32 + Rcv_mss uint32 + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + Last_data_sent uint32 + Last_ack_sent uint32 + Last_data_recv uint32 + Last_ack_recv uint32 + Pmtu uint32 + Rcv_ssthresh uint32 + Rtt uint32 + Rttvar uint32 + Snd_ssthresh uint32 + Snd_cwnd uint32 + Advmss uint32 + Reordering uint32 + Rcv_rtt uint32 + Rcv_space uint32 + Total_retrans uint32 + Pacing_rate uint64 + Max_pacing_rate uint64 + Bytes_acked uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + Bytes_received uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + Segs_out uint32 /* RFC4898 tcpEStatsPerfSegsOut */ + Segs_in uint32 /* RFC4898 tcpEStatsPerfSegsIn */ + Notsent_bytes uint32 + Min_rtt uint32 + Data_segs_in uint32 /* RFC4898 tcpEStatsDataSegsIn */ + Data_segs_out uint32 /* RFC4898 tcpEStatsDataSegsOut */ + Delivery_rate uint64 + Busy_time uint64 /* Time (usec) busy sending data */ + Rwnd_limited uint64 /* Time (usec) limited by receive window */ + Sndbuf_limited uint64 /* Time (usec) limited by send buffer */ + Delivered uint32 + Delivered_ce uint32 + Bytes_sent uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + Bytes_retrans uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */ + Dsack_dups uint32 /* RFC4898 tcpEStatsStackDSACKDups */ + Reord_seen uint32 /* reordering events seen */ + Rcv_ooopack uint32 /* Out-of-order packets received */ + Snd_wnd uint32 /* peer's advertised receive window after * scaling (bytes) */ +} + +type TCPBBRInfo struct { + BBRBW uint64 + BBRMinRTT uint32 + BBRPacingGain uint32 + BBRCwndGain uint32 +} + +// According to https://man7.org/linux/man-pages/man7/sock_diag.7.html +type MemInfo struct { + RMem uint32 + WMem uint32 + FMem uint32 + TMem uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/tcp_linux.go b/vendor/github.com/vishvananda/netlink/tcp_linux.go new file mode 100644 index 000000000..e98036da5 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp_linux.go @@ -0,0 +1,368 @@ +package netlink + +import ( + "bytes" + "errors" + "io" +) + +const ( + tcpBBRInfoLen = 20 + memInfoLen = 16 +) + +func checkDeserErr(err error) error { + if err == io.EOF { + return nil + } + return err +} + +func (t *TCPInfo) deserialize(b []byte) error { + var err error + rb := bytes.NewBuffer(b) + + t.State, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Ca_state, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Retransmits, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Probes, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Backoff, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Options, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + scales, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Snd_wscale = scales >> 4 // first 4 bits + t.Rcv_wscale = scales & 0xf // last 4 bits + + rateLimAndFastOpen, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Delivery_rate_app_limited = rateLimAndFastOpen >> 7 // get first bit + t.Fastopen_client_fail = rateLimAndFastOpen >> 5 & 3 // get next two bits + + next := rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rto = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Ato = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Unacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Sacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Lost = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Retrans = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Fackets = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Pmtu = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rttvar = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_cwnd = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Advmss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reordering = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_space = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Total_retrans = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Max_pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_acked = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_received = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_out = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Notsent_bytes = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Min_rtt = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_out = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Delivery_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Busy_time = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Rwnd_limited = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Sndbuf_limited = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered_ce = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_sent = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_retrans = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Dsack_dups = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reord_seen = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ooopack = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_wnd = native.Uint32(next) + return nil +} + +func (t *TCPBBRInfo) deserialize(b []byte) error { + if len(b) != tcpBBRInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + t.BBRBW = native.Uint64(rb.Next(8)) + t.BBRMinRTT = native.Uint32(rb.Next(4)) + t.BBRPacingGain = native.Uint32(rb.Next(4)) + t.BBRCwndGain = native.Uint32(rb.Next(4)) + + return nil +} + +func (m *MemInfo) deserialize(b []byte) error { + if len(b) != memInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + m.RMem = native.Uint32(rb.Next(4)) + m.WMem = native.Uint32(rb.Next(4)) + m.FMem = native.Uint32(rb.Next(4)) + m.TMem = native.Uint32(rb.Next(4)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/unix_diag.go b/vendor/github.com/vishvananda/netlink/unix_diag.go new file mode 100644 index 000000000..d81776f36 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/unix_diag.go @@ -0,0 +1,27 @@ +package netlink + +// According to linux/include/uapi/linux/unix_diag.h +const ( + UNIX_DIAG_NAME = iota + UNIX_DIAG_VFS + UNIX_DIAG_PEER + UNIX_DIAG_ICONS + UNIX_DIAG_RQLEN + UNIX_DIAG_MEMINFO + UNIX_DIAG_SHUTDOWN + UNIX_DIAG_UID + UNIX_DIAG_MAX +) + +type UnixDiagInfoResp struct { + DiagMsg *UnixSocket + Name *string + Peer *uint32 + Queue *QueueInfo + Shutdown *uint8 +} + +type QueueInfo struct { + RQueue uint32 + WQueue uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/vdpa_linux.go new file mode 100644 index 000000000..7c15986d0 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/vdpa_linux.go @@ -0,0 +1,463 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" + + "golang.org/x/sys/unix" + + "github.com/vishvananda/netlink/nl" +) + +type vdpaDevID struct { + Name string + ID uint32 +} + +// VDPADev contains info about VDPA device +type VDPADev struct { + vdpaDevID + VendorID uint32 + MaxVQS uint32 + MaxVQSize uint16 + MinVQSize uint16 +} + +// VDPADevConfig contains configuration of the VDPA device +type VDPADevConfig struct { + vdpaDevID + Features uint64 + NegotiatedFeatures uint64 + Net VDPADevConfigNet +} + +// VDPADevVStats conatins vStats for the VDPA device +type VDPADevVStats struct { + vdpaDevID + QueueIndex uint32 + Vendor []VDPADevVStatsVendor + NegotiatedFeatures uint64 +} + +// VDPADevVStatsVendor conatins name and value for vendor specific vstat option +type VDPADevVStatsVendor struct { + Name string + Value uint64 +} + +// VDPADevConfigNet conatins status and net config for the VDPA device +type VDPADevConfigNet struct { + Status VDPADevConfigNetStatus + Cfg VDPADevConfigNetCfg +} + +// VDPADevConfigNetStatus contains info about net status +type VDPADevConfigNetStatus struct { + LinkUp bool + Announce bool +} + +// VDPADevConfigNetCfg contains net config for the VDPA device +type VDPADevConfigNetCfg struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 +} + +// VDPAMGMTDev conatins info about VDPA management device +type VDPAMGMTDev struct { + BusName string + DevName string + SupportedClasses uint64 + SupportedFeatures uint64 + MaxVQS uint32 +} + +// VDPANewDevParams contains parameters for new VDPA device +// use SetBits to configure requried features for the device +// example: +// +// VDPANewDevParams{Features: SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR)} +type VDPANewDevParams struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 + Features uint64 +} + +// SetBits set provided bits in the uint64 input value +// usage example: +// features := SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR) +func SetBits(input uint64, pos ...int) uint64 { + for _, p := range pos { + input |= 1 << uint64(p) + } + return input +} + +// IsBitSet check if specific bit is set in the uint64 input value +// usage example: +// hasNetClass := IsBitSet(mgmtDev, VIRTIO_ID_NET) +func IsBitSet(input uint64, pos int) bool { + val := input & (1 << uint64(pos)) + return val > 0 +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + return pkgHandle.VDPANewDev(name, mgmtBus, mgmtName, params) +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func VDPADelDev(name string) error { + return pkgHandle.VDPADelDev(name) +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func VDPAGetDevList() ([]*VDPADev, error) { + return pkgHandle.VDPAGetDevList() +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func VDPAGetDevByName(name string) (*VDPADev, error) { + return pkgHandle.VDPAGetDevByName(name) +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigList() +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigByName(name) +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + return pkgHandle.VDPAGetDevVStats(name, queueIndex) +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevList() +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevByBusAndName(bus, name) +} + +type vdpaNetlinkMessage []syscall.NetlinkRouteAttr + +func (id *vdpaDevID) parseIDAttribute(attr syscall.NetlinkRouteAttr) { + switch attr.Attr.Type { + case nl.VDPA_ATTR_DEV_NAME: + id.Name = nl.BytesToString(attr.Value) + case nl.VDPA_ATTR_DEV_ID: + id.ID = native.Uint32(attr.Value) + } +} + +func (netStatus *VDPADevConfigNetStatus) parseStatusAttribute(value []byte) { + a := native.Uint16(value) + netStatus.Announce = (a & VIRTIO_NET_S_ANNOUNCE) > 0 + netStatus.LinkUp = (a & VIRTIO_NET_S_LINK_UP) > 0 +} + +func (d *VDPADev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + d.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_VENDOR_ID: + d.VendorID = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQ_SIZE: + d.MaxVQSize = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_MIN_VQ_SIZE: + d.MinVQSize = native.Uint16(a.Value) + } + } +} + +func (c *VDPADevConfig) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + c.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_NET_CFG_MACADDR: + c.Net.Cfg.MACAddr = a.Value + case nl.VDPA_ATTR_DEV_NET_STATUS: + c.Net.Status.parseStatusAttribute(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP: + c.Net.Cfg.MaxVQP = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MTU: + c.Net.Cfg.MTU = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_FEATURES: + c.Features = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + c.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (s *VDPADevVStats) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + s.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_QUEUE_INDEX: + s.QueueIndex = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_NAME: + s.Vendor = append(s.Vendor, VDPADevVStatsVendor{Name: nl.BytesToString(a.Value)}) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_VALUE: + if len(s.Vendor) == 0 { + break + } + s.Vendor[len(s.Vendor)-1].Value = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + s.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (d *VDPAMGMTDev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + switch a.Attr.Type { + case nl.VDPA_ATTR_MGMTDEV_BUS_NAME: + d.BusName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_DEV_NAME: + d.DevName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES: + d.SupportedClasses = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_SUPPORTED_FEATURES: + d.SupportedFeatures = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_MGMTDEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + } + } +} + +func (h *Handle) vdpaRequest(command uint8, extraFlags int, attrs []*nl.RtAttr) ([]vdpaNetlinkMessage, error) { + f, err := h.GenlFamilyGet(nl.VDPA_GENL_NAME) + if err != nil { + return nil, err + } + req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_ACK|extraFlags) + req.AddData(&nl.Genlmsg{ + Command: command, + Version: nl.VDPA_GENL_VERSION, + }) + for _, a := range attrs { + req.AddData(a) + } + + resp, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + messages := make([]vdpaNetlinkMessage, 0, len(resp)) + for _, m := range resp { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + messages = append(messages, attrs) + } + return messages, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevGet(dev *string) ([]*VDPADev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + devs := make([]*VDPADev, 0, len(messages)) + for _, m := range messages { + d := &VDPADev{} + d.parseAttributes(m) + devs = append(devs, d) + } + return devs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevConfigGet(dev *string) ([]*VDPADevConfig, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_CONFIG_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPADevConfig, 0, len(messages)) + for _, m := range messages { + cfg := &VDPADevConfig{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaMGMTDevGet(bus, dev *string) ([]*VDPAMGMTDev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(*dev)), + ) + if bus != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(*bus)), + ) + } + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_MGMTDEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPAMGMTDev, 0, len(messages)) + for _, m := range messages { + cfg := &VDPAMGMTDev{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func (h *Handle) VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + attrs := []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(mgmtName)), + } + if mgmtBus != "" { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(mgmtBus))) + } + if len(params.MACAddr) != 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MACADDR, params.MACAddr)) + } + if params.MaxVQP > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP, nl.Uint16Attr(params.MaxVQP))) + } + if params.MTU > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MTU, nl.Uint16Attr(params.MTU))) + } + if params.Features > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_FEATURES, nl.Uint64Attr(params.Features))) + } + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_NEW, 0, attrs) + return err +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func (h *Handle) VDPADelDev(name string) error { + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_DEL, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name))}) + return err +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func (h *Handle) VDPAGetDevList() ([]*VDPADev, error) { + return h.vdpaDevGet(nil) +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func (h *Handle) VDPAGetDevByName(name string) (*VDPADev, error) { + devs, err := h.vdpaDevGet(&name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("device not found") + } + return devs[0], nil +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func (h *Handle) VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return h.vdpaDevConfigGet(nil) +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func (h *Handle) VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + cfgs, err := h.vdpaDevConfigGet(&name) + if err != nil { + return nil, err + } + if len(cfgs) == 0 { + return nil, fmt.Errorf("configuration not found") + } + return cfgs[0], nil +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func (h *Handle) VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_VSTATS_GET, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_DEV_QUEUE_INDEX, nl.Uint32Attr(queueIndex)), + }) + if err != nil { + return nil, err + } + if len(messages) == 0 { + return nil, fmt.Errorf("stats not found") + } + stats := &VDPADevVStats{} + stats.parseAttributes(messages[0]) + return stats, nil +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func (h *Handle) VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return h.vdpaMGMTDevGet(nil, nil) +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func (h *Handle) VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + var busPtr *string + if bus != "" { + busPtr = &bus + } + devs, err := h.vdpaMGMTDevGet(busPtr, &name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("mgmtdev not found") + } + return devs[0], nil +} diff --git a/vendor/github.com/vishvananda/netlink/virtio.go b/vendor/github.com/vishvananda/netlink/virtio.go new file mode 100644 index 000000000..78a497bbc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/virtio.go @@ -0,0 +1,132 @@ +package netlink + +// features for virtio net +const ( + VIRTIO_NET_F_CSUM = 0 // Host handles pkts w/ partial csum + VIRTIO_NET_F_GUEST_CSUM = 1 // Guest handles pkts w/ partial csum + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS = 2 // Dynamic offload configuration. + VIRTIO_NET_F_MTU = 3 // Initial MTU advice + VIRTIO_NET_F_MAC = 5 // Host has given MAC address. + VIRTIO_NET_F_GUEST_TSO4 = 7 // Guest can handle TSOv4 in. + VIRTIO_NET_F_GUEST_TSO6 = 8 // Guest can handle TSOv6 in. + VIRTIO_NET_F_GUEST_ECN = 9 // Guest can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_GUEST_UFO = 10 // Guest can handle UFO in. + VIRTIO_NET_F_HOST_TSO4 = 11 // Host can handle TSOv4 in. + VIRTIO_NET_F_HOST_TSO6 = 12 // Host can handle TSOv6 in. + VIRTIO_NET_F_HOST_ECN = 13 // Host can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_HOST_UFO = 14 // Host can handle UFO in. + VIRTIO_NET_F_MRG_RXBUF = 15 // Host can merge receive buffers. + VIRTIO_NET_F_STATUS = 16 // virtio_net_config.status available + VIRTIO_NET_F_CTRL_VQ = 17 // Control channel available + VIRTIO_NET_F_CTRL_RX = 18 // Control channel RX mode support + VIRTIO_NET_F_CTRL_VLAN = 19 // Control channel VLAN filtering + VIRTIO_NET_F_CTRL_RX_EXTRA = 20 // Extra RX mode control support + VIRTIO_NET_F_GUEST_ANNOUNCE = 21 // Guest can announce device on the* network + VIRTIO_NET_F_MQ = 22 // Device supports Receive Flow Steering + VIRTIO_NET_F_CTRL_MAC_ADDR = 23 // Set MAC address + VIRTIO_NET_F_VQ_NOTF_COAL = 52 // Device supports virtqueue notification coalescing + VIRTIO_NET_F_NOTF_COAL = 53 // Device supports notifications coalescing + VIRTIO_NET_F_GUEST_USO4 = 54 // Guest can handle USOv4 in. + VIRTIO_NET_F_GUEST_USO6 = 55 // Guest can handle USOv6 in. + VIRTIO_NET_F_HOST_USO = 56 // Host can handle USO in. + VIRTIO_NET_F_HASH_REPORT = 57 // Supports hash report + VIRTIO_NET_F_GUEST_HDRLEN = 59 // Guest provides the exact hdr_len value. + VIRTIO_NET_F_RSS = 60 // Supports RSS RX steering + VIRTIO_NET_F_RSC_EXT = 61 // extended coalescing info + VIRTIO_NET_F_STANDBY = 62 // Act as standby for another device with the same MAC. + VIRTIO_NET_F_SPEED_DUPLEX = 63 // Device set linkspeed and duplex + VIRTIO_NET_F_GSO = 6 // Host handles pkts any GSO type +) + +// virtio net status +const ( + VIRTIO_NET_S_LINK_UP = 1 // Link is up + VIRTIO_NET_S_ANNOUNCE = 2 // Announcement is needed +) + +// virtio config +const ( + // Do we get callbacks when the ring is completely used, even if we've + // suppressed them? + VIRTIO_F_NOTIFY_ON_EMPTY = 24 + // Can the device handle any descriptor layout? + VIRTIO_F_ANY_LAYOUT = 27 + // v1.0 compliant + VIRTIO_F_VERSION_1 = 32 + // If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + // If set - use platform DMA tools to access the memory. + // Note the reverse polarity (compared to most other features), + // this is for compatibility with legacy systems. + VIRTIO_F_ACCESS_PLATFORM = 33 + // Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) + VIRTIO_F_IOMMU_PLATFORM = VIRTIO_F_ACCESS_PLATFORM + // This feature indicates support for the packed virtqueue layout. + VIRTIO_F_RING_PACKED = 34 + // Inorder feature indicates that all buffers are used by the device + // in the same order in which they have been made available. + VIRTIO_F_IN_ORDER = 35 + // This feature indicates that memory accesses by the driver and the + // device are ordered in a way described by the platform. + VIRTIO_F_ORDER_PLATFORM = 36 + // Does the device support Single Root I/O Virtualization? + VIRTIO_F_SR_IOV = 37 + // This feature indicates that the driver passes extra data (besides + // identifying the virtqueue) in its device notifications. + VIRTIO_F_NOTIFICATION_DATA = 38 + // This feature indicates that the driver uses the data provided by the device + // as a virtqueue identifier in available buffer notifications. + VIRTIO_F_NOTIF_CONFIG_DATA = 39 + // This feature indicates that the driver can reset a queue individually. + VIRTIO_F_RING_RESET = 40 +) + +// virtio device ids +const ( + VIRTIO_ID_NET = 1 // virtio net + VIRTIO_ID_BLOCK = 2 // virtio block + VIRTIO_ID_CONSOLE = 3 // virtio console + VIRTIO_ID_RNG = 4 // virtio rng + VIRTIO_ID_BALLOON = 5 // virtio balloon + VIRTIO_ID_IOMEM = 6 // virtio ioMemory + VIRTIO_ID_RPMSG = 7 // virtio remote processor messaging + VIRTIO_ID_SCSI = 8 // virtio scsi + VIRTIO_ID_9P = 9 // 9p virtio console + VIRTIO_ID_MAC80211_WLAN = 10 // virtio WLAN MAC + VIRTIO_ID_RPROC_SERIAL = 11 // virtio remoteproc serial link + VIRTIO_ID_CAIF = 12 // Virtio caif + VIRTIO_ID_MEMORY_BALLOON = 13 // virtio memory balloon + VIRTIO_ID_GPU = 16 // virtio GPU + VIRTIO_ID_CLOCK = 17 // virtio clock/timer + VIRTIO_ID_INPUT = 18 // virtio input + VIRTIO_ID_VSOCK = 19 // virtio vsock transport + VIRTIO_ID_CRYPTO = 20 // virtio crypto + VIRTIO_ID_SIGNAL_DIST = 21 // virtio signal distribution device + VIRTIO_ID_PSTORE = 22 // virtio pstore device + VIRTIO_ID_IOMMU = 23 // virtio IOMMU + VIRTIO_ID_MEM = 24 // virtio mem + VIRTIO_ID_SOUND = 25 // virtio sound + VIRTIO_ID_FS = 26 // virtio filesystem + VIRTIO_ID_PMEM = 27 // virtio pmem + VIRTIO_ID_RPMB = 28 // virtio rpmb + VIRTIO_ID_MAC80211_HWSIM = 29 // virtio mac80211-hwsim + VIRTIO_ID_VIDEO_ENCODER = 30 // virtio video encoder + VIRTIO_ID_VIDEO_DECODER = 31 // virtio video decoder + VIRTIO_ID_SCMI = 32 // virtio SCMI + VIRTIO_ID_NITRO_SEC_MOD = 33 // virtio nitro secure module + VIRTIO_ID_I2C_ADAPTER = 34 // virtio i2c adapter + VIRTIO_ID_WATCHDOG = 35 // virtio watchdog + VIRTIO_ID_CAN = 36 // virtio can + VIRTIO_ID_DMABUF = 37 // virtio dmabuf + VIRTIO_ID_PARAM_SERV = 38 // virtio parameter server + VIRTIO_ID_AUDIO_POLICY = 39 // virtio audio policy + VIRTIO_ID_BT = 40 // virtio bluetooth + VIRTIO_ID_GPIO = 41 // virtio gpio + // Virtio Transitional IDs + VIRTIO_TRANS_ID_NET = 0x1000 // transitional virtio net + VIRTIO_TRANS_ID_BLOCK = 0x1001 // transitional virtio block + VIRTIO_TRANS_ID_BALLOON = 0x1002 // transitional virtio balloon + VIRTIO_TRANS_ID_CONSOLE = 0x1003 // transitional virtio console + VIRTIO_TRANS_ID_SCSI = 0x1004 // transitional virtio SCSI + VIRTIO_TRANS_ID_RNG = 0x1005 // transitional virtio rng + VIRTIO_TRANS_ID_9P = 0x1009 // transitional virtio 9p console +) diff --git a/vendor/github.com/vishvananda/netlink/xdp_diag.go b/vendor/github.com/vishvananda/netlink/xdp_diag.go new file mode 100644 index 000000000..e88825bf5 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_diag.go @@ -0,0 +1,34 @@ +package netlink + +import "github.com/vishvananda/netlink/nl" + +const SOCK_ANY_COOKIE = uint64(nl.TCPDIAG_NOCOOKIE)<<32 + uint64(nl.TCPDIAG_NOCOOKIE) + +// XDP diagnosis show flag constants to request particular information elements. +const ( + XDP_SHOW_INFO = 1 << iota + XDP_SHOW_RING_CFG + XDP_SHOW_UMEM + XDP_SHOW_MEMINFO + XDP_SHOW_STATS +) + +// XDP diag element constants +const ( + XDP_DIAG_NONE = iota + XDP_DIAG_INFO // when using XDP_SHOW_INFO + XDP_DIAG_UID // when using XDP_SHOW_INFO + XDP_DIAG_RX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_TX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_UMEM // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_FILL_RING // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_COMPLETION_RING // when using XDP_SHOW_UMEM + XDP_DIAG_MEMINFO // when using XDP_SHOW_MEMINFO + XDP_DIAG_STATS // when using XDP_SHOW_STATS +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 +type XDPDiagInfoResp struct { + XDPDiagMsg *XDPSocket + XDPInfo *XDPInfo +} diff --git a/vendor/github.com/vishvananda/netlink/xdp_linux.go b/vendor/github.com/vishvananda/netlink/xdp_linux.go new file mode 100644 index 000000000..896a406de --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_linux.go @@ -0,0 +1,46 @@ +package netlink + +import ( + "bytes" + "fmt" +) + +const ( + xdrDiagUmemLen = 8 + 8*4 + xdrDiagStatsLen = 6 * 8 +) + +func (x *XDPDiagUmem) deserialize(b []byte) error { + if len(b) < xdrDiagUmemLen { + return fmt.Errorf("XDP umem diagnosis data short read (%d); want %d", len(b), xdrDiagUmemLen) + } + + rb := bytes.NewBuffer(b) + x.Size = native.Uint64(rb.Next(8)) + x.ID = native.Uint32(rb.Next(4)) + x.NumPages = native.Uint32(rb.Next(4)) + x.ChunkSize = native.Uint32(rb.Next(4)) + x.Headroom = native.Uint32(rb.Next(4)) + x.Ifindex = native.Uint32(rb.Next(4)) + x.QueueID = native.Uint32(rb.Next(4)) + x.Flags = native.Uint32(rb.Next(4)) + x.Refs = native.Uint32(rb.Next(4)) + + return nil +} + +func (x *XDPDiagStats) deserialize(b []byte) error { + if len(b) < xdrDiagStatsLen { + return fmt.Errorf("XDP diagnosis statistics short read (%d); want %d", len(b), xdrDiagStatsLen) + } + + rb := bytes.NewBuffer(b) + x.RxDropped = native.Uint64(rb.Next(8)) + x.RxInvalid = native.Uint64(rb.Next(8)) + x.RxFull = native.Uint64(rb.Next(8)) + x.FillRingEmpty = native.Uint64(rb.Next(8)) + x.TxInvalid = native.Uint64(rb.Next(8)) + x.TxRingEmpty = native.Uint64(rb.Next(8)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm_linux.go similarity index 95% rename from vendor/github.com/vishvananda/netlink/xfrm.go rename to vendor/github.com/vishvananda/netlink/xfrm_linux.go index 02b41842e..dd38ed8e0 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_linux.go @@ -14,7 +14,7 @@ const ( XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP XFRM_PROTO_AH Proto = unix.IPPROTO_AH XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS - XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin + XFRM_PROTO_COMP Proto = unix.IPPROTO_COMP XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW ) diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go deleted file mode 100644 index 6219d2772..000000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy.go +++ /dev/null @@ -1,96 +0,0 @@ -package netlink - -import ( - "fmt" - "net" -) - -// Dir is an enum representing an ipsec template direction. -type Dir uint8 - -const ( - XFRM_DIR_IN Dir = iota - XFRM_DIR_OUT - XFRM_DIR_FWD - XFRM_SOCKET_IN - XFRM_SOCKET_OUT - XFRM_SOCKET_FWD -) - -func (d Dir) String() string { - switch d { - case XFRM_DIR_IN: - return "dir in" - case XFRM_DIR_OUT: - return "dir out" - case XFRM_DIR_FWD: - return "dir fwd" - case XFRM_SOCKET_IN: - return "socket in" - case XFRM_SOCKET_OUT: - return "socket out" - case XFRM_SOCKET_FWD: - return "socket fwd" - } - return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) -} - -// PolicyAction is an enum representing an ipsec policy action. -type PolicyAction uint8 - -const ( - XFRM_POLICY_ALLOW PolicyAction = 0 - XFRM_POLICY_BLOCK PolicyAction = 1 -) - -func (a PolicyAction) String() string { - switch a { - case XFRM_POLICY_ALLOW: - return "allow" - case XFRM_POLICY_BLOCK: - return "block" - default: - return fmt.Sprintf("action %d", a) - } -} - -// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec -// policy. These rules are matched with XfrmState to determine encryption -// and authentication algorithms. -type XfrmPolicyTmpl struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int -} - -func (t XfrmPolicyTmpl) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", - t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) -} - -// XfrmPolicy represents an ipsec policy. It represents the overlay network -// and has a list of XfrmPolicyTmpls representing the base addresses of -// the policy. -type XfrmPolicy struct { - Dst *net.IPNet - Src *net.IPNet - Proto Proto - DstPort int - SrcPort int - Dir Dir - Priority int - Index int - Action PolicyAction - Ifindex int - Ifid int - Mark *XfrmMark - Tmpls []XfrmPolicyTmpl -} - -func (p XfrmPolicy) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", - p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go index a4e132ef5..d526739ce 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -1,10 +1,104 @@ package netlink import ( + "fmt" + "net" + "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// Dir is an enum representing an ipsec template direction. +type Dir uint8 + +const ( + XFRM_DIR_IN Dir = iota + XFRM_DIR_OUT + XFRM_DIR_FWD + XFRM_SOCKET_IN + XFRM_SOCKET_OUT + XFRM_SOCKET_FWD +) + +func (d Dir) String() string { + switch d { + case XFRM_DIR_IN: + return "dir in" + case XFRM_DIR_OUT: + return "dir out" + case XFRM_DIR_FWD: + return "dir fwd" + case XFRM_SOCKET_IN: + return "socket in" + case XFRM_SOCKET_OUT: + return "socket out" + case XFRM_SOCKET_FWD: + return "socket fwd" + } + return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) +} + +// PolicyAction is an enum representing an ipsec policy action. +type PolicyAction uint8 + +const ( + XFRM_POLICY_ALLOW PolicyAction = 0 + XFRM_POLICY_BLOCK PolicyAction = 1 +) + +func (a PolicyAction) String() string { + switch a { + case XFRM_POLICY_ALLOW: + return "allow" + case XFRM_POLICY_BLOCK: + return "block" + default: + return fmt.Sprintf("action %d", a) + } +} + +// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec +// policy. These rules are matched with XfrmState to determine encryption +// and authentication algorithms. +type XfrmPolicyTmpl struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + Optional int +} + +func (t XfrmPolicyTmpl) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", + t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) +} + +// XfrmPolicy represents an ipsec policy. It represents the overlay network +// and has a list of XfrmPolicyTmpls representing the base addresses of +// the policy. +type XfrmPolicy struct { + Dst *net.IPNet + Src *net.IPNet + Proto Proto + DstPort int + SrcPort int + Dir Dir + Priority int + Index int + Action PolicyAction + Ifindex int + Ifid int + Mark *XfrmMark + Tmpls []XfrmPolicyTmpl +} + +func (p XfrmPolicy) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", + p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) +} + func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { sel.Family = uint16(nl.FAMILY_V4) if policy.Dst != nil { @@ -75,10 +169,12 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.Family = uint16(nl.GetIPFamily(tmpl.Dst)) userTmpl.XfrmId.Proto = uint8(tmpl.Proto) userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) userTmpl.Mode = uint8(tmpl.Mode) userTmpl.Reqid = uint32(tmpl.Reqid) + userTmpl.Optional = uint8(tmpl.Optional) userTmpl.Aalgos = ^uint32(0) userTmpl.Ealgos = ^uint32(0) userTmpl.Calgos = ^uint32(0) @@ -92,8 +188,10 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -188,8 +286,10 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWPOLICY if nlProto == nl.XFRM_MSG_DELPOLICY { @@ -218,8 +318,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { var policy XfrmPolicy - policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) - policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, uint16(family)) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, uint16(family)) policy.Proto = Proto(msg.Sel.Proto) policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) @@ -247,6 +347,7 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { resTmpl.Mode = Mode(tmpl.Mode) resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi)) resTmpl.Reqid = int(tmpl.Reqid) + resTmpl.Optional = int(tmpl.Optional) policy.Tmpls = append(policy.Tmpls, resTmpl) } case nl.XFRMA_MARK: diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go deleted file mode 100644 index 483d8934a..000000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_state.go +++ /dev/null @@ -1,131 +0,0 @@ -package netlink - -import ( - "fmt" - "net" - "time" -) - -// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. -type XfrmStateAlgo struct { - Name string - Key []byte - TruncateLen int // Auth only - ICVLen int // AEAD only -} - -func (a XfrmStateAlgo) String() string { - base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) - if a.TruncateLen != 0 { - base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) - } - if a.ICVLen != 0 { - base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) - } - return fmt.Sprintf("%s}", base) -} - -// EncapType is an enum representing the optional packet encapsulation. -type EncapType uint8 - -const ( - XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 - XFRM_ENCAP_ESPINUDP -) - -func (e EncapType) String() string { - switch e { - case XFRM_ENCAP_ESPINUDP_NONIKE: - return "espinudp-non-ike" - case XFRM_ENCAP_ESPINUDP: - return "espinudp" - } - return "unknown" -} - -// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. -type XfrmStateEncap struct { - Type EncapType - SrcPort int - DstPort int - OriginalAddress net.IP -} - -func (e XfrmStateEncap) String() string { - return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", - e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) -} - -// XfrmStateLimits represents the configured limits for the state. -type XfrmStateLimits struct { - ByteSoft uint64 - ByteHard uint64 - PacketSoft uint64 - PacketHard uint64 - TimeSoft uint64 - TimeHard uint64 - TimeUseSoft uint64 - TimeUseHard uint64 -} - -// XfrmStateStats represents the current number of bytes/packets -// processed by this State, the State's installation and first use -// time and the replay window counters. -type XfrmStateStats struct { - ReplayWindow uint32 - Replay uint32 - Failed uint32 - Bytes uint64 - Packets uint64 - AddTime uint64 - UseTime uint64 -} - -// XfrmState represents the state of an ipsec policy. It optionally -// contains an XfrmStateAlgo for encryption and one for authentication. -type XfrmState struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int - ReplayWindow int - Limits XfrmStateLimits - Statistics XfrmStateStats - Mark *XfrmMark - OutputMark int - Ifid int - Auth *XfrmStateAlgo - Crypt *XfrmStateAlgo - Aead *XfrmStateAlgo - Encap *XfrmStateEncap - ESN bool -} - -func (sa XfrmState) String() string { - return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %d, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", - sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) -} -func (sa XfrmState) Print(stats bool) string { - if !stats { - return sa.String() - } - at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) - ut := "-" - if sa.Statistics.UseTime > 0 { - ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) - } - return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ - "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", - sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), - sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, - sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) -} - -func printLimit(lmt uint64) string { - if lmt == ^uint64(0) { - return "(INF)" - } - return fmt.Sprintf("%d", lmt) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go index 66c99423c..554f2498c 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -2,12 +2,154 @@ package netlink import ( "fmt" + "net" + "time" "unsafe" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. +type XfrmStateAlgo struct { + Name string + Key []byte + TruncateLen int // Auth only + ICVLen int // AEAD only +} + +func (a XfrmStateAlgo) String() string { + base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) + if a.TruncateLen != 0 { + base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) + } + if a.ICVLen != 0 { + base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) + } + return fmt.Sprintf("%s}", base) +} + +// EncapType is an enum representing the optional packet encapsulation. +type EncapType uint8 + +const ( + XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 + XFRM_ENCAP_ESPINUDP +) + +func (e EncapType) String() string { + switch e { + case XFRM_ENCAP_ESPINUDP_NONIKE: + return "espinudp-non-ike" + case XFRM_ENCAP_ESPINUDP: + return "espinudp" + } + return "unknown" +} + +// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. +type XfrmStateEncap struct { + Type EncapType + SrcPort int + DstPort int + OriginalAddress net.IP +} + +func (e XfrmStateEncap) String() string { + return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", + e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) +} + +// XfrmStateLimits represents the configured limits for the state. +type XfrmStateLimits struct { + ByteSoft uint64 + ByteHard uint64 + PacketSoft uint64 + PacketHard uint64 + TimeSoft uint64 + TimeHard uint64 + TimeUseSoft uint64 + TimeUseHard uint64 +} + +// XfrmStateStats represents the current number of bytes/packets +// processed by this State, the State's installation and first use +// time and the replay window counters. +type XfrmStateStats struct { + ReplayWindow uint32 + Replay uint32 + Failed uint32 + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +// XfrmReplayState represents the sequence number states for +// "legacy" anti-replay mode. +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func (r XfrmReplayState) String() string { + return fmt.Sprintf("{OSeq: 0x%x, Seq: 0x%x, BitMap: 0x%x}", + r.OSeq, r.Seq, r.BitMap) +} + +// XfrmState represents the state of an ipsec policy. It optionally +// contains an XfrmStateAlgo for encryption and one for authentication. +type XfrmState struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Limits XfrmStateLimits + Statistics XfrmStateStats + Mark *XfrmMark + OutputMark *XfrmMark + Ifid int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Aead *XfrmStateAlgo + Encap *XfrmStateEncap + ESN bool + DontEncapDSCP bool + OSeqMayWrap bool + Replay *XfrmReplayState + Selector *XfrmPolicy +} + +func (sa XfrmState) String() string { + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t, DontEncapDSCP: %t, OSeqMayWrap: %t, Replay: %v", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN, sa.DontEncapDSCP, sa.OSeqMayWrap, sa.Replay) +} +func (sa XfrmState) Print(stats bool) string { + if !stats { + return sa.String() + } + at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) + ut := "-" + if sa.Statistics.UseTime > 0 { + ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) + } + return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ + "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", + sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), + sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, + sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) +} + +func printLimit(lmt uint64) string { + if lmt == ^uint64(0) { + return "(INF)" + } + return fmt.Sprintf("%d", lmt) +} func writeStateAlgo(a *XfrmStateAlgo) []byte { algo := nl.XfrmAlgo{ AlgKeyLen: uint32(len(a.Key) * 8), @@ -77,6 +219,14 @@ func writeReplayEsn(replayWindow int) []byte { return replayEsn.Serialize() } +func writeReplay(r *XfrmReplayState) []byte { + return (&nl.XfrmReplayState{ + OSeq: r.OSeq, + Seq: r.Seq, + BitMap: r.BitMap, + }).Serialize() +} + // XfrmStateAdd will add an xfrm state to the system. // Equivalent to: `ip xfrm state add $state` func XfrmStateAdd(state *XfrmState) error { @@ -111,7 +261,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { // A state with spi 0 can't be deleted so don't allow it to be set if state.Spi == 0 { - return fmt.Errorf("Spi must be set when adding xfrm state.") + return fmt.Errorf("Spi must be set when adding xfrm state") } req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) @@ -158,13 +308,34 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow)) req.AddData(out) } - if state.OutputMark != 0 { - out := nl.NewRtAttr(nl.XFRMA_OUTPUT_MARK, nl.Uint32Attr(uint32(state.OutputMark))) + if state.OutputMark != nil { + out := nl.NewRtAttr(nl.XFRMA_SET_MARK, nl.Uint32Attr(state.OutputMark.Value)) + req.AddData(out) + if state.OutputMark.Mask != 0 { + out = nl.NewRtAttr(nl.XFRMA_SET_MARK_MASK, nl.Uint32Attr(state.OutputMark.Mask)) + req.AddData(out) + } + } + if state.OSeqMayWrap || state.DontEncapDSCP { + var flags uint32 + if state.DontEncapDSCP { + flags |= nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP + } + if state.OSeqMayWrap { + flags |= nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP + } + out := nl.NewRtAttr(nl.XFRMA_SA_EXTRA_FLAGS, nl.Uint32Attr(flags)) + req.AddData(out) + } + if state.Replay != nil { + out := nl.NewRtAttr(nl.XFRMA_REPLAY_VAL, writeReplay(state.Replay)) req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -180,7 +351,6 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { msg.Min = 0x100 msg.Max = 0xffffffff req.AddData(msg) - if state.Mark != nil { out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) req.AddData(out) @@ -277,8 +447,10 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWSA if nlProto == nl.XFRM_MSG_DELSA { @@ -306,7 +478,6 @@ var familyError = fmt.Errorf("family error") func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { var state XfrmState - state.Dst = msg.Id.Daddr.ToIP() state.Src = msg.Saddr.ToIP() state.Proto = Proto(msg.Id.Proto) @@ -316,20 +487,25 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { state.ReplayWindow = int(msg.ReplayWindow) lftToLimits(&msg.Lft, &state.Limits) curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) + state.Selector = &XfrmPolicy{ + Dst: msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, msg.Sel.Family), + Src: msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, msg.Sel.Family), + Proto: Proto(msg.Sel.Proto), + DstPort: int(nl.Swap16(msg.Sel.Dport)), + SrcPort: int(nl.Swap16(msg.Sel.Sport)), + Ifindex: int(msg.Sel.Ifindex), + } return &state } func parseXfrmState(m []byte, family int) (*XfrmState, error) { msg := nl.DeserializeXfrmUsersaInfo(m) - // This is mainly for the state dump if family != FAMILY_ALL && family != int(msg.Family) { return nil, familyError } - state := xfrmStateFromXfrmUsersaInfo(msg) - attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:]) if err != nil { return nil, err @@ -377,10 +553,37 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { state.Mark = new(XfrmMark) state.Mark.Value = mark.Value state.Mark.Mask = mark.Mask - case nl.XFRMA_OUTPUT_MARK: - state.OutputMark = int(native.Uint32(attr.Value)) + case nl.XFRMA_SA_EXTRA_FLAGS: + flags := native.Uint32(attr.Value) + if (flags & nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP) != 0 { + state.DontEncapDSCP = true + } + if (flags & nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP) != 0 { + state.OSeqMayWrap = true + } + case nl.XFRMA_SET_MARK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Value = native.Uint32(attr.Value) + case nl.XFRMA_SET_MARK_MASK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Mask = native.Uint32(attr.Value) + if state.OutputMark.Mask == 0xffffffff { + state.OutputMark.Mask = 0 + } case nl.XFRMA_IF_ID: state.Ifid = int(native.Uint32(attr.Value)) + case nl.XFRMA_REPLAY_VAL: + if state.Replay == nil { + state.Replay = new(XfrmReplayState) + } + replay := nl.DeserializeXfrmReplayState(attr.Value[:]) + state.Replay.OSeq = replay.OSeq + state.Replay.Seq = replay.Seq + state.Replay.BitMap = replay.BitMap } } @@ -457,6 +660,9 @@ func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { msg.Id.Spi = nl.Swap32(uint32(state.Spi)) msg.Reqid = uint32(state.Reqid) msg.ReplayWindow = uint8(state.ReplayWindow) - + msg.Sel = nl.XfrmSelector{} + if state.Selector != nil { + selFromPolicy(&msg.Sel, state.Selector) + } return msg } diff --git a/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go new file mode 100644 index 000000000..12fdd26d7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go @@ -0,0 +1,7 @@ +//go:build !linux +// +build !linux + +package netlink + +type XfrmPolicy struct{} +type XfrmState struct{} diff --git a/vendor/github.com/vishvananda/netns/.golangci.yml b/vendor/github.com/vishvananda/netns/.golangci.yml new file mode 100644 index 000000000..600bef78e --- /dev/null +++ b/vendor/github.com/vishvananda/netns/.golangci.yml @@ -0,0 +1,2 @@ +run: + timeout: 5m diff --git a/vendor/github.com/vishvananda/netns/README.md b/vendor/github.com/vishvananda/netns/README.md index 6b45cfb89..bdfedbe81 100644 --- a/vendor/github.com/vishvananda/netns/README.md +++ b/vendor/github.com/vishvananda/netns/README.md @@ -23,6 +23,7 @@ import ( "fmt" "net" "runtime" + "github.com/vishvananda/netns" ) diff --git a/vendor/github.com/vishvananda/netns/doc.go b/vendor/github.com/vishvananda/netns/doc.go new file mode 100644 index 000000000..cd4093a4d --- /dev/null +++ b/vendor/github.com/vishvananda/netns/doc.go @@ -0,0 +1,9 @@ +// Package netns allows ultra-simple network namespace handling. NsHandles +// can be retrieved and set. Note that the current namespace is thread +// local so actions that set and reset namespaces should use LockOSThread +// to make sure the namespace doesn't change due to a goroutine switch. +// It is best to close NsHandles when you are done with them. This can be +// accomplished via a `defer ns.Close()` on the handle. Changing namespaces +// requires elevated privileges, so in most cases this code needs to be run +// as root. +package netns diff --git a/vendor/github.com/vishvananda/netns/netns_linux.go b/vendor/github.com/vishvananda/netns/netns_linux.go index b1e3b07c0..2ed7c7e2f 100644 --- a/vendor/github.com/vishvananda/netns/netns_linux.go +++ b/vendor/github.com/vishvananda/netns/netns_linux.go @@ -1,77 +1,105 @@ -// +build linux - package netns import ( "fmt" - "io/ioutil" "os" + "path" "path/filepath" - "runtime" "strconv" "strings" - "syscall" + + "golang.org/x/sys/unix" ) -// SYS_SETNS syscall allows changing the namespace of the current process. -var SYS_SETNS = map[string]uintptr{ - "386": 346, - "amd64": 308, - "arm64": 268, - "arm": 375, - "mips": 4344, - "mipsle": 4344, - "mips64le": 4344, - "ppc64": 350, - "ppc64le": 350, - "riscv64": 268, - "s390x": 339, -}[runtime.GOARCH] - -// Deprecated: use syscall pkg instead (go >= 1.5 needed). +// Deprecated: use golang.org/x/sys/unix pkg instead. const ( - CLONE_NEWUTS = 0x04000000 /* New utsname group? */ - CLONE_NEWIPC = 0x08000000 /* New ipcs */ - CLONE_NEWUSER = 0x10000000 /* New user namespace */ - CLONE_NEWPID = 0x20000000 /* New pid namespace */ - CLONE_NEWNET = 0x40000000 /* New network namespace */ - CLONE_IO = 0x80000000 /* Get io context */ + CLONE_NEWUTS = unix.CLONE_NEWUTS /* New utsname group? */ + CLONE_NEWIPC = unix.CLONE_NEWIPC /* New ipcs */ + CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */ + CLONE_NEWPID = unix.CLONE_NEWPID /* New pid namespace */ + CLONE_NEWNET = unix.CLONE_NEWNET /* New network namespace */ + CLONE_IO = unix.CLONE_IO /* Get io context */ ) -// Setns sets namespace using syscall. Note that this should be a method -// in syscall but it has not been added. +const bindMountPath = "/run/netns" /* Bind mount path for named netns */ + +// Setns sets namespace using golang.org/x/sys/unix.Setns. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. func Setns(ns NsHandle, nstype int) (err error) { - _, _, e1 := syscall.Syscall(SYS_SETNS, uintptr(ns), uintptr(nstype), 0) - if e1 != 0 { - err = e1 - } - return + return unix.Setns(int(ns), nstype) } // Set sets the current network namespace to the namespace represented // by NsHandle. func Set(ns NsHandle) (err error) { - return Setns(ns, CLONE_NEWNET) + return unix.Setns(int(ns), unix.CLONE_NEWNET) } // New creates a new network namespace, sets it as current and returns // a handle to it. func New() (ns NsHandle, err error) { - if err := syscall.Unshare(CLONE_NEWNET); err != nil { + if err := unix.Unshare(unix.CLONE_NEWNET); err != nil { return -1, err } return Get() } +// NewNamed creates a new named network namespace, sets it as current, +// and returns a handle to it +func NewNamed(name string) (NsHandle, error) { + if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { + err = os.MkdirAll(bindMountPath, 0755) + if err != nil { + return None(), err + } + } + + newNs, err := New() + if err != nil { + return None(), err + } + + namedPath := path.Join(bindMountPath, name) + + f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) + if err != nil { + newNs.Close() + return None(), err + } + f.Close() + + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) + err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "") + if err != nil { + newNs.Close() + return None(), err + } + + return newNs, nil +} + +// DeleteNamed deletes a named network namespace +func DeleteNamed(name string) error { + namedPath := path.Join(bindMountPath, name) + + err := unix.Unmount(namedPath, unix.MNT_DETACH) + if err != nil { + return err + } + + return os.Remove(namedPath) +} + // Get gets a handle to the current threads network namespace. func Get() (NsHandle, error) { - return GetFromThread(os.Getpid(), syscall.Gettid()) + return GetFromThread(os.Getpid(), unix.Gettid()) } // GetFromPath gets a handle to a network namespace // identified by the path func GetFromPath(path string) (NsHandle, error) { - fd, err := syscall.Open(path, syscall.O_RDONLY, 0) + fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0) if err != nil { return -1, err } @@ -81,7 +109,7 @@ func GetFromPath(path string) (NsHandle, error) { // GetFromName gets a handle to a named network namespace such as one // created by `ip netns add`. func GetFromName(name string) (NsHandle, error) { - return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name)) + return GetFromPath(filepath.Join(bindMountPath, name)) } // GetFromPid gets a handle to the network namespace of a given pid. @@ -106,33 +134,38 @@ func GetFromDocker(id string) (NsHandle, error) { } // borrowed from docker/utils/utils.go -func findCgroupMountpoint(cgroupType string) (string, error) { - output, err := ioutil.ReadFile("/proc/mounts") +func findCgroupMountpoint(cgroupType string) (int, string, error) { + output, err := os.ReadFile("/proc/mounts") if err != nil { - return "", err + return -1, "", err } // /proc/mounts has 6 fields per line, one mount per line, e.g. // cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0 for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, " ") - if len(parts) == 6 && parts[2] == "cgroup" { - for _, opt := range strings.Split(parts[3], ",") { - if opt == cgroupType { - return parts[1], nil + if len(parts) == 6 { + switch parts[2] { + case "cgroup2": + return 2, parts[1], nil + case "cgroup": + for _, opt := range strings.Split(parts[3], ",") { + if opt == cgroupType { + return 1, parts[1], nil + } } } } } - return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) + return -1, "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) } // Returns the relative path to the cgroup docker is running in. // borrowed from docker/utils/utils.go // modified to get the docker pid instead of using /proc/self -func getThisCgroup(cgroupType string) (string, error) { - dockerpid, err := ioutil.ReadFile("/var/run/docker.pid") +func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) { + dockerpid, err := os.ReadFile("/var/run/docker.pid") if err != nil { return "", err } @@ -144,14 +177,15 @@ func getThisCgroup(cgroupType string) (string, error) { if err != nil { return "", err } - output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) if err != nil { return "", err } for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, ":") // any type used by docker should work - if parts[1] == cgroupType { + if (cgroupVer == 1 && parts[1] == cgroupType) || + (cgroupVer == 2 && parts[1] == "") { return parts[2], nil } } @@ -163,40 +197,56 @@ func getThisCgroup(cgroupType string) (string, error) { // modified to only return the first pid // modified to glob with id // modified to search for newer docker containers +// modified to look for cgroups v2 func getPidForContainer(id string) (int, error) { pid := 0 // memory is chosen randomly, any cgroup used by docker works cgroupType := "memory" - cgroupRoot, err := findCgroupMountpoint(cgroupType) + cgroupVer, cgroupRoot, err := findCgroupMountpoint(cgroupType) if err != nil { return pid, err } - cgroupThis, err := getThisCgroup(cgroupType) + cgroupDocker, err := getDockerCgroup(cgroupVer, cgroupType) if err != nil { return pid, err } id += "*" + var pidFile string + if cgroupVer == 1 { + pidFile = "tasks" + } else if cgroupVer == 2 { + pidFile = "cgroup.procs" + } else { + return -1, fmt.Errorf("Invalid cgroup version '%d'", cgroupVer) + } + attempts := []string{ - filepath.Join(cgroupRoot, cgroupThis, id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, id, pidFile), // With more recent lxc versions use, cgroup will be in lxc/ - filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "lxc", id, pidFile), // With more recent docker, cgroup will be in docker/ - filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "docker", id, pidFile), // Even more recent docker versions under systemd use docker-.scope/ - filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", pidFile), // Even more recent docker versions under cgroup/systemd/docker// - filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"), - // Kubernetes with docker and CNI is even more different - filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"), - // Another flavor of containers location in recent kubernetes 1.11+ - filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), - // When runs inside of a container with recent kubernetes 1.11+ - filepath.Join(cgroupRoot, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "..", "systemd", "docker", id, pidFile), + // Kubernetes with docker and CNI is even more different. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, pidFile), + // Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), + // When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), } var filename string @@ -214,7 +264,7 @@ func getPidForContainer(id string) (int, error) { return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1]) } - output, err := ioutil.ReadFile(filename) + output, err := os.ReadFile(filename) if err != nil { return pid, err } diff --git a/vendor/github.com/vishvananda/netns/netns_unspecified.go b/vendor/github.com/vishvananda/netns/netns_others.go similarity index 63% rename from vendor/github.com/vishvananda/netns/netns_unspecified.go rename to vendor/github.com/vishvananda/netns/netns_others.go index d06af62b6..048983774 100644 --- a/vendor/github.com/vishvananda/netns/netns_unspecified.go +++ b/vendor/github.com/vishvananda/netns/netns_others.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package netns @@ -10,6 +11,14 @@ var ( ErrNotImplemented = errors.New("not implemented") ) +// Setns sets namespace using golang.org/x/sys/unix.Setns on Linux. It +// is not implemented on other platforms. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. +func Setns(ns NsHandle, nstype int) (err error) { + return ErrNotImplemented +} + func Set(ns NsHandle) (err error) { return ErrNotImplemented } @@ -18,6 +27,14 @@ func New() (ns NsHandle, err error) { return -1, ErrNotImplemented } +func NewNamed(name string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func DeleteNamed(name string) error { + return ErrNotImplemented +} + func Get() (NsHandle, error) { return -1, ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netns/netns.go b/vendor/github.com/vishvananda/netns/nshandle_linux.go similarity index 60% rename from vendor/github.com/vishvananda/netns/netns.go rename to vendor/github.com/vishvananda/netns/nshandle_linux.go index dd2f21570..1baffb66a 100644 --- a/vendor/github.com/vishvananda/netns/netns.go +++ b/vendor/github.com/vishvananda/netns/nshandle_linux.go @@ -1,16 +1,9 @@ -// Package netns allows ultra-simple network namespace handling. NsHandles -// can be retrieved and set. Note that the current namespace is thread -// local so actions that set and reset namespaces should use LockOSThread -// to make sure the namespace doesn't change due to a goroutine switch. -// It is best to close NsHandles when you are done with them. This can be -// accomplished via a `defer ns.Close()` on the handle. Changing namespaces -// requires elevated privileges, so in most cases this code needs to be run -// as root. package netns import ( "fmt" - "syscall" + + "golang.org/x/sys/unix" ) // NsHandle is a handle to a network namespace. It can be cast directly @@ -24,11 +17,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { if ns == other { return true } - var s1, s2 syscall.Stat_t - if err := syscall.Fstat(int(ns), &s1); err != nil { + var s1, s2 unix.Stat_t + if err := unix.Fstat(int(ns), &s1); err != nil { return false } - if err := syscall.Fstat(int(other), &s2); err != nil { + if err := unix.Fstat(int(other), &s2); err != nil { return false } return (s1.Dev == s2.Dev) && (s1.Ino == s2.Ino) @@ -36,11 +29,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { // String shows the file descriptor number and its dev and inode. func (ns NsHandle) String() string { - var s syscall.Stat_t if ns == -1 { - return "NS(None)" + return "NS(none)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return fmt.Sprintf("NS(%d: unknown)", ns) } return fmt.Sprintf("NS(%d: %d, %d)", ns, s.Dev, s.Ino) @@ -49,11 +42,11 @@ func (ns NsHandle) String() string { // UniqueId returns a string which uniquely identifies the namespace // associated with the network handle. func (ns NsHandle) UniqueId() string { - var s syscall.Stat_t if ns == -1 { return "NS(none)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return "NS(unknown)" } return fmt.Sprintf("NS(%d:%d)", s.Dev, s.Ino) @@ -67,10 +60,10 @@ func (ns NsHandle) IsOpen() bool { // Close closes the NsHandle and resets its file descriptor to -1. // It is not safe to use an NsHandle after Close() is called. func (ns *NsHandle) Close() error { - if err := syscall.Close(int(*ns)); err != nil { + if err := unix.Close(int(*ns)); err != nil { return err } - (*ns) = -1 + *ns = -1 return nil } diff --git a/vendor/github.com/vishvananda/netns/nshandle_others.go b/vendor/github.com/vishvananda/netns/nshandle_others.go new file mode 100644 index 000000000..af727bc09 --- /dev/null +++ b/vendor/github.com/vishvananda/netns/nshandle_others.go @@ -0,0 +1,45 @@ +//go:build !linux +// +build !linux + +package netns + +// NsHandle is a handle to a network namespace. It can only be used on Linux, +// but provides stub methods on other platforms. +type NsHandle int + +// Equal determines if two network handles refer to the same network +// namespace. It is only implemented on Linux. +func (ns NsHandle) Equal(_ NsHandle) bool { + return false +} + +// String shows the file descriptor number and its dev and inode. +// It is only implemented on Linux, and returns "NS(none)" on other +// platforms. +func (ns NsHandle) String() string { + return "NS(none)" +} + +// UniqueId returns a string which uniquely identifies the namespace +// associated with the network handle. It is only implemented on Linux, +// and returns "NS(none)" on other platforms. +func (ns NsHandle) UniqueId() string { + return "NS(none)" +} + +// IsOpen returns true if Close() has not been called. It is only implemented +// on Linux and always returns false on other platforms. +func (ns NsHandle) IsOpen() bool { + return false +} + +// Close closes the NsHandle and resets its file descriptor to -1. +// It is only implemented on Linux. +func (ns *NsHandle) Close() error { + return nil +} + +// None gets an empty (closed) NsHandle. +func None() NsHandle { + return NsHandle(-1) +} diff --git a/vendor/golang.org/x/crypto/sha3/doc.go b/vendor/golang.org/x/crypto/sha3/doc.go index 7e0230907..bbf391fe6 100644 --- a/vendor/golang.org/x/crypto/sha3/doc.go +++ b/vendor/golang.org/x/crypto/sha3/doc.go @@ -5,6 +5,10 @@ // Package sha3 implements the SHA-3 fixed-output-length hash functions and // the SHAKE variable-output-length hash functions defined by FIPS-202. // +// All types in this package also implement [encoding.BinaryMarshaler], +// [encoding.BinaryAppender] and [encoding.BinaryUnmarshaler] to marshal and +// unmarshal the internal state of the hash. +// // Both types of hash function use the "sponge" construction and the Keccak // permutation. For a detailed specification see http://keccak.noekeon.org/ // diff --git a/vendor/golang.org/x/crypto/sha3/hashes.go b/vendor/golang.org/x/crypto/sha3/hashes.go index c544b29e5..31fffbe04 100644 --- a/vendor/golang.org/x/crypto/sha3/hashes.go +++ b/vendor/golang.org/x/crypto/sha3/hashes.go @@ -48,33 +48,52 @@ func init() { crypto.RegisterHash(crypto.SHA3_512, New512) } +const ( + dsbyteSHA3 = 0b00000110 + dsbyteKeccak = 0b00000001 + dsbyteShake = 0b00011111 + dsbyteCShake = 0b00000100 + + // rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in + // bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits. + rateK256 = (1600 - 256) / 8 + rateK448 = (1600 - 448) / 8 + rateK512 = (1600 - 512) / 8 + rateK768 = (1600 - 768) / 8 + rateK1024 = (1600 - 1024) / 8 +) + func new224Generic() *state { - return &state{rate: 144, outputLen: 28, dsbyte: 0x06} + return &state{rate: rateK448, outputLen: 28, dsbyte: dsbyteSHA3} } func new256Generic() *state { - return &state{rate: 136, outputLen: 32, dsbyte: 0x06} + return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteSHA3} } func new384Generic() *state { - return &state{rate: 104, outputLen: 48, dsbyte: 0x06} + return &state{rate: rateK768, outputLen: 48, dsbyte: dsbyteSHA3} } func new512Generic() *state { - return &state{rate: 72, outputLen: 64, dsbyte: 0x06} + return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteSHA3} } // NewLegacyKeccak256 creates a new Keccak-256 hash. // // Only use this function if you require compatibility with an existing cryptosystem // that uses non-standard padding. All other users should use New256 instead. -func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} } +func NewLegacyKeccak256() hash.Hash { + return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak} +} // NewLegacyKeccak512 creates a new Keccak-512 hash. // // Only use this function if you require compatibility with an existing cryptosystem // that uses non-standard padding. All other users should use New512 instead. -func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} } +func NewLegacyKeccak512() hash.Hash { + return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak} +} // Sum224 returns the SHA3-224 digest of the data. func Sum224(data []byte) (digest [28]byte) { diff --git a/vendor/golang.org/x/crypto/sha3/sha3.go b/vendor/golang.org/x/crypto/sha3/sha3.go index afedde5ab..6658c4447 100644 --- a/vendor/golang.org/x/crypto/sha3/sha3.go +++ b/vendor/golang.org/x/crypto/sha3/sha3.go @@ -4,6 +4,15 @@ package sha3 +import ( + "crypto/subtle" + "encoding/binary" + "errors" + "unsafe" + + "golang.org/x/sys/cpu" +) + // spongeDirection indicates the direction bytes are flowing through the sponge. type spongeDirection int @@ -14,16 +23,13 @@ const ( spongeSqueezing ) -const ( - // maxRate is the maximum size of the internal buffer. SHAKE-256 - // currently needs the largest buffer. - maxRate = 168 -) - type state struct { - // Generic sponge components. - a [25]uint64 // main state of the hash - rate int // the number of bytes of state to use + a [1600 / 8]byte // main state of the hash + + // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR + // into before running the permutation. If squeezing, it's the remaining + // output to produce before running the permutation. + n, rate int // dsbyte contains the "domain separation" bits and the first bit of // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the @@ -39,10 +45,6 @@ type state struct { // Extendable-Output Functions (May 2014)" dsbyte byte - i, n int // storage[i:n] is the buffer, i is only used while squeezing - storage [maxRate]byte - - // Specific to SHA-3 and SHAKE. outputLen int // the default output size in bytes state spongeDirection // whether the sponge is absorbing or squeezing } @@ -61,7 +63,7 @@ func (d *state) Reset() { d.a[i] = 0 } d.state = spongeAbsorbing - d.i, d.n = 0, 0 + d.n = 0 } func (d *state) clone() *state { @@ -69,22 +71,25 @@ func (d *state) clone() *state { return &ret } -// permute applies the KeccakF-1600 permutation. It handles -// any input-output buffering. +// permute applies the KeccakF-1600 permutation. func (d *state) permute() { - switch d.state { - case spongeAbsorbing: - // If we're absorbing, we need to xor the input into the state - // before applying the permutation. - xorIn(d, d.storage[:d.rate]) - d.n = 0 - keccakF1600(&d.a) - case spongeSqueezing: - // If we're squeezing, we need to apply the permutation before - // copying more output. - keccakF1600(&d.a) - d.i = 0 - copyOut(d, d.storage[:d.rate]) + var a *[25]uint64 + if cpu.IsBigEndian { + a = new([25]uint64) + for i := range a { + a[i] = binary.LittleEndian.Uint64(d.a[i*8:]) + } + } else { + a = (*[25]uint64)(unsafe.Pointer(&d.a)) + } + + keccakF1600(a) + d.n = 0 + + if cpu.IsBigEndian { + for i := range a { + binary.LittleEndian.PutUint64(d.a[i*8:], a[i]) + } } } @@ -92,53 +97,36 @@ func (d *state) permute() { // the multi-bitrate 10..1 padding rule, and permutes the state. func (d *state) padAndPermute() { // Pad with this instance's domain-separator bits. We know that there's - // at least one byte of space in d.buf because, if it were full, + // at least one byte of space in the sponge because, if it were full, // permute would have been called to empty it. dsbyte also contains the // first one bit for the padding. See the comment in the state struct. - d.storage[d.n] = d.dsbyte - d.n++ - for d.n < d.rate { - d.storage[d.n] = 0 - d.n++ - } + d.a[d.n] ^= d.dsbyte // This adds the final one bit for the padding. Because of the way that // bits are numbered from the LSB upwards, the final bit is the MSB of // the last byte. - d.storage[d.rate-1] ^= 0x80 + d.a[d.rate-1] ^= 0x80 // Apply the permutation d.permute() d.state = spongeSqueezing - d.n = d.rate - copyOut(d, d.storage[:d.rate]) } // Write absorbs more data into the hash's state. It panics if any // output has already been read. -func (d *state) Write(p []byte) (written int, err error) { +func (d *state) Write(p []byte) (n int, err error) { if d.state != spongeAbsorbing { panic("sha3: Write after Read") } - written = len(p) + + n = len(p) for len(p) > 0 { - if d.n == 0 && len(p) >= d.rate { - // The fast path; absorb a full "rate" bytes of input and apply the permutation. - xorIn(d, p[:d.rate]) - p = p[d.rate:] - keccakF1600(&d.a) - } else { - // The slow path; buffer the input until we can fill the sponge, and then xor it in. - todo := d.rate - d.n - if todo > len(p) { - todo = len(p) - } - d.n += copy(d.storage[d.n:], p[:todo]) - p = p[todo:] - - // If the sponge is full, apply the permutation. - if d.n == d.rate { - d.permute() - } + x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p) + d.n += x + p = p[x:] + + // If the sponge is full, apply the permutation. + if d.n == d.rate { + d.permute() } } @@ -156,14 +144,14 @@ func (d *state) Read(out []byte) (n int, err error) { // Now, do the squeezing. for len(out) > 0 { - n := copy(out, d.storage[d.i:d.n]) - d.i += n - out = out[n:] - // Apply the permutation if we've squeezed the sponge dry. - if d.i == d.rate { + if d.n == d.rate { d.permute() } + + x := copy(out, d.a[d.n:d.rate]) + d.n += x + out = out[x:] } return @@ -183,3 +171,74 @@ func (d *state) Sum(in []byte) []byte { dup.Read(hash) return append(in, hash...) } + +const ( + magicSHA3 = "sha\x08" + magicShake = "sha\x09" + magicCShake = "sha\x0a" + magicKeccak = "sha\x0b" + // magic || rate || main state || n || sponge direction + marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1 +) + +func (d *state) MarshalBinary() ([]byte, error) { + return d.AppendBinary(make([]byte, 0, marshaledSize)) +} + +func (d *state) AppendBinary(b []byte) ([]byte, error) { + switch d.dsbyte { + case dsbyteSHA3: + b = append(b, magicSHA3...) + case dsbyteShake: + b = append(b, magicShake...) + case dsbyteCShake: + b = append(b, magicCShake...) + case dsbyteKeccak: + b = append(b, magicKeccak...) + default: + panic("unknown dsbyte") + } + // rate is at most 168, and n is at most rate. + b = append(b, byte(d.rate)) + b = append(b, d.a[:]...) + b = append(b, byte(d.n), byte(d.state)) + return b, nil +} + +func (d *state) UnmarshalBinary(b []byte) error { + if len(b) != marshaledSize { + return errors.New("sha3: invalid hash state") + } + + magic := string(b[:len(magicSHA3)]) + b = b[len(magicSHA3):] + switch { + case magic == magicSHA3 && d.dsbyte == dsbyteSHA3: + case magic == magicShake && d.dsbyte == dsbyteShake: + case magic == magicCShake && d.dsbyte == dsbyteCShake: + case magic == magicKeccak && d.dsbyte == dsbyteKeccak: + default: + return errors.New("sha3: invalid hash state identifier") + } + + rate := int(b[0]) + b = b[1:] + if rate != d.rate { + return errors.New("sha3: invalid hash state function") + } + + copy(d.a[:], b) + b = b[len(d.a):] + + n, state := int(b[0]), spongeDirection(b[1]) + if n > d.rate { + return errors.New("sha3: invalid hash state") + } + d.n = n + if state != spongeAbsorbing && state != spongeSqueezing { + return errors.New("sha3: invalid hash state") + } + d.state = state + + return nil +} diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go index a01ef4357..a6b3a4281 100644 --- a/vendor/golang.org/x/crypto/sha3/shake.go +++ b/vendor/golang.org/x/crypto/sha3/shake.go @@ -16,9 +16,12 @@ package sha3 // [2] https://doi.org/10.6028/NIST.SP.800-185 import ( + "bytes" "encoding/binary" + "errors" "hash" "io" + "math/bits" ) // ShakeHash defines the interface to hash functions that support @@ -50,41 +53,33 @@ type cshakeState struct { initBlock []byte } -// Consts for configuring initial SHA-3 state -const ( - dsbyteShake = 0x1f - dsbyteCShake = 0x04 - rate128 = 168 - rate256 = 136 -) +func bytepad(data []byte, rate int) []byte { + out := make([]byte, 0, 9+len(data)+rate-1) + out = append(out, leftEncode(uint64(rate))...) + out = append(out, data...) + if padlen := rate - len(out)%rate; padlen < rate { + out = append(out, make([]byte, padlen)...) + } + return out +} -func bytepad(input []byte, w int) []byte { - // leftEncode always returns max 9 bytes - buf := make([]byte, 0, 9+len(input)+w) - buf = append(buf, leftEncode(uint64(w))...) - buf = append(buf, input...) - padlen := w - (len(buf) % w) - return append(buf, make([]byte, padlen)...) -} - -func leftEncode(value uint64) []byte { - var b [9]byte - binary.BigEndian.PutUint64(b[1:], value) - // Trim all but last leading zero bytes - i := byte(1) - for i < 8 && b[i] == 0 { - i++ +func leftEncode(x uint64) []byte { + // Let n be the smallest positive integer for which 2^(8n) > x. + n := (bits.Len64(x) + 7) / 8 + if n == 0 { + n = 1 } - // Prepend number of encoded bytes - b[i-1] = 9 - i - return b[i-1:] + // Return n || x with n as a byte and x an n bytes in big-endian order. + b := make([]byte, 9) + binary.BigEndian.PutUint64(b[1:], x) + b = b[9-n-1:] + b[0] = byte(n) + return b } func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash { c := cshakeState{state: &state{rate: rate, outputLen: outputLen, dsbyte: dsbyte}} - - // leftEncode returns max 9 bytes - c.initBlock = make([]byte, 0, 9*2+len(N)+len(S)) + c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...) c.initBlock = append(c.initBlock, N...) c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...) @@ -111,6 +106,30 @@ func (c *state) Clone() ShakeHash { return c.clone() } +func (c *cshakeState) MarshalBinary() ([]byte, error) { + return c.AppendBinary(make([]byte, 0, marshaledSize+len(c.initBlock))) +} + +func (c *cshakeState) AppendBinary(b []byte) ([]byte, error) { + b, err := c.state.AppendBinary(b) + if err != nil { + return nil, err + } + b = append(b, c.initBlock...) + return b, nil +} + +func (c *cshakeState) UnmarshalBinary(b []byte) error { + if len(b) <= marshaledSize { + return errors.New("sha3: invalid hash state") + } + if err := c.state.UnmarshalBinary(b[:marshaledSize]); err != nil { + return err + } + c.initBlock = bytes.Clone(b[marshaledSize:]) + return nil +} + // NewShake128 creates a new SHAKE128 variable-output-length ShakeHash. // Its generic security strength is 128 bits against all attacks if at // least 32 bytes of its output are used. @@ -126,11 +145,11 @@ func NewShake256() ShakeHash { } func newShake128Generic() *state { - return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake} + return &state{rate: rateK256, outputLen: 32, dsbyte: dsbyteShake} } func newShake256Generic() *state { - return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake} + return &state{rate: rateK512, outputLen: 64, dsbyte: dsbyteShake} } // NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash, @@ -143,7 +162,7 @@ func NewCShake128(N, S []byte) ShakeHash { if len(N) == 0 && len(S) == 0 { return NewShake128() } - return newCShake(N, S, rate128, 32, dsbyteCShake) + return newCShake(N, S, rateK256, 32, dsbyteCShake) } // NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash, @@ -156,7 +175,7 @@ func NewCShake256(N, S []byte) ShakeHash { if len(N) == 0 && len(S) == 0 { return NewShake256() } - return newCShake(N, S, rate256, 64, dsbyteCShake) + return newCShake(N, S, rateK512, 64, dsbyteCShake) } // ShakeSum128 writes an arbitrary-length digest of data into hash. diff --git a/vendor/golang.org/x/crypto/sha3/xor.go b/vendor/golang.org/x/crypto/sha3/xor.go deleted file mode 100644 index 6ada5c957..000000000 --- a/vendor/golang.org/x/crypto/sha3/xor.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -import ( - "crypto/subtle" - "encoding/binary" - "unsafe" - - "golang.org/x/sys/cpu" -) - -// xorIn xors the bytes in buf into the state. -func xorIn(d *state, buf []byte) { - if cpu.IsBigEndian { - for i := 0; len(buf) >= 8; i++ { - a := binary.LittleEndian.Uint64(buf) - d.a[i] ^= a - buf = buf[8:] - } - } else { - ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) - subtle.XORBytes(ab[:], ab[:], buf) - } -} - -// copyOut copies uint64s to a byte buffer. -func copyOut(d *state, b []byte) { - if cpu.IsBigEndian { - for i := 0; len(b) >= 8; i++ { - binary.LittleEndian.PutUint64(b, d.a[i]) - b = b[8:] - } - } else { - ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) - copy(b, ab[:]) - } -} diff --git a/vendor/golang.org/x/exp/LICENSE b/vendor/golang.org/x/exp/LICENSE index 6a66aea5e..2a7cf70da 100644 --- a/vendor/golang.org/x/exp/LICENSE +++ b/vendor/golang.org/x/exp/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s new file mode 100644 index 000000000..ec2acfe54 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s @@ -0,0 +1,17 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +#include "textflag.h" + +TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctl(SB) +GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) + +TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctlbyname(SB) +GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB) diff --git a/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go new file mode 100644 index 000000000..b838cb9e9 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +package cpu + +// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl +// call (see issue 43089). It also restricts AVX512 support for Darwin to +// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233). +// +// Background: +// Darwin implements a special mechanism to economize on thread state when +// AVX512 specific registers are not in use. This scheme minimizes state when +// preempting threads that haven't yet used any AVX512 instructions, but adds +// special requirements to check for AVX512 hardware support at runtime (e.g. +// via sysctl call or commpage inspection). See issue 43089 and link below for +// full background: +// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240 +// +// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0 +// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption +// of the AVX512 mask registers (K0-K7) upon signal return. For this reason +// AVX512 is considered unsafe to use on Darwin for kernel versions prior to +// 21.3.0, where a fix has been confirmed. See issue 49233 for full background. +func darwinSupportsAVX512() bool { + return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0) +} + +// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies +func darwinKernelVersionCheck(major, minor, patch int) bool { + var release [256]byte + err := darwinOSRelease(&release) + if err != nil { + return false + } + + var mmp [3]int + c := 0 +Loop: + for _, b := range release[:] { + switch { + case b >= '0' && b <= '9': + mmp[c] = 10*mmp[c] + int(b-'0') + case b == '.': + c++ + if c > 2 { + return false + } + case b == 0: + break Loop + default: + return false + } + } + if c != 2 { + return false + } + return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch) +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go index 910728fb1..32a44514e 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go @@ -6,10 +6,10 @@ package cpu -// cpuid is implemented in cpu_x86.s for gc compiler +// cpuid is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) -// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func xgetbv() (eax, edx uint32) diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s similarity index 94% rename from vendor/golang.org/x/sys/cpu/cpu_x86.s rename to vendor/golang.org/x/sys/cpu/cpu_gc_x86.s index 7d7ba33ef..ce208ce6d 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.s +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s @@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24 RET // func xgetbv() (eax, edx uint32) -TEXT ·xgetbv(SB),NOSPLIT,$0-8 +TEXT ·xgetbv(SB), NOSPLIT, $0-8 MOVL $0, CX XGETBV MOVL AX, eax+0(FP) diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go index 99c60fe9f..170d21ddf 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go @@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) { gccgoXgetbv(&a, &d) return a, d } - -// gccgo doesn't build on Darwin, per: -// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76 -func darwinSupportsAVX512() bool { - return false -} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index 08f35ea17..f1caf0f78 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -110,7 +110,6 @@ func doinit() { ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) ARM64.HasDIT = isSet(hwCap, hwcap_DIT) - // HWCAP2 feature bits ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_x86.go b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go new file mode 100644 index 000000000..a0fd7e2f7 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc)) + +package cpu + +func darwinSupportsAVX512() bool { + panic("only implemented for gc && amd64 && darwin") +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go index c29f5e4c5..600a68078 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go @@ -92,10 +92,8 @@ func archInit() { osSupportsAVX = isSet(1, eax) && isSet(2, eax) if runtime.GOOS == "darwin" { - // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers. - // Since users can't rely on mask register contents, let's not advertise AVX-512 support. - // See issue 49233. - osSupportsAVX512 = false + // Darwin requires special AVX512 checks, see cpu_darwin_x86.go + osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512() } else { // Check if OPMASK and ZMM registers have OS support. osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax) diff --git a/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go new file mode 100644 index 000000000..4d0888b0c --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go @@ -0,0 +1,98 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimal copy of x/sys/unix so the cpu package can make a +// system call on Darwin without depending on x/sys/unix. + +//go:build darwin && amd64 && gc + +package cpu + +import ( + "syscall" + "unsafe" +) + +type _C_int int32 + +// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419 +func darwinOSRelease(release *[256]byte) error { + // from x/sys/unix/zerrors_openbsd_amd64.go + const ( + CTL_KERN = 0x1 + KERN_OSRELEASE = 0x2 + ) + + mib := []_C_int{CTL_KERN, KERN_OSRELEASE} + n := unsafe.Sizeof(*release) + + return sysctl(mib, &release[0], &n, nil, 0) +} + +type Errno = syscall.Errno + +var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes. + +// from x/sys/unix/zsyscall_darwin_amd64.go L791-807 +func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + var _p0 unsafe.Pointer + if len(mib) > 0 { + _p0 = unsafe.Pointer(&mib[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + if _, _, err := syscall_syscall6( + libc_sysctl_trampoline_addr, + uintptr(_p0), + uintptr(len(mib)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + ); err != 0 { + return err + } + + return nil +} + +var libc_sysctl_trampoline_addr uintptr + +// adapted from internal/cpu/cpu_arm64_darwin.go +func darwinSysctlEnabled(name []byte) bool { + out := int32(0) + nout := unsafe.Sizeof(out) + if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil { + return false + } + return out > 0 +} + +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" + +var libc_sysctlbyname_trampoline_addr uintptr + +// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix +func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + if _, _, err := syscall_syscall6( + libc_sysctlbyname_trampoline_addr, + uintptr(unsafe.Pointer(name)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + 0, + ); err != 0 { + return err + } + + return nil +} + +//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib" + +// Implemented in the runtime package (runtime/sys_darwin.go) +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) + +//go:linkname syscall_syscall6 syscall.syscall6 diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go index dbe680eab..7ca4fa12a 100644 --- a/vendor/golang.org/x/sys/unix/ioctl_linux.go +++ b/vendor/golang.org/x/sys/unix/ioctl_linux.go @@ -58,6 +58,102 @@ func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) { return &value, err } +// IoctlGetEthtoolTsInfo fetches ethtool timestamping and PHC +// association for the network device specified by ifname. +func IoctlGetEthtoolTsInfo(fd int, ifname string) (*EthtoolTsInfo, error) { + ifr, err := NewIfreq(ifname) + if err != nil { + return nil, err + } + + value := EthtoolTsInfo{Cmd: ETHTOOL_GET_TS_INFO} + ifrd := ifr.withData(unsafe.Pointer(&value)) + + err = ioctlIfreqData(fd, SIOCETHTOOL, &ifrd) + return &value, err +} + +// IoctlGetHwTstamp retrieves the hardware timestamping configuration +// for the network device specified by ifname. +func IoctlGetHwTstamp(fd int, ifname string) (*HwTstampConfig, error) { + ifr, err := NewIfreq(ifname) + if err != nil { + return nil, err + } + + value := HwTstampConfig{} + ifrd := ifr.withData(unsafe.Pointer(&value)) + + err = ioctlIfreqData(fd, SIOCGHWTSTAMP, &ifrd) + return &value, err +} + +// IoctlSetHwTstamp updates the hardware timestamping configuration for +// the network device specified by ifname. +func IoctlSetHwTstamp(fd int, ifname string, cfg *HwTstampConfig) error { + ifr, err := NewIfreq(ifname) + if err != nil { + return err + } + ifrd := ifr.withData(unsafe.Pointer(cfg)) + return ioctlIfreqData(fd, SIOCSHWTSTAMP, &ifrd) +} + +// FdToClockID derives the clock ID from the file descriptor number +// - see clock_gettime(3), FD_TO_CLOCKID macros. The resulting ID is +// suitable for system calls like ClockGettime. +func FdToClockID(fd int) int32 { return int32((int(^fd) << 3) | 3) } + +// IoctlPtpClockGetcaps returns the description of a given PTP device. +func IoctlPtpClockGetcaps(fd int) (*PtpClockCaps, error) { + var value PtpClockCaps + err := ioctlPtr(fd, PTP_CLOCK_GETCAPS2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpSysOffsetPrecise returns a description of the clock +// offset compared to the system clock. +func IoctlPtpSysOffsetPrecise(fd int) (*PtpSysOffsetPrecise, error) { + var value PtpSysOffsetPrecise + err := ioctlPtr(fd, PTP_SYS_OFFSET_PRECISE2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpSysOffsetExtended returns an extended description of the +// clock offset compared to the system clock. The samples parameter +// specifies the desired number of measurements. +func IoctlPtpSysOffsetExtended(fd int, samples uint) (*PtpSysOffsetExtended, error) { + value := PtpSysOffsetExtended{Samples: uint32(samples)} + err := ioctlPtr(fd, PTP_SYS_OFFSET_EXTENDED2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpPinGetfunc returns the configuration of the specified +// I/O pin on given PTP device. +func IoctlPtpPinGetfunc(fd int, index uint) (*PtpPinDesc, error) { + value := PtpPinDesc{Index: uint32(index)} + err := ioctlPtr(fd, PTP_PIN_GETFUNC2, unsafe.Pointer(&value)) + return &value, err +} + +// IoctlPtpPinSetfunc updates configuration of the specified PTP +// I/O pin. +func IoctlPtpPinSetfunc(fd int, pd *PtpPinDesc) error { + return ioctlPtr(fd, PTP_PIN_SETFUNC2, unsafe.Pointer(pd)) +} + +// IoctlPtpPeroutRequest configures the periodic output mode of the +// PTP I/O pins. +func IoctlPtpPeroutRequest(fd int, r *PtpPeroutRequest) error { + return ioctlPtr(fd, PTP_PEROUT_REQUEST2, unsafe.Pointer(r)) +} + +// IoctlPtpExttsRequest configures the external timestamping mode +// of the PTP I/O pins. +func IoctlPtpExttsRequest(fd int, r *PtpExttsRequest) error { + return ioctlPtr(fd, PTP_EXTTS_REQUEST2, unsafe.Pointer(r)) +} + // IoctlGetWatchdogInfo fetches information about a watchdog device from the // Linux watchdog API. For more information, see: // https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html. diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index ac54ecaba..6ab02b6c3 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -158,6 +158,16 @@ includes_Linux=' #endif #define _GNU_SOURCE +// See the description in unix/linux/types.go +#if defined(__ARM_EABI__) || \ + (defined(__mips__) && (_MIPS_SIM == _ABIO32)) || \ + (defined(__powerpc__) && (!defined(__powerpc64__))) +# ifdef _TIME_BITS +# undef _TIME_BITS +# endif +# define _TIME_BITS 32 +#endif + // is broken on powerpc64, as it fails to include definitions of // these structures. We just include them copied from . #if defined(__powerpc__) @@ -256,6 +266,7 @@ struct ltchars { #include #include #include +#include #include #include #include @@ -527,6 +538,7 @@ ccflags="$@" $2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|TCP|MCAST|EVFILT|NOTE|SHUT|PROT|MAP|MREMAP|MFD|T?PACKET|MSG|SCM|MCL|DT|MADV|PR|LOCAL|TCPOPT|UDP)_/ || $2 ~ /^NFC_(GENL|PROTO|COMM|RF|SE|DIRECTION|LLCP|SOCKPROTO)_/ || $2 ~ /^NFC_.*_(MAX)?SIZE$/ || + $2 ~ /^PTP_/ || $2 ~ /^RAW_PAYLOAD_/ || $2 ~ /^[US]F_/ || $2 ~ /^TP_STATUS_/ || diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index f08abd434..230a94549 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1860,6 +1860,7 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e //sys ClockAdjtime(clockid int32, buf *Timex) (state int, err error) //sys ClockGetres(clockid int32, res *Timespec) (err error) //sys ClockGettime(clockid int32, time *Timespec) (err error) +//sys ClockSettime(clockid int32, time *Timespec) (err error) //sys ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error) //sys Close(fd int) (err error) //sys CloseRange(first uint, last uint, flags uint) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go index 312ae6ac1..7bf5c04bb 100644 --- a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go @@ -768,6 +768,15 @@ func Munmap(b []byte) (err error) { return mapper.Munmap(b) } +func MmapPtr(fd int, offset int64, addr unsafe.Pointer, length uintptr, prot int, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mmap(uintptr(addr), length, prot, flags, fd, offset) + return unsafe.Pointer(xaddr), err +} + +func MunmapPtr(addr unsafe.Pointer, length uintptr) (err error) { + return mapper.munmap(uintptr(addr), length) +} + //sys Gethostname(buf []byte) (err error) = SYS___GETHOSTNAME_A //sysnb Getgid() (gid int) //sysnb Getpid() (pid int) @@ -816,10 +825,10 @@ func Lstat(path string, stat *Stat_t) (err error) { // for checking symlinks begins with $VERSION/ $SYSNAME/ $SYSSYMR/ $SYSSYMA/ func isSpecialPath(path []byte) (v bool) { var special = [4][8]byte{ - [8]byte{'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'}, - [8]byte{'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'}, - [8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'}, - [8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}} + {'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'}, + {'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'}, + {'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'}, + {'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}} var i, j int for i = 0; i < len(special); i++ { @@ -3115,3 +3124,90 @@ func legacy_Mkfifoat(dirfd int, path string, mode uint32) (err error) { //sys Posix_openpt(oflag int) (fd int, err error) = SYS_POSIX_OPENPT //sys Grantpt(fildes int) (rc int, err error) = SYS_GRANTPT //sys Unlockpt(fildes int) (rc int, err error) = SYS_UNLOCKPT + +func fcntlAsIs(fd uintptr, cmd int, arg uintptr) (val int, err error) { + runtime.EnterSyscall() + r0, e2, e1 := CallLeFuncWithErr(GetZosLibVec()+SYS_FCNTL<<4, uintptr(fd), uintptr(cmd), arg) + runtime.ExitSyscall() + val = int(r0) + if int64(r0) == -1 { + err = errnoErr2(e1, e2) + } + return +} + +func Fcntl(fd uintptr, cmd int, op interface{}) (ret int, err error) { + switch op.(type) { + case *Flock_t: + err = FcntlFlock(fd, cmd, op.(*Flock_t)) + if err != nil { + ret = -1 + } + return + case int: + return FcntlInt(fd, cmd, op.(int)) + case *F_cnvrt: + return fcntlAsIs(fd, cmd, uintptr(unsafe.Pointer(op.(*F_cnvrt)))) + case unsafe.Pointer: + return fcntlAsIs(fd, cmd, uintptr(op.(unsafe.Pointer))) + default: + return -1, EINVAL + } + return +} + +func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { + if raceenabled { + raceReleaseMerge(unsafe.Pointer(&ioSync)) + } + return sendfile(outfd, infd, offset, count) +} + +func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) { + // TODO: use LE call instead if the call is implemented + originalOffset, err := Seek(infd, 0, SEEK_CUR) + if err != nil { + return -1, err + } + //start reading data from in_fd + if offset != nil { + _, err := Seek(infd, *offset, SEEK_SET) + if err != nil { + return -1, err + } + } + + buf := make([]byte, count) + readBuf := make([]byte, 0) + var n int = 0 + for i := 0; i < count; i += n { + n, err := Read(infd, buf) + if n == 0 { + if err != nil { + return -1, err + } else { // EOF + break + } + } + readBuf = append(readBuf, buf...) + buf = buf[0:0] + } + + n2, err := Write(outfd, readBuf) + if err != nil { + return -1, err + } + + //When sendfile() returns, this variable will be set to the + // offset of the byte following the last byte that was read. + if offset != nil { + *offset = *offset + int64(n) + // If offset is not NULL, then sendfile() does not modify the file + // offset of in_fd + _, err := Seek(infd, originalOffset, SEEK_SET) + if err != nil { + return -1, err + } + } + return n2, nil +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index de3b46248..ccba391c9 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -2625,6 +2625,28 @@ const ( PR_UNALIGN_NOPRINT = 0x1 PR_UNALIGN_SIGBUS = 0x2 PSTOREFS_MAGIC = 0x6165676c + PTP_CLK_MAGIC = '=' + PTP_ENABLE_FEATURE = 0x1 + PTP_EXTTS_EDGES = 0x6 + PTP_EXTTS_EVENT_VALID = 0x1 + PTP_EXTTS_V1_VALID_FLAGS = 0x7 + PTP_EXTTS_VALID_FLAGS = 0x1f + PTP_EXT_OFFSET = 0x10 + PTP_FALLING_EDGE = 0x4 + PTP_MAX_SAMPLES = 0x19 + PTP_PEROUT_DUTY_CYCLE = 0x2 + PTP_PEROUT_ONE_SHOT = 0x1 + PTP_PEROUT_PHASE = 0x4 + PTP_PEROUT_V1_VALID_FLAGS = 0x0 + PTP_PEROUT_VALID_FLAGS = 0x7 + PTP_PIN_GETFUNC = 0xc0603d06 + PTP_PIN_GETFUNC2 = 0xc0603d0f + PTP_RISING_EDGE = 0x2 + PTP_STRICT_FLAGS = 0x8 + PTP_SYS_OFFSET_EXTENDED = 0xc4c03d09 + PTP_SYS_OFFSET_EXTENDED2 = 0xc4c03d12 + PTP_SYS_OFFSET_PRECISE = 0xc0403d08 + PTP_SYS_OFFSET_PRECISE2 = 0xc0403d11 PTRACE_ATTACH = 0x10 PTRACE_CONT = 0x7 PTRACE_DETACH = 0x11 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index 8aa6d77c0..0c00cb3f3 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -237,6 +237,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETFPREGS = 0xe PTRACE_GETFPXREGS = 0x12 PTRACE_GET_THREAD_AREA = 0x19 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index da428f425..dfb364554 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -237,6 +237,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_ARCH_PRCTL = 0x1e PTRACE_GETFPREGS = 0xe PTRACE_GETFPXREGS = 0x12 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index bf45bfec7..d46dcf78a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETCRUNCHREGS = 0x19 PTRACE_GETFDPIC = 0x1f PTRACE_GETFDPIC_EXEC = 0x0 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index 71c67162b..3af3248a7 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -240,6 +240,20 @@ const ( PROT_BTI = 0x10 PROT_MTE = 0x20 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_PEEKMTETAGS = 0x21 PTRACE_POKEMTETAGS = 0x22 PTRACE_SYSEMU = 0x1f diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index 9476628fa..292bcf028 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -238,6 +238,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_SYSEMU = 0x1f PTRACE_SYSEMU_SINGLESTEP = 0x20 RLIMIT_AS = 0x9 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index b9e85f3cf..782b7110f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index a48b68a76..84973fd92 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index ea00e8522..6d9cbc3b2 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index 91c646871..5f9fedbce 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPREGS = 0xe PTRACE_GET_THREAD_AREA = 0x19 PTRACE_GET_THREAD_AREA_3264 = 0xc4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 8cbf38d63..bb0026ee0 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -237,6 +237,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index a2df73419..46120db5c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -237,6 +237,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index 247913792..5c951634f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -237,6 +237,20 @@ const ( PPPIOCXFERUNIT = 0x2000744e PROT_SAO = 0x10 PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETEVRREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETREGS64 = 0x16 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index d265f146e..11a84d5af 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_GETFDPIC = 0x21 PTRACE_GETFDPIC_EXEC = 0x0 PTRACE_GETFDPIC_INTERP = 0x1 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index 3f2d64439..f78c4617c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -234,6 +234,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x7434 PPPIOCXFERUNIT = 0x744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x80503d01 + PTP_CLOCK_GETCAPS2 = 0x80503d0a + PTP_ENABLE_PPS = 0x40043d04 + PTP_ENABLE_PPS2 = 0x40043d0d + PTP_EXTTS_REQUEST = 0x40103d02 + PTP_EXTTS_REQUEST2 = 0x40103d0b + PTP_MASK_CLEAR_ALL = 0x3d13 + PTP_MASK_EN_SINGLE = 0x40043d14 + PTP_PEROUT_REQUEST = 0x40383d03 + PTP_PEROUT_REQUEST2 = 0x40383d0c + PTP_PIN_SETFUNC = 0x40603d07 + PTP_PIN_SETFUNC2 = 0x40603d10 + PTP_SYS_OFFSET = 0x43403d05 + PTP_SYS_OFFSET2 = 0x43403d0e PTRACE_DISABLE_TE = 0x5010 PTRACE_ENABLE_TE = 0x5009 PTRACE_GET_LAST_BREAK = 0x5006 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index 5d8b727a1..aeb777c34 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -239,6 +239,20 @@ const ( PPPIOCUNBRIDGECHAN = 0x20007434 PPPIOCXFERUNIT = 0x2000744e PR_SET_PTRACER_ANY = 0xffffffffffffffff + PTP_CLOCK_GETCAPS = 0x40503d01 + PTP_CLOCK_GETCAPS2 = 0x40503d0a + PTP_ENABLE_PPS = 0x80043d04 + PTP_ENABLE_PPS2 = 0x80043d0d + PTP_EXTTS_REQUEST = 0x80103d02 + PTP_EXTTS_REQUEST2 = 0x80103d0b + PTP_MASK_CLEAR_ALL = 0x20003d13 + PTP_MASK_EN_SINGLE = 0x80043d14 + PTP_PEROUT_REQUEST = 0x80383d03 + PTP_PEROUT_REQUEST2 = 0x80383d0c + PTP_PIN_SETFUNC = 0x80603d07 + PTP_PIN_SETFUNC2 = 0x80603d10 + PTP_SYS_OFFSET = 0x83403d05 + PTP_SYS_OFFSET2 = 0x83403d0e PTRACE_GETFPAREGS = 0x14 PTRACE_GETFPREGS = 0xe PTRACE_GETFPREGS64 = 0x19 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index af30da557..5cc1e8eb2 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -592,6 +592,16 @@ func ClockGettime(clockid int32, time *Timespec) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func ClockSettime(clockid int32, time *Timespec) (err error) { + _, _, e1 := Syscall(SYS_CLOCK_SETTIME, uintptr(clockid), uintptr(unsafe.Pointer(time)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error) { _, _, e1 := Syscall6(SYS_CLOCK_NANOSLEEP, uintptr(clockid), uintptr(flags), uintptr(unsafe.Pointer(request)), uintptr(unsafe.Pointer(remain)), 0, 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 3a69e4549..8daaf3faf 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -1752,12 +1752,6 @@ const ( IFLA_IPVLAN_UNSPEC = 0x0 IFLA_IPVLAN_MODE = 0x1 IFLA_IPVLAN_FLAGS = 0x2 - NETKIT_NEXT = -0x1 - NETKIT_PASS = 0x0 - NETKIT_DROP = 0x2 - NETKIT_REDIRECT = 0x7 - NETKIT_L2 = 0x0 - NETKIT_L3 = 0x1 IFLA_NETKIT_UNSPEC = 0x0 IFLA_NETKIT_PEER_INFO = 0x1 IFLA_NETKIT_PRIMARY = 0x2 @@ -1796,6 +1790,7 @@ const ( IFLA_VXLAN_DF = 0x1d IFLA_VXLAN_VNIFILTER = 0x1e IFLA_VXLAN_LOCALBYPASS = 0x1f + IFLA_VXLAN_LABEL_POLICY = 0x20 IFLA_GENEVE_UNSPEC = 0x0 IFLA_GENEVE_ID = 0x1 IFLA_GENEVE_REMOTE = 0x2 @@ -1825,6 +1820,8 @@ const ( IFLA_GTP_ROLE = 0x4 IFLA_GTP_CREATE_SOCKETS = 0x5 IFLA_GTP_RESTART_COUNT = 0x6 + IFLA_GTP_LOCAL = 0x7 + IFLA_GTP_LOCAL6 = 0x8 IFLA_BOND_UNSPEC = 0x0 IFLA_BOND_MODE = 0x1 IFLA_BOND_ACTIVE_SLAVE = 0x2 @@ -1857,6 +1854,7 @@ const ( IFLA_BOND_AD_LACP_ACTIVE = 0x1d IFLA_BOND_MISSED_MAX = 0x1e IFLA_BOND_NS_IP6_TARGET = 0x1f + IFLA_BOND_COUPLED_CONTROL = 0x20 IFLA_BOND_AD_INFO_UNSPEC = 0x0 IFLA_BOND_AD_INFO_AGGREGATOR = 0x1 IFLA_BOND_AD_INFO_NUM_PORTS = 0x2 @@ -1925,6 +1923,7 @@ const ( IFLA_HSR_SEQ_NR = 0x5 IFLA_HSR_VERSION = 0x6 IFLA_HSR_PROTOCOL = 0x7 + IFLA_HSR_INTERLINK = 0x8 IFLA_STATS_UNSPEC = 0x0 IFLA_STATS_LINK_64 = 0x1 IFLA_STATS_LINK_XSTATS = 0x2 @@ -1977,6 +1976,15 @@ const ( IFLA_DSA_MASTER = 0x1 ) +const ( + NETKIT_NEXT = -0x1 + NETKIT_PASS = 0x0 + NETKIT_DROP = 0x2 + NETKIT_REDIRECT = 0x7 + NETKIT_L2 = 0x0 + NETKIT_L3 = 0x1 +) + const ( NF_INET_PRE_ROUTING = 0x0 NF_INET_LOCAL_IN = 0x1 @@ -4110,6 +4118,106 @@ type EthtoolDrvinfo struct { Regdump_len uint32 } +type EthtoolTsInfo struct { + Cmd uint32 + So_timestamping uint32 + Phc_index int32 + Tx_types uint32 + Tx_reserved [3]uint32 + Rx_filters uint32 + Rx_reserved [3]uint32 +} + +type HwTstampConfig struct { + Flags int32 + Tx_type int32 + Rx_filter int32 +} + +const ( + HWTSTAMP_FILTER_NONE = 0x0 + HWTSTAMP_FILTER_ALL = 0x1 + HWTSTAMP_FILTER_SOME = 0x2 + HWTSTAMP_FILTER_PTP_V1_L4_EVENT = 0x3 + HWTSTAMP_FILTER_PTP_V2_L4_EVENT = 0x6 + HWTSTAMP_FILTER_PTP_V2_L2_EVENT = 0x9 + HWTSTAMP_FILTER_PTP_V2_EVENT = 0xc +) + +const ( + HWTSTAMP_TX_OFF = 0x0 + HWTSTAMP_TX_ON = 0x1 + HWTSTAMP_TX_ONESTEP_SYNC = 0x2 +) + +type ( + PtpClockCaps struct { + Max_adj int32 + N_alarm int32 + N_ext_ts int32 + N_per_out int32 + Pps int32 + N_pins int32 + Cross_timestamping int32 + Adjust_phase int32 + Max_phase_adj int32 + Rsv [11]int32 + } + PtpClockTime struct { + Sec int64 + Nsec uint32 + Reserved uint32 + } + PtpExttsEvent struct { + T PtpClockTime + Index uint32 + Flags uint32 + Rsv [2]uint32 + } + PtpExttsRequest struct { + Index uint32 + Flags uint32 + Rsv [2]uint32 + } + PtpPeroutRequest struct { + StartOrPhase PtpClockTime + Period PtpClockTime + Index uint32 + Flags uint32 + On PtpClockTime + } + PtpPinDesc struct { + Name [64]byte + Index uint32 + Func uint32 + Chan uint32 + Rsv [5]uint32 + } + PtpSysOffset struct { + Samples uint32 + Rsv [3]uint32 + Ts [51]PtpClockTime + } + PtpSysOffsetExtended struct { + Samples uint32 + Rsv [3]uint32 + Ts [25][3]PtpClockTime + } + PtpSysOffsetPrecise struct { + Device PtpClockTime + Realtime PtpClockTime + Monoraw PtpClockTime + Rsv [4]uint32 + } +) + +const ( + PTP_PF_NONE = 0x0 + PTP_PF_EXTTS = 0x1 + PTP_PF_PEROUT = 0x2 + PTP_PF_PHYSYNC = 0x3 +) + type ( HIDRawReportDescriptor struct { Size uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go index d9a13af46..2e5d5a443 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go @@ -377,6 +377,12 @@ type Flock_t struct { Pid int32 } +type F_cnvrt struct { + Cvtcmd int32 + Pccsid int16 + Fccsid int16 +} + type Termios struct { Cflag uint32 Iflag uint32 diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 5cee9a314..4510bfc3f 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -725,20 +725,12 @@ func DurationSinceBoot() time.Duration { } func Ftruncate(fd Handle, length int64) (err error) { - curoffset, e := Seek(fd, 0, 1) - if e != nil { - return e - } - defer Seek(fd, curoffset, 0) - _, e = Seek(fd, length, 0) - if e != nil { - return e + type _FILE_END_OF_FILE_INFO struct { + EndOfFile int64 } - e = SetEndOfFile(fd) - if e != nil { - return e - } - return nil + var info _FILE_END_OF_FILE_INFO + info.EndOfFile = length + return SetFileInformationByHandle(fd, FileEndOfFileInfo, (*byte)(unsafe.Pointer(&info)), uint32(unsafe.Sizeof(info))) } func Gettimeofday(tv *Timeval) (err error) { @@ -894,6 +886,11 @@ const socket_error = uintptr(^uint32(0)) //sys GetACP() (acp uint32) = kernel32.GetACP //sys MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) = kernel32.MultiByteToWideChar //sys getBestInterfaceEx(sockaddr unsafe.Pointer, pdwBestIfIndex *uint32) (errcode error) = iphlpapi.GetBestInterfaceEx +//sys GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) = iphlpapi.GetIfEntry2Ex +//sys GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) = iphlpapi.GetUnicastIpAddressEntry +//sys NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyIpInterfaceChange +//sys NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyUnicastIpAddressChange +//sys CancelMibChangeNotify2(notificationHandle Handle) (errcode error) = iphlpapi.CancelMibChangeNotify2 // For testing: clients can set this flag to force // creation of IPv6 sockets to return EAFNOSUPPORT. @@ -1685,13 +1682,16 @@ func (s NTStatus) Error() string { // do not use NTUnicodeString, and instead UTF16PtrFromString should be used for // the more common *uint16 string type. func NewNTUnicodeString(s string) (*NTUnicodeString, error) { - var u NTUnicodeString - s16, err := UTF16PtrFromString(s) + s16, err := UTF16FromString(s) if err != nil { return nil, err } - RtlInitUnicodeString(&u, s16) - return &u, nil + n := uint16(len(s16) * 2) + return &NTUnicodeString{ + Length: n - 2, // subtract 2 bytes for the NULL terminator + MaximumLength: n, + Buffer: &s16[0], + }, nil } // Slice returns a uint16 slice that aliases the data in the NTUnicodeString. diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index 7b97a154c..51311e205 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -2203,6 +2203,132 @@ const ( IfOperStatusLowerLayerDown = 7 ) +const ( + IF_MAX_PHYS_ADDRESS_LENGTH = 32 + IF_MAX_STRING_SIZE = 256 +) + +// MIB_IF_ENTRY_LEVEL enumeration from netioapi.h or +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2ex. +const ( + MibIfEntryNormal = 0 + MibIfEntryNormalWithoutStatistics = 2 +) + +// MIB_NOTIFICATION_TYPE enumeration from netioapi.h or +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ne-netioapi-mib_notification_type. +const ( + MibParameterNotification = 0 + MibAddInstance = 1 + MibDeleteInstance = 2 + MibInitialNotification = 3 +) + +// MibIfRow2 stores information about a particular interface. See +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2. +type MibIfRow2 struct { + InterfaceLuid uint64 + InterfaceIndex uint32 + InterfaceGuid GUID + Alias [IF_MAX_STRING_SIZE + 1]uint16 + Description [IF_MAX_STRING_SIZE + 1]uint16 + PhysicalAddressLength uint32 + PhysicalAddress [IF_MAX_PHYS_ADDRESS_LENGTH]uint8 + PermanentPhysicalAddress [IF_MAX_PHYS_ADDRESS_LENGTH]uint8 + Mtu uint32 + Type uint32 + TunnelType uint32 + MediaType uint32 + PhysicalMediumType uint32 + AccessType uint32 + DirectionType uint32 + InterfaceAndOperStatusFlags uint8 + OperStatus uint32 + AdminStatus uint32 + MediaConnectState uint32 + NetworkGuid GUID + ConnectionType uint32 + TransmitLinkSpeed uint64 + ReceiveLinkSpeed uint64 + InOctets uint64 + InUcastPkts uint64 + InNUcastPkts uint64 + InDiscards uint64 + InErrors uint64 + InUnknownProtos uint64 + InUcastOctets uint64 + InMulticastOctets uint64 + InBroadcastOctets uint64 + OutOctets uint64 + OutUcastPkts uint64 + OutNUcastPkts uint64 + OutDiscards uint64 + OutErrors uint64 + OutUcastOctets uint64 + OutMulticastOctets uint64 + OutBroadcastOctets uint64 + OutQLen uint64 +} + +// MIB_UNICASTIPADDRESS_ROW stores information about a unicast IP address. See +// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_unicastipaddress_row. +type MibUnicastIpAddressRow struct { + Address RawSockaddrInet6 // SOCKADDR_INET union + InterfaceLuid uint64 + InterfaceIndex uint32 + PrefixOrigin uint32 + SuffixOrigin uint32 + ValidLifetime uint32 + PreferredLifetime uint32 + OnLinkPrefixLength uint8 + SkipAsSource uint8 + DadState uint32 + ScopeId uint32 + CreationTimeStamp Filetime +} + +const ScopeLevelCount = 16 + +// MIB_IPINTERFACE_ROW stores interface management information for a particular IP address family on a network interface. +// See https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_ipinterface_row. +type MibIpInterfaceRow struct { + Family uint16 + InterfaceLuid uint64 + InterfaceIndex uint32 + MaxReassemblySize uint32 + InterfaceIdentifier uint64 + MinRouterAdvertisementInterval uint32 + MaxRouterAdvertisementInterval uint32 + AdvertisingEnabled uint8 + ForwardingEnabled uint8 + WeakHostSend uint8 + WeakHostReceive uint8 + UseAutomaticMetric uint8 + UseNeighborUnreachabilityDetection uint8 + ManagedAddressConfigurationSupported uint8 + OtherStatefulConfigurationSupported uint8 + AdvertiseDefaultRoute uint8 + RouterDiscoveryBehavior uint32 + DadTransmits uint32 + BaseReachableTime uint32 + RetransmitTime uint32 + PathMtuDiscoveryTimeout uint32 + LinkLocalAddressBehavior uint32 + LinkLocalAddressTimeout uint32 + ZoneIndices [ScopeLevelCount]uint32 + SitePrefixLength uint32 + Metric uint32 + NlMtu uint32 + Connected uint8 + SupportsWakeUpPatterns uint8 + SupportsNeighborDiscovery uint8 + SupportsRouterDiscovery uint8 + ReachableTime uint32 + TransmitOffload uint32 + ReceiveOffload uint32 + DisableDefaultRoutes uint8 +} + // Console related constants used for the mode parameter to SetConsoleMode. See // https://docs.microsoft.com/en-us/windows/console/setconsolemode for details. diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index 4c2e1bdc0..6f5252880 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -181,10 +181,15 @@ var ( procDnsRecordListFree = moddnsapi.NewProc("DnsRecordListFree") procDwmGetWindowAttribute = moddwmapi.NewProc("DwmGetWindowAttribute") procDwmSetWindowAttribute = moddwmapi.NewProc("DwmSetWindowAttribute") + procCancelMibChangeNotify2 = modiphlpapi.NewProc("CancelMibChangeNotify2") procGetAdaptersAddresses = modiphlpapi.NewProc("GetAdaptersAddresses") procGetAdaptersInfo = modiphlpapi.NewProc("GetAdaptersInfo") procGetBestInterfaceEx = modiphlpapi.NewProc("GetBestInterfaceEx") procGetIfEntry = modiphlpapi.NewProc("GetIfEntry") + procGetIfEntry2Ex = modiphlpapi.NewProc("GetIfEntry2Ex") + procGetUnicastIpAddressEntry = modiphlpapi.NewProc("GetUnicastIpAddressEntry") + procNotifyIpInterfaceChange = modiphlpapi.NewProc("NotifyIpInterfaceChange") + procNotifyUnicastIpAddressChange = modiphlpapi.NewProc("NotifyUnicastIpAddressChange") procAddDllDirectory = modkernel32.NewProc("AddDllDirectory") procAssignProcessToJobObject = modkernel32.NewProc("AssignProcessToJobObject") procCancelIo = modkernel32.NewProc("CancelIo") @@ -1606,6 +1611,14 @@ func DwmSetWindowAttribute(hwnd HWND, attribute uint32, value unsafe.Pointer, si return } +func CancelMibChangeNotify2(notificationHandle Handle) (errcode error) { + r0, _, _ := syscall.SyscallN(procCancelMibChangeNotify2.Addr(), uintptr(notificationHandle)) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + func GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapterAddresses *IpAdapterAddresses, sizePointer *uint32) (errcode error) { r0, _, _ := syscall.Syscall6(procGetAdaptersAddresses.Addr(), 5, uintptr(family), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(adapterAddresses)), uintptr(unsafe.Pointer(sizePointer)), 0) if r0 != 0 { @@ -1638,6 +1651,46 @@ func GetIfEntry(pIfRow *MibIfRow) (errcode error) { return } +func GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) { + r0, _, _ := syscall.SyscallN(procGetIfEntry2Ex.Addr(), uintptr(level), uintptr(unsafe.Pointer(row))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) { + r0, _, _ := syscall.SyscallN(procGetUnicastIpAddressEntry.Addr(), uintptr(unsafe.Pointer(row))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) { + var _p0 uint32 + if initialNotification { + _p0 = 1 + } + r0, _, _ := syscall.SyscallN(procNotifyIpInterfaceChange.Addr(), uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + +func NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) { + var _p0 uint32 + if initialNotification { + _p0 = 1 + } + r0, _, _ := syscall.SyscallN(procNotifyUnicastIpAddressChange.Addr(), uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle))) + if r0 != 0 { + errcode = syscall.Errno(r0) + } + return +} + func AddDllDirectory(path *uint16) (cookie uintptr, err error) { r0, _, e1 := syscall.Syscall(procAddDllDirectory.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) cookie = uintptr(r0) diff --git a/vendor/golang.org/x/term/README.md b/vendor/golang.org/x/term/README.md index d03d0aefe..05ff623f9 100644 --- a/vendor/golang.org/x/term/README.md +++ b/vendor/golang.org/x/term/README.md @@ -4,16 +4,13 @@ This repository provides Go terminal and console support packages. -## Download/Install - -The easiest way to install is to run `go get -u golang.org/x/term`. You can -also manually git clone the repository to `$GOPATH/src/golang.org/x/term`. - ## Report Issues / Send Patches This repository uses Gerrit for code changes. To learn how to submit changes to -this repository, see https://golang.org/doc/contribute.html. +this repository, see https://go.dev/doc/contribute. + +The git repository is https://go.googlesource.com/term. The main issue tracker for the term repository is located at -https://github.com/golang/go/issues. Prefix your issue with "x/term:" in the +https://go.dev/issues. Prefix your issue with "x/term:" in the subject line, so it is easy to find. diff --git a/vendor/modules.txt b/vendor/modules.txt index 43df57a02..a49396c8c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -108,7 +108,10 @@ github.com/cilium/ebpf/internal/unix github.com/cilium/ebpf/link github.com/cilium/ebpf/ringbuf github.com/cilium/ebpf/rlimit -# github.com/davecgh/go-spew v1.1.1 +# github.com/containers/common v0.61.0 +## explicit; go 1.22.6 +github.com/containers/common/pkg/cgroupv2 +# github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc ## explicit github.com/davecgh/go-spew/spew # github.com/emicklei/go-restful/v3 v3.12.1 @@ -121,9 +124,10 @@ github.com/evanphx/json-patch/v5 # github.com/felixge/httpsnoop v1.0.4 ## explicit; go 1.13 github.com/felixge/httpsnoop -# github.com/fsnotify/fsnotify v1.7.0 +# github.com/fsnotify/fsnotify v1.8.0 ## explicit; go 1.17 github.com/fsnotify/fsnotify +github.com/fsnotify/fsnotify/internal # github.com/gabriel-vasile/mimetype v1.4.2 ## explicit; go 1.20 github.com/gabriel-vasile/mimetype @@ -347,7 +351,7 @@ github.com/pierrec/lz4/v4/internal/xxh32 # github.com/pkg/errors v0.9.1 ## explicit github.com/pkg/errors -# github.com/pmezard/go-difflib v1.0.0 +# github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 ## explicit github.com/pmezard/go-difflib/difflib # github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c @@ -423,12 +427,12 @@ github.com/twitchyliquid64/golang-asm/unsafeheader # github.com/ugorji/go/codec v1.2.11 ## explicit; go 1.11 github.com/ugorji/go/codec -# github.com/vishvananda/netlink v1.1.0 +# github.com/vishvananda/netlink v1.3.0 ## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl -# github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df -## explicit; go 1.12 +# github.com/vishvananda/netns v0.0.4 +## explicit; go 1.17 github.com/vishvananda/netns # github.com/vladimirvivien/gexe v0.3.0 ## explicit; go 1.22 @@ -699,14 +703,14 @@ go.uber.org/zap/zapcore ## explicit; go 1.18 golang.org/x/arch/arm64/arm64asm golang.org/x/arch/x86/x86asm -# golang.org/x/crypto v0.28.0 +# golang.org/x/crypto v0.29.0 ## explicit; go 1.20 golang.org/x/crypto/sha3 -# golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 -## explicit; go 1.18 +# golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c +## explicit; go 1.22.0 golang.org/x/exp/constraints -# golang.org/x/mod v0.20.0 -## explicit; go 1.18 +# golang.org/x/mod v0.21.0 +## explicit; go 1.22.0 golang.org/x/mod/semver # golang.org/x/net v0.30.0 ## explicit; go 1.18 @@ -726,17 +730,17 @@ golang.org/x/net/trace ## explicit; go 1.18 golang.org/x/oauth2 golang.org/x/oauth2/internal -# golang.org/x/sys v0.26.0 +# golang.org/x/sys v0.27.0 ## explicit; go 1.18 golang.org/x/sys/cpu golang.org/x/sys/plan9 golang.org/x/sys/unix golang.org/x/sys/windows golang.org/x/sys/windows/registry -# golang.org/x/term v0.25.0 +# golang.org/x/term v0.26.0 ## explicit; go 1.18 golang.org/x/term -# golang.org/x/text v0.19.0 +# golang.org/x/text v0.20.0 ## explicit; go 1.18 golang.org/x/text/internal/language golang.org/x/text/internal/language/compact