Created
December 18, 2025 07:59
-
-
Save superboum/16cfcfa27f4dc0a0a77468876a50156b to your computer and use it in GitHub Desktop.
TCP RTT Linux EBPF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //go:build ignore | |
| #include "common.h" | |
| #include "bpf_endian.h" | |
| #include "bpf_tracing.h" | |
| #define AF_INET 2 | |
| char __license[] SEC("license") = "Dual MIT/GPL"; | |
| /** | |
| * For CO-RE relocatable eBPF programs, __attribute__((preserve_access_index)) | |
| * preserves the offset of the specified fields in the original kernel struct. | |
| * So here we don't need to include "vmlinux.h". Instead we only need to define | |
| * the kernel struct and their fields the eBPF program actually requires. | |
| * | |
| * Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf, | |
| * lsm, etc. declared using the BPF_PROG macro can read kernel memory without | |
| * needing to call bpf_probe_read*(). | |
| */ | |
| /** | |
| * struct sock_common is the minimal network layer representation of sockets. | |
| * This is a simplified copy of the kernel's struct sock_common. | |
| * This copy contains only the fields needed for this example to | |
| * fetch the source and destination port numbers and IP addresses. | |
| */ | |
| struct sock_common { | |
| union { | |
| struct { | |
| // skc_daddr is destination IP address | |
| __be32 skc_daddr; | |
| // skc_rcv_saddr is the source IP address | |
| __be32 skc_rcv_saddr; | |
| }; | |
| }; | |
| union { | |
| struct { | |
| // skc_dport is the destination TCP/UDP port | |
| __be16 skc_dport; | |
| // skc_num is the source TCP/UDP port | |
| __u16 skc_num; | |
| }; | |
| }; | |
| // skc_family is the network address family (2 for IPV4) | |
| short unsigned int skc_family; | |
| } __attribute__((preserve_access_index)); | |
| /** | |
| * struct sock is the network layer representation of sockets. | |
| * This is a simplified copy of the kernel's struct sock. | |
| * This copy is needed only to access struct sock_common. | |
| */ | |
| struct sock { | |
| struct sock_common __sk_common; | |
| } __attribute__((preserve_access_index)); | |
| /** | |
| * struct tcp_sock is the Linux representation of a TCP socket. | |
| * This is a simplified copy of the kernel's struct tcp_sock. | |
| * For this example we only need srtt_us to read the smoothed RTT. | |
| */ | |
| struct tcp_sock { | |
| u32 srtt_us; | |
| } __attribute__((preserve_access_index)); | |
| struct { | |
| __uint(type, BPF_MAP_TYPE_RINGBUF); | |
| __uint(max_entries, 1 << 24); | |
| __type(value, struct event); | |
| } events SEC(".maps"); | |
| /** | |
| * The sample submitted to userspace over a ring buffer. | |
| * Emit struct event's type info into the ELF's BTF so bpf2go | |
| * can generate a Go type from it. | |
| */ | |
| struct event { | |
| u16 sport; | |
| u16 dport; | |
| u32 saddr; | |
| u32 daddr; | |
| u32 srtt; | |
| }; | |
| SEC("fentry/tcp_close") | |
| int BPF_PROG(tcp_close, struct sock *sk) { | |
| if (sk->__sk_common.skc_family != AF_INET) { | |
| return 0; | |
| } | |
| // The input struct sock is actually a tcp_sock, so we can type-cast | |
| struct tcp_sock *ts = bpf_skc_to_tcp_sock(sk); | |
| if (!ts) { | |
| return 0; | |
| } | |
| struct event *tcp_info; | |
| tcp_info = bpf_ringbuf_reserve(&events, sizeof(struct event), 0); | |
| if (!tcp_info) { | |
| return 0; | |
| } | |
| tcp_info->saddr = sk->__sk_common.skc_rcv_saddr; | |
| tcp_info->daddr = sk->__sk_common.skc_daddr; | |
| tcp_info->dport = bpf_ntohs(sk->__sk_common.skc_dport); | |
| tcp_info->sport = sk->__sk_common.skc_num; | |
| tcp_info->srtt = ts->srtt_us >> 3; | |
| tcp_info->srtt /= 1000; | |
| bpf_ringbuf_submit(tcp_info, 0); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| A simple RTT tracker implemented using eBPF fentry hook to read RTT from TCP socket. | |
| */ | |
| #ifndef __RTT_TRACKER_H__ | |
| #define __RTT_TRACKER_H__ | |
| #include <bpf_tracing.h> | |
| #include "utils.h" | |
| #include "maps_definition.h" | |
| static inline int rtt_lookup_and_update_flow(flow_id *id, u64 rtt) { | |
| additional_metrics *extra_metrics = bpf_map_lookup_elem(&additional_flow_metrics, id); | |
| if (extra_metrics != NULL) { | |
| extra_metrics->end_mono_time_ts = bpf_ktime_get_ns(); | |
| if (extra_metrics->flow_rtt < rtt) { | |
| extra_metrics->flow_rtt = rtt; | |
| } | |
| return 0; | |
| } | |
| return -1; | |
| } | |
| static inline int calculate_flow_rtt_tcp(struct sock *sk, struct sk_buff *skb) { | |
| u8 dscp = 0, protocol = 0; | |
| struct tcp_sock *ts; | |
| u16 family = 0, flags = 0, eth_protocol = 0; | |
| u64 rtt = 0; | |
| int ret = 0; | |
| flow_id id; | |
| if (!enable_rtt) { | |
| return 0; | |
| } | |
| __builtin_memset(&id, 0, sizeof(id)); | |
| u32 if_index = BPF_CORE_READ(skb, skb_iif); | |
| // filter out TCP sockets with unknown or loopback interface | |
| if (if_index == 0 || if_index == 1) { | |
| return 0; | |
| } | |
| // read L2 info | |
| core_fill_in_l2(skb, ð_protocol, &family); | |
| // read L3 info | |
| core_fill_in_l3(skb, &id, family, &protocol, &dscp); | |
| if (protocol != IPPROTO_TCP) { | |
| return 0; | |
| } | |
| // read TCP info | |
| core_fill_in_tcp(skb, &id, &flags); | |
| // read TCP socket rtt and store it in nanoseconds | |
| ts = (struct tcp_sock *)(sk); | |
| rtt = BPF_CORE_READ(ts, srtt_us) >> 3; | |
| rtt *= 1000u; | |
| // check if this packet need to be filtered if filtering feature is enabled | |
| bool skip = check_and_do_flow_filtering(&id, flags, 0, eth_protocol, NULL, 0); | |
| if (skip) { | |
| return 0; | |
| } | |
| // update flow with rtt info | |
| ret = rtt_lookup_and_update_flow(&id, rtt); | |
| if (ret == 0) { | |
| return 0; | |
| } | |
| u64 current_time = bpf_ktime_get_ns(); | |
| additional_metrics new_flow; | |
| __builtin_memset(&new_flow, 0, sizeof(new_flow)); | |
| new_flow.start_mono_time_ts = current_time; | |
| new_flow.end_mono_time_ts = current_time; | |
| new_flow.eth_protocol = eth_protocol; | |
| new_flow.flow_rtt = rtt; | |
| ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST); | |
| if (ret != 0) { | |
| if (trace_messages && ret != -EEXIST) { | |
| bpf_printk("error rtt track creating flow %d\n", ret); | |
| } | |
| if (ret == -EEXIST) { | |
| ret = rtt_lookup_and_update_flow(&id, rtt); | |
| if (trace_messages && ret != 0) { | |
| bpf_printk("error rtt track updating an existing flow %d\n", ret); | |
| } | |
| } | |
| } | |
| return 0; | |
| } | |
| SEC("fentry/tcp_rcv_established") | |
| int BPF_PROG(tcp_rcv_fentry, struct sock *sk, struct sk_buff *skb) { | |
| if (sk == NULL || skb == NULL || do_sampling == 0) { | |
| return 0; | |
| } | |
| return calculate_flow_rtt_tcp(sk, skb); | |
| } | |
| SEC("kprobe/tcp_rcv_established") | |
| int BPF_KPROBE(tcp_rcv_kprobe, struct sock *sk, struct sk_buff *skb) { | |
| if (sk == NULL || skb == NULL || do_sampling == 0) { | |
| return 0; | |
| } | |
| return calculate_flow_rtt_tcp(sk, skb); | |
| } | |
| #endif /* __RTT_TRACKER_H__ */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment