Skip to content

Instantly share code, notes, and snippets.

@superboum
Created December 18, 2025 07:59
Show Gist options
  • Select an option

  • Save superboum/16cfcfa27f4dc0a0a77468876a50156b to your computer and use it in GitHub Desktop.

Select an option

Save superboum/16cfcfa27f4dc0a0a77468876a50156b to your computer and use it in GitHub Desktop.
TCP RTT Linux EBPF
//go:build ignore
#include "common.h"
#include "bpf_endian.h"
#include "bpf_tracing.h"
#define AF_INET 2
char __license[] SEC("license") = "Dual MIT/GPL";
/**
* For CO-RE relocatable eBPF programs, __attribute__((preserve_access_index))
* preserves the offset of the specified fields in the original kernel struct.
* So here we don't need to include "vmlinux.h". Instead we only need to define
* the kernel struct and their fields the eBPF program actually requires.
*
* Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf,
* lsm, etc. declared using the BPF_PROG macro can read kernel memory without
* needing to call bpf_probe_read*().
*/
/**
* struct sock_common is the minimal network layer representation of sockets.
* This is a simplified copy of the kernel's struct sock_common.
* This copy contains only the fields needed for this example to
* fetch the source and destination port numbers and IP addresses,
* plus the address family that is used to filter for IPv4 sockets.
*
* The struct is marked preserve_access_index so that accesses to these
* fields are CO-RE relocatable; this copy therefore only has to name the
* fields, not reproduce the full kernel layout.
*/
struct sock_common {
union {
struct {
// skc_daddr is destination IP address (declared __be32)
__be32 skc_daddr;
// skc_rcv_saddr is the source IP address (declared __be32)
__be32 skc_rcv_saddr;
};
};
union {
struct {
// skc_dport is the destination TCP/UDP port (declared __be16;
// readers in this file convert it with bpf_ntohs())
__be16 skc_dport;
// skc_num is the source TCP/UDP port (declared as a plain __u16;
// readers in this file use it without byte-order conversion)
__u16 skc_num;
};
};
// skc_family is the network address family (2 for IPV4)
short unsigned int skc_family;
} __attribute__((preserve_access_index));
/**
* struct sock is the network layer representation of sockets.
* This is a simplified copy of the kernel's struct sock.
* This copy is needed only to access struct sock_common, which is
* reached through the __sk_common member.
* Like the other kernel struct copies in this file it is marked
* preserve_access_index so that field accesses are CO-RE relocatable.
*/
struct sock {
// Embedded common socket fields (addresses, ports, address family).
struct sock_common __sk_common;
} __attribute__((preserve_access_index));
/**
* struct tcp_sock is the Linux representation of a TCP socket.
* This is a simplified copy of the kernel's struct tcp_sock.
* For this example we only need srtt_us to read the smoothed RTT.
* It is marked preserve_access_index so that the access to srtt_us is
* CO-RE relocatable.
*/
struct tcp_sock {
// Smoothed RTT. The kernel keeps this value in microseconds scaled by 8,
// which is why the reader in this file shifts it right by 3 before use.
u32 srtt_us;
} __attribute__((preserve_access_index));
/**
* Ring buffer map through which the tcp_close program submits
* struct event samples to userspace.
*
* For BPF_MAP_TYPE_RINGBUF, max_entries is the size of the buffer in
* bytes, so 1 << 24 reserves 16 MiB.
*
* NOTE(review): the value type annotation appears to exist so that the
* type info of struct event ends up in the ELF's BTF for bpf2go; confirm
* that the loader in use accepts a value type on a ring buffer map.
*/
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24);
__type(value, struct event);
} events SEC(".maps");
/**
* The sample submitted to userspace over a ring buffer.
* Emit struct event's type info into the ELF's BTF so bpf2go
* can generate a Go type from it.
*
* The per-field notes below describe how the tcp_close program in this
* file fills in each member.
*/
struct event {
// Source port, copied from skc_num without byte-order conversion.
u16 sport;
// Destination port, converted from skc_dport with bpf_ntohs().
u16 dport;
// Source IPv4 address, copied as-is from the __be32 skc_rcv_saddr.
u32 saddr;
// Destination IPv4 address, copied as-is from the __be32 skc_daddr.
u32 daddr;
// Smoothed RTT, computed as (srtt_us >> 3) / 1000.
u32 srtt;
};
/**
* fentry program attached to tcp_close().
*
* For every IPv4 TCP socket being closed, one struct event carrying the
* socket's addresses, ports and smoothed RTT is pushed to the events ring
* buffer. Sockets of another address family, sockets that cannot be cast
* to a tcp_sock, and samples for which no ring buffer space could be
* reserved are skipped silently.
*
* Always returns 0.
*/
SEC("fentry/tcp_close")
int BPF_PROG(tcp_close, struct sock *sk) {
    // Only IPv4 sockets are reported.
    if (sk->__sk_common.skc_family != AF_INET) {
        return 0;
    }

    // Let the helper perform the sock -> tcp_sock cast; it yields NULL
    // when the socket is not a TCP socket.
    struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
    if (!tp) {
        return 0;
    }

    // Reserve room for exactly one sample; bail out if the buffer is full.
    struct event *sample = bpf_ringbuf_reserve(&events, sizeof(*sample), 0);
    if (!sample) {
        return 0;
    }

    // Addresses are copied verbatim from the __be32 socket fields.
    sample->saddr = sk->__sk_common.skc_rcv_saddr;
    sample->daddr = sk->__sk_common.skc_daddr;

    // skc_dport is __be16 and gets converted; skc_num is used as stored.
    sample->dport = bpf_ntohs(sk->__sk_common.skc_dport);
    sample->sport = sk->__sk_common.skc_num;

    // Drop the 3 scaling bits of srtt_us, then divide by 1000.
    sample->srtt = (tp->srtt_us >> 3) / 1000;

    bpf_ringbuf_submit(sample, 0);
    return 0;
}
/*
A simple RTT tracker that reads the smoothed RTT from the TCP socket, implemented
using eBPF fentry and kprobe hooks on tcp_rcv_established.
*/
#ifndef __RTT_TRACKER_H__
#define __RTT_TRACKER_H__
#include <bpf_tracing.h>
#include "utils.h"
#include "maps_definition.h"
/*
 * Refresh the additional_flow_metrics entry of a flow with a new RTT sample.
 *
 * When an entry for id exists, its end_mono_time_ts is set to the current
 * bpf_ktime_get_ns() value and flow_rtt is raised to rtt if rtt is larger
 * than the stored value (i.e. the entry keeps the maximum RTT seen).
 *
 * Returns 0 when the entry was found and updated, -1 when no entry exists.
 */
static inline int rtt_lookup_and_update_flow(flow_id *id, u64 rtt) {
    additional_metrics *entry = bpf_map_lookup_elem(&additional_flow_metrics, id);
    if (entry == NULL) {
        return -1;
    }

    entry->end_mono_time_ts = bpf_ktime_get_ns();
    // Only ever increase the stored RTT.
    if (rtt > entry->flow_rtt) {
        entry->flow_rtt = rtt;
    }
    return 0;
}
static inline int calculate_flow_rtt_tcp(struct sock *sk, struct sk_buff *skb) {
u8 dscp = 0, protocol = 0;
struct tcp_sock *ts;
u16 family = 0, flags = 0, eth_protocol = 0;
u64 rtt = 0;
int ret = 0;
flow_id id;
if (!enable_rtt) {
return 0;
}
__builtin_memset(&id, 0, sizeof(id));
u32 if_index = BPF_CORE_READ(skb, skb_iif);
// filter out TCP sockets with unknown or loopback interface
if (if_index == 0 || if_index == 1) {
return 0;
}
// read L2 info
core_fill_in_l2(skb, &eth_protocol, &family);
// read L3 info
core_fill_in_l3(skb, &id, family, &protocol, &dscp);
if (protocol != IPPROTO_TCP) {
return 0;
}
// read TCP info
core_fill_in_tcp(skb, &id, &flags);
// read TCP socket rtt and store it in nanoseconds
ts = (struct tcp_sock *)(sk);
rtt = BPF_CORE_READ(ts, srtt_us) >> 3;
rtt *= 1000u;
// check if this packet need to be filtered if filtering feature is enabled
bool skip = check_and_do_flow_filtering(&id, flags, 0, eth_protocol, NULL, 0);
if (skip) {
return 0;
}
// update flow with rtt info
ret = rtt_lookup_and_update_flow(&id, rtt);
if (ret == 0) {
return 0;
}
u64 current_time = bpf_ktime_get_ns();
additional_metrics new_flow;
__builtin_memset(&new_flow, 0, sizeof(new_flow));
new_flow.start_mono_time_ts = current_time;
new_flow.end_mono_time_ts = current_time;
new_flow.eth_protocol = eth_protocol;
new_flow.flow_rtt = rtt;
ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST);
if (ret != 0) {
if (trace_messages && ret != -EEXIST) {
bpf_printk("error rtt track creating flow %d\n", ret);
}
if (ret == -EEXIST) {
ret = rtt_lookup_and_update_flow(&id, rtt);
if (trace_messages && ret != 0) {
bpf_printk("error rtt track updating an existing flow %d\n", ret);
}
}
}
return 0;
}
/*
 * fentry hook on tcp_rcv_established().
 * Forwards the socket and packet to calculate_flow_rtt_tcp() unless either
 * pointer is NULL or do_sampling is 0, in which case it returns 0 right away.
 */
SEC("fentry/tcp_rcv_established")
int BPF_PROG(tcp_rcv_fentry, struct sock *sk, struct sk_buff *skb) {
    if (!sk || !skb || !do_sampling) {
        return 0;
    }

    return calculate_flow_rtt_tcp(sk, skb);
}
/*
 * kprobe hook on tcp_rcv_established().
 * Forwards the socket and packet to calculate_flow_rtt_tcp() unless either
 * pointer is NULL or do_sampling is 0, in which case it returns 0 right away.
 */
SEC("kprobe/tcp_rcv_established")
int BPF_KPROBE(tcp_rcv_kprobe, struct sock *sk, struct sk_buff *skb) {
    if (!sk || !skb || !do_sampling) {
        return 0;
    }

    return calculate_flow_rtt_tcp(sk, skb);
}
#endif /* __RTT_TRACKER_H__ */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment