superboum · December 18, 2025 07:59
diff --git a/cilium_rtt_example.c b/cilium_rtt_example.c
 //go:build ignore

 #include "common.h"

 #include "bpf_endian.h"
 #include "bpf_tracing.h"

 // Address family value for IPv4; tcp_close compares skc_family against it.
 #define AF_INET 2

 // License string emitted into the ELF "license" section.
 char __license[] SEC("license") = "Dual MIT/GPL";

 /**
 * For CO-RE relocatable eBPF programs, __attribute__((preserve_access_index))
 * preserves the offset of the specified fields in the original kernel struct.
 * So here we don't need to include "vmlinux.h". Instead we only need to define
 * the kernel struct and their fields the eBPF program actually requires.
 *
 * Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf,
 * lsm, etc. declared using the BPF_PROG macro can read kernel memory without
 * needing to call bpf_probe_read*().
 */

 /**
 * struct sock_common - trimmed-down mirror of the kernel's struct sock_common.
 *
 * Only the members this example reads are declared: the IPv4 addresses, the
 * TCP/UDP ports and the address family. The struct carries the
 * preserve_access_index attribute, so member accesses are CO-RE relocatable
 * and this declaration does not need to list every kernel field.
 */
 struct sock_common {
 	union {
 		struct {
 			// skc_daddr is the destination IP address (__be32: big-endian)
 			__be32 skc_daddr;
 			// skc_rcv_saddr is the source IP address (__be32: big-endian)
 			__be32 skc_rcv_saddr;
 		};
 	};
 	union {
 		struct {
 			// skc_dport is the destination TCP/UDP port (__be16: big-endian;
 			// tcp_close converts it with bpf_ntohs)
 			__be16 skc_dport;
 			// skc_num is the source TCP/UDP port (__u16; tcp_close copies it
 			// without byte swapping)
 			__u16 skc_num;
 		};
 	};
 	// skc_family is the network address family (2 for IPV4)
 	short unsigned int skc_family;
 } __attribute__((preserve_access_index));

 /**
 * struct sock - trimmed-down mirror of the kernel's struct sock.
 *
 * Declares only __sk_common, which is the single member this example
 * dereferences. Tagged preserve_access_index so accesses are CO-RE
 * relocatable.
 */
 struct sock {
 	// Embedded common socket fields (addresses, ports, family).
 	struct sock_common __sk_common;
 } __attribute__((preserve_access_index));

 /**
 * struct tcp_sock - trimmed-down mirror of the kernel's struct tcp_sock.
 *
 * Only srtt_us (the smoothed RTT) is declared because it is the only member
 * this example reads. Tagged preserve_access_index so the access is CO-RE
 * relocatable.
 */
 struct tcp_sock {
 	// Smoothed RTT; tcp_close shifts it right by 3 before use.
 	// NOTE(review): presumably stored as microseconds << 3 — confirm against
 	// the kernel's struct tcp_sock.
 	u32 srtt_us;
 } __attribute__((preserve_access_index));

 // Ring buffer map through which tcp_close submits struct event samples.
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
 	// 1 << 24 = 16,777,216; ring buffer maps take their size in bytes here.
 	__uint(max_entries, 1 << 24);
 	// Value type annotation; struct event is declared right after this map.
 	__type(value, struct event);
 } events SEC(".maps");

 /**
 * struct event - the sample submitted to userspace over the ring buffer.
 *
 * Emit struct event's type info into the ELF's BTF so bpf2go
 * can generate a Go type from it.
 */
 struct event {
 	// Source port, copied from skc_num without byte swapping.
 	u16 sport;
 	// Destination port, converted from skc_dport with bpf_ntohs.
 	u16 dport;
 	// Source IPv4 address, copied as-is from skc_rcv_saddr (__be32).
 	u32 saddr;
 	// Destination IPv4 address, copied as-is from skc_daddr (__be32).
 	u32 daddr;
 	// Smoothed RTT computed as (srtt_us >> 3) / 1000.
 	u32 srtt;
 };

 /**
 * tcp_close - fentry program attached to tcp_close().
 *
 * For IPv4 sockets only, reserves a struct event in the events ring buffer,
 * fills it with the socket's addresses, ports and smoothed RTT, and submits
 * it. Always returns 0; nothing is emitted when the socket is not AF_INET,
 * when bpf_skc_to_tcp_sock() yields NULL, or when the reservation fails.
 */
 SEC("fentry/tcp_close")
 int BPF_PROG(tcp_close, struct sock *sk) {
 	// Ignore everything that is not an IPv4 socket.
 	if (sk->__sk_common.skc_family != AF_INET) {
 		return 0;
 	}

 	// Obtain the tcp_sock view of the socket; bail out if unavailable.
 	struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
 	if (tp == NULL) {
 		return 0;
 	}

 	// Reserve room for one sample; a NULL result means no space was granted.
 	struct event *ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0);
 	if (ev == NULL) {
 		return 0;
 	}

 	// Ports: skc_num is copied as-is, skc_dport is byte-swapped to host order.
 	ev->sport = sk->__sk_common.skc_num;
 	ev->dport = bpf_ntohs(sk->__sk_common.skc_dport);

 	// Addresses are copied without conversion.
 	ev->saddr = sk->__sk_common.skc_rcv_saddr;
 	ev->daddr = sk->__sk_common.skc_daddr;

 	// Drop the 3 low bits of srtt_us, then scale down by 1000.
 	ev->srtt = (tp->srtt_us >> 3) / 1000;

 	bpf_ringbuf_submit(ev, 0);

 	return 0;
 }
diff --git a/netobserv_rtt.c b/netobserv_rtt.c
 /*
    A simple RTT tracker that reads the smoothed RTT from the TCP socket, implemented with eBPF fentry and kprobe hooks on tcp_rcv_established.
 */

 #ifndef __RTT_TRACKER_H__
 #define __RTT_TRACKER_H__

 #include <bpf_tracing.h>
 #include "utils.h"
 #include "maps_definition.h"

 /*
  * Refresh an existing entry of additional_flow_metrics for the given flow id.
  *
  * Sets the entry's end_mono_time_ts to the current bpf_ktime_get_ns() value
  * and raises flow_rtt to rtt when rtt is larger than the stored value.
  *
  * Returns 0 when the entry exists, -1 when the lookup finds nothing.
  */
 static inline int rtt_lookup_and_update_flow(flow_id *id, u64 rtt) {
    additional_metrics *metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
    if (metrics == NULL) {
        return -1;
    }

    metrics->end_mono_time_ts = bpf_ktime_get_ns();
    // Keep the largest RTT seen for this flow.
    if (rtt > metrics->flow_rtt) {
        metrics->flow_rtt = rtt;
    }
    return 0;
 }

 /*
  * Record the smoothed RTT of sk against the flow described by skb.
  *
  * Builds a flow_id from skb via the core_fill_in_* helpers, reads srtt_us
  * from the socket, and stores the result in additional_flow_metrics: an
  * existing entry is refreshed through rtt_lookup_and_update_flow(), otherwise
  * a new entry is inserted with BPF_NOEXIST.
  *
  * Nothing is recorded when enable_rtt is false, when skb_iif is 0 or 1,
  * when the L3 protocol is not TCP, or when check_and_do_flow_filtering()
  * asks to skip the flow.
  *
  * Always returns 0; map errors are only reported through bpf_printk when
  * trace_messages is set.
  */
 static inline int calculate_flow_rtt_tcp(struct sock *sk, struct sk_buff *skb) {
    u8 dscp = 0, protocol = 0;
    struct tcp_sock *ts;
    u16 family = 0, flags = 0, eth_protocol = 0;
    u64 rtt = 0;
    int ret = 0;
    flow_id id;

    if (!enable_rtt) {
        return 0;
    }
    // zero the whole key, padding included, before it is filled in
    __builtin_memset(&id, 0, sizeof(id));

    u32 if_index = BPF_CORE_READ(skb, skb_iif);
    // filter out TCP sockets with unknown or loopback interface
    if (if_index == 0 || if_index == 1) {
        return 0;
    }

    // read L2 info
    core_fill_in_l2(skb, &eth_protocol, &family);

    // read L3 info (dscp is filled in here but not used afterwards in this function)
    core_fill_in_l3(skb, &id, family, &protocol, &dscp);

    if (protocol != IPPROTO_TCP) {
        return 0;
    }

    // read TCP info
    core_fill_in_tcp(skb, &id, &flags);

    // read TCP socket rtt and store it in nanoseconds:
    // drop the 3 low bits of srtt_us, then multiply by 1000
    ts = (struct tcp_sock *)(sk);
    rtt = BPF_CORE_READ(ts, srtt_us) >> 3;
    rtt *= 1000u;

    // check whether this packet needs to be filtered out if the filtering feature is enabled
    bool skip = check_and_do_flow_filtering(&id, flags, 0, eth_protocol, NULL, 0);
    if (skip) {
        return 0;
    }

    // update flow with rtt info; a zero result means the entry already existed
    ret = rtt_lookup_and_update_flow(&id, rtt);
    if (ret == 0) {
        return 0;
    }

    // no entry yet: create one whose start and end timestamps are both "now"
    u64 current_time = bpf_ktime_get_ns();
    additional_metrics new_flow;
    __builtin_memset(&new_flow, 0, sizeof(new_flow));
    new_flow.start_mono_time_ts = current_time;
    new_flow.end_mono_time_ts = current_time;
    new_flow.eth_protocol = eth_protocol;
    new_flow.flow_rtt = rtt;
    ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST);
    if (ret != 0) {
        if (trace_messages && ret != -EEXIST) {
            bpf_printk("error rtt track creating flow %d\n", ret);
        }
        // -EEXIST: the entry appeared between the lookup and the insert
        // (presumably created concurrently elsewhere), so retry the update path
        if (ret == -EEXIST) {
            ret = rtt_lookup_and_update_flow(&id, rtt);
            if (trace_messages && ret != 0) {
                bpf_printk("error rtt track updating an existing flow %d\n", ret);
            }
        }
    }

    return 0;
 }

 /*
  * fentry program on tcp_rcv_established: forwards to calculate_flow_rtt_tcp()
  * when both arguments are non-NULL and do_sampling is non-zero; otherwise
  * returns 0 without doing anything.
  */
 SEC("fentry/tcp_rcv_established")
 int BPF_PROG(tcp_rcv_fentry, struct sock *sk, struct sk_buff *skb) {
    if (sk != NULL && skb != NULL && do_sampling != 0) {
        return calculate_flow_rtt_tcp(sk, skb);
    }
    return 0;
 }

 /*
  * kprobe program on tcp_rcv_established: forwards to calculate_flow_rtt_tcp()
  * when both arguments are non-NULL and do_sampling is non-zero; otherwise
  * returns 0 without doing anything.
  */
 SEC("kprobe/tcp_rcv_established")
 int BPF_KPROBE(tcp_rcv_kprobe, struct sock *sk, struct sk_buff *skb) {
    if (sk != NULL && skb != NULL && do_sampling != 0) {
        return calculate_flow_rtt_tcp(sk, skb);
    }
    return 0;
 }

 #endif /* __RTT_TRACKER_H__ */
	//go:build ignore

	#include "common.h"

	#include "bpf_endian.h"
	#include "bpf_tracing.h"

	// Address family value for IPv4; tcp_close compares skc_family against it.
	#define AF_INET 2

	// License string emitted into the ELF "license" section.
	char __license[] SEC("license") = "Dual MIT/GPL";

	/**
	* For CO-RE relocatable eBPF programs, __attribute__((preserve_access_index))
	* preserves the offset of the specified fields in the original kernel struct.
	* So here we don't need to include "vmlinux.h". Instead we only need to define
	* the kernel struct and their fields the eBPF program actually requires.
	*
	* Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf,
	* lsm, etc. declared using the BPF_PROG macro can read kernel memory without
	* needing to call bpf_probe_read*().
	*/

	/**
	 * struct sock_common - trimmed-down mirror of the kernel's struct sock_common.
	 *
	 * Only the members this example reads are declared: the IPv4 addresses,
	 * the TCP/UDP ports and the address family. The preserve_access_index
	 * attribute makes member accesses CO-RE relocatable.
	 */
	struct sock_common {
		union {
			struct {
				// Destination IP address.
				__be32 skc_daddr;
				// Source IP address.
				__be32 skc_rcv_saddr;
			};
		};
		union {
			struct {
				// Destination TCP/UDP port.
				__be16 skc_dport;
				// Source TCP/UDP port.
				__u16 skc_num;
			};
		};
		// Network address family (2 for IPv4).
		short unsigned int skc_family;
	} __attribute__((preserve_access_index));

	/**
	 * struct sock - trimmed-down mirror of the kernel's struct sock.
	 *
	 * Declares only __sk_common, the single member this example dereferences.
	 */
	struct sock {
		// Embedded common socket fields (addresses, ports, family).
		struct sock_common __sk_common;
	} __attribute__((preserve_access_index));

	/**
	 * struct tcp_sock - trimmed-down mirror of the kernel's struct tcp_sock.
	 *
	 * Only srtt_us is declared; it is the one member read to obtain the
	 * smoothed RTT.
	 */
	struct tcp_sock {
		// Smoothed RTT; tcp_close shifts it right by 3 before use.
		u32 srtt_us;
	} __attribute__((preserve_access_index));

	// Ring buffer map through which tcp_close submits struct event samples.
	struct {
		__uint(type, BPF_MAP_TYPE_RINGBUF);
		__uint(max_entries, 1 << 24);
		__type(value, struct event);
	} events SEC(".maps");

	/**
	 * struct event - the sample submitted to userspace over the ring buffer.
	 *
	 * Its type info is emitted into the ELF's BTF so bpf2go can generate a
	 * matching Go type.
	 */
	struct event {
		// Source port, copied from skc_num without byte swapping.
		u16 sport;
		// Destination port, converted from skc_dport with bpf_ntohs.
		u16 dport;
		// Source IPv4 address, copied as-is from skc_rcv_saddr.
		u32 saddr;
		// Destination IPv4 address, copied as-is from skc_daddr.
		u32 daddr;
		// Smoothed RTT computed as (srtt_us >> 3) / 1000.
		u32 srtt;
	};

	/**
	 * tcp_close - fentry program attached to tcp_close().
	 *
	 * For IPv4 sockets only, reserves a struct event in the events ring
	 * buffer, fills it with the socket's addresses, ports and smoothed RTT,
	 * and submits it. Always returns 0; nothing is emitted when the socket is
	 * not AF_INET, when bpf_skc_to_tcp_sock() yields NULL, or when the
	 * reservation fails.
	 */
	SEC("fentry/tcp_close")
	int BPF_PROG(tcp_close, struct sock *sk) {
		// Ignore everything that is not an IPv4 socket.
		if (sk->__sk_common.skc_family != AF_INET) {
			return 0;
		}

		// Obtain the tcp_sock view of the socket; bail out if unavailable.
		struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
		if (tp == NULL) {
			return 0;
		}

		// Reserve room for one sample; NULL means no space was granted.
		struct event *ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0);
		if (ev == NULL) {
			return 0;
		}

		// Ports: skc_num is copied as-is, skc_dport is byte-swapped.
		ev->sport = sk->__sk_common.skc_num;
		ev->dport = bpf_ntohs(sk->__sk_common.skc_dport);

		// Addresses are copied without conversion.
		ev->saddr = sk->__sk_common.skc_rcv_saddr;
		ev->daddr = sk->__sk_common.skc_daddr;

		// Drop the 3 low bits of srtt_us, then scale down by 1000.
		ev->srtt = (tp->srtt_us >> 3) / 1000;

		bpf_ringbuf_submit(ev, 0);

		return 0;
	}
	/*
	A simple RTT tracker that reads the smoothed RTT from the TCP socket, implemented with eBPF fentry and kprobe hooks on tcp_rcv_established.
	*/

	#ifndef __RTT_TRACKER_H__
	#define __RTT_TRACKER_H__

	#include <bpf_tracing.h>
	#include "utils.h"
	#include "maps_definition.h"

	/*
	 * Refresh an existing entry of additional_flow_metrics for the given
	 * flow id.
	 *
	 * Sets the entry's end_mono_time_ts to the current bpf_ktime_get_ns()
	 * value and raises flow_rtt to rtt when rtt is larger than the stored
	 * value.
	 *
	 * Returns 0 when the entry exists, -1 when the lookup finds nothing.
	 */
	static inline int rtt_lookup_and_update_flow(flow_id *id, u64 rtt) {
		additional_metrics *metrics = bpf_map_lookup_elem(&additional_flow_metrics, id);
		if (metrics == NULL) {
			return -1;
		}

		metrics->end_mono_time_ts = bpf_ktime_get_ns();
		// Keep the largest RTT seen for this flow.
		if (rtt > metrics->flow_rtt) {
			metrics->flow_rtt = rtt;
		}
		return 0;
	}

	/*
	 * Record the smoothed RTT of sk against the flow described by skb.
	 *
	 * Builds a flow_id from skb via the core_fill_in_* helpers, reads srtt_us
	 * from the socket, and stores the result in additional_flow_metrics: an
	 * existing entry is refreshed through rtt_lookup_and_update_flow(),
	 * otherwise a new entry is inserted with BPF_NOEXIST.
	 *
	 * Nothing is recorded when enable_rtt is false, when skb_iif is 0 or 1,
	 * when the L3 protocol is not TCP, or when check_and_do_flow_filtering()
	 * asks to skip the flow.
	 *
	 * sk and skb are pointers (callers check them for NULL beforehand).
	 * Always returns 0; map errors are only reported through bpf_printk when
	 * trace_messages is set.
	 */
	static inline int calculate_flow_rtt_tcp(struct sock *sk, struct sk_buff *skb) {
		u8 dscp = 0, protocol = 0;
		struct tcp_sock *ts;
		u16 family = 0, flags = 0, eth_protocol = 0;
		u64 rtt = 0;
		int ret = 0;
		flow_id id;

		if (!enable_rtt) {
			return 0;
		}
		// zero the whole key, padding included, before it is filled in
		__builtin_memset(&id, 0, sizeof(id));

		u32 if_index = BPF_CORE_READ(skb, skb_iif);
		// filter out TCP sockets with unknown or loopback interface
		if (if_index == 0 || if_index == 1) {
			return 0;
		}

		// read L2 info
		core_fill_in_l2(skb, &eth_protocol, &family);

		// read L3 info
		core_fill_in_l3(skb, &id, family, &protocol, &dscp);

		if (protocol != IPPROTO_TCP) {
			return 0;
		}

		// read TCP info
		core_fill_in_tcp(skb, &id, &flags);

		// read TCP socket rtt and store it in nanoseconds:
		// drop the 3 low bits of srtt_us, then multiply by 1000
		ts = (struct tcp_sock *)(sk);
		rtt = BPF_CORE_READ(ts, srtt_us) >> 3;
		rtt *= 1000u;

		// check whether this packet needs to be filtered out if the
		// filtering feature is enabled
		bool skip = check_and_do_flow_filtering(&id, flags, 0, eth_protocol, NULL, 0);
		if (skip) {
			return 0;
		}

		// update flow with rtt info; a zero result means the entry existed
		ret = rtt_lookup_and_update_flow(&id, rtt);
		if (ret == 0) {
			return 0;
		}

		// no entry yet: create one whose start and end timestamps are "now"
		u64 current_time = bpf_ktime_get_ns();
		additional_metrics new_flow;
		__builtin_memset(&new_flow, 0, sizeof(new_flow));
		new_flow.start_mono_time_ts = current_time;
		new_flow.end_mono_time_ts = current_time;
		new_flow.eth_protocol = eth_protocol;
		new_flow.flow_rtt = rtt;
		ret = bpf_map_update_elem(&additional_flow_metrics, &id, &new_flow, BPF_NOEXIST);
		if (ret != 0) {
			if (trace_messages && ret != -EEXIST) {
				bpf_printk("error rtt track creating flow %d\n", ret);
			}
			// -EEXIST: the entry appeared between the lookup and the
			// insert, so retry the update path
			if (ret == -EEXIST) {
				ret = rtt_lookup_and_update_flow(&id, rtt);
				if (trace_messages && ret != 0) {
					bpf_printk("error rtt track updating an existing flow %d\n", ret);
				}
			}
		}

		return 0;
	}

	/*
	 * fentry program on tcp_rcv_established: forwards to
	 * calculate_flow_rtt_tcp() unless sk or skb is NULL or do_sampling is 0,
	 * in which case it returns 0 without doing anything.
	 */
	SEC("fentry/tcp_rcv_established")
	int BPF_PROG(tcp_rcv_fentry, struct sock *sk, struct sk_buff *skb) {
		if (sk == NULL || skb == NULL || do_sampling == 0) {
			return 0;
		}
		return calculate_flow_rtt_tcp(sk, skb);
	}

	/*
	 * kprobe program on tcp_rcv_established: same behavior as the fentry
	 * variant, for attachment through a kprobe.
	 */
	SEC("kprobe/tcp_rcv_established")
	int BPF_KPROBE(tcp_rcv_kprobe, struct sock *sk, struct sk_buff *skb) {
		if (sk == NULL || skb == NULL || do_sampling == 0) {
			return 0;
		}
		return calculate_flow_rtt_tcp(sk, skb);
	}

	#endif /* __RTT_TRACKER_H__ */