Last active
February 8, 2026 12:07
-
-
Save oddmario/bf1058f0083e237d8a896e3c300c9285 to your computer and use it in GitHub Desktop.
net_drops is a Python script that helps you monitor RX, TX and qdisc packet drops on Linux systems in real time.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # https://gist.github.com/oddmario/bf1058f0083e237d8a896e3c300c9285 | |
| import os | |
| import time | |
| import subprocess | |
| import re | |
| import argparse | |
| import sys | |
def get_sysfs_value(iface, metric):
    """Return one integer counter from /sys/class/net/<iface>/statistics.

    Falls back to 0 when the counter file cannot be opened or read, or
    when its content is not a valid integer.
    """
    counter_file = "/sys/class/net/{}/statistics/{}".format(iface, metric)
    try:
        with open(counter_file, 'r') as handle:
            value = int(handle.read().strip())
    except (IOError, ValueError):
        # Best effort: an unreadable or malformed counter counts as zero.
        value = 0
    return value
def get_duplex(iface):
    """Return the duplex mode read from /sys/class/net/<iface>/duplex.

    The file content is returned with surrounding whitespace stripped.
    An empty string is returned when the file cannot be read.
    """
    duplex_file = "/sys/class/net/{}/duplex".format(iface)
    try:
        with open(duplex_file, 'r') as handle:
            content = handle.read()
    except (IOError, ValueError):
        # Best effort: treat an unreadable duplex file as "no value".
        return ""
    return str(content.strip())
def get_qdisc_drops(iface):
    """Return the summed qdisc drop counter for an interface.

    Runs ``tc -s qdisc show dev <iface>`` and adds up the ``dropped``
    value from the generic statistics line of every qdisc listed, i.e.
    the ``(dropped N, overlimits ...`` part of the output.

    Only that parenthesised field is matched. Some qdiscs print a second
    ``dropped N`` value in their own extended statistics (pie appears to
    do so), which an unanchored match would count twice.

    NOTE(review): with hierarchical setups (e.g. an ``mq`` root with
    per-queue children) the parent's counter may already include its
    children's drops -- confirm before relying on absolute totals.

    Args:
        iface: Name of the network interface, e.g. ``"eth0"``.

    Returns:
        The cumulative number of drops as an int. 0 is returned when
        ``tc`` cannot be started, exits with a non-zero status, or
        prints no drop counters.
    """
    try:
        result = subprocess.run(
            ['tc', '-s', 'qdisc', 'show', 'dev', iface],
            capture_output=True,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # tc missing / not executable, invalid argument, or undecodable
        # output: this metric is best effort, so report "no drops".
        return 0

    if result.returncode != 0:
        return 0

    # \d+ guarantees every captured group converts cleanly with int().
    return sum(
        int(count)
        for count in re.findall(r'\(dropped\s+(\d+)', result.stdout)
    )
def get_interface_stats(iface):
    """Take a snapshot of the packet and drop counters of an interface.

    The returned dict holds the sysfs counters 'rx_packets',
    'rx_dropped', 'tx_packets' and 'tx_dropped', plus 'qdisc_dropped'
    as reported by get_qdisc_drops().
    """
    snapshot = {}
    for counter in ('rx_packets', 'rx_dropped', 'tx_packets', 'tx_dropped'):
        snapshot[counter] = get_sysfs_value(iface, counter)
    snapshot['qdisc_dropped'] = get_qdisc_drops(iface)
    return snapshot
def calculate_rate(dropped_count, good_packet_count):
    """Return the share of dropped packets as a percentage.

    The denominator is the total number of events, i.e. good packets
    plus dropped packets.

    Args:
        dropped_count: Number of dropped packets.
        good_packet_count: Number of packets that were not dropped.

    Returns:
        The drop rate as a float percentage. 0.0 is returned when there
        were no events at all, and also when either count is negative:
        a negative count means a counter went backwards (for example a
        reset between two samples), so no meaningful rate exists.
    """
    if dropped_count < 0 or good_packet_count < 0:
        return 0.0
    total_events = good_packet_count + dropped_count
    if total_events == 0:
        return 0.0
    return (dropped_count / total_events) * 100
def _print_recommendations():
    """Print the static tuning advice shown once before monitoring starts."""
    print("Recommendations:")
    print("- If you see a high TX drop rate, consider increasing the txqueuelen of your interface.")
    print("- If you see a high RX drop rate, consider increasing your kernel net.core.netdev_max_backlog.")
    print("")
    print("- If you see a high qdisc drop rate, consider increasing your qdisc's max packets queue limit (note that for some qdiscs like fq and fq_codel, it's normal to have a few drops every now and then because such qdiscs intentionally drop packets to work properly; as long as the drops aren't huge like thousands per second).")
    print("You can check if you're hitting your qdisc max packets queue size by running `watch -n 0.1 \"tc -s qdisc show dev [iface name] | grep backlog\"`")
    print("The output will be like 'backlog 9084b 2p requeues 0'. This means that there are currently 2 packets in the qdisc queue")
    print("If you're using the fq qdisc: If you're seeing qdisc drops but your currently-queued fq packets aren't exceeding your fq queue size limit, then maybe the drops are happening because the flow_limit of certain connections is being hit. Note that increasing the flow_limit of fq is usually NOT a good idea because it's one of the main features of fq which help avoiding bufferbloat by limiting the number of packets per connection.")
    print("")
    print("- If increasing the txqueuelen, qdisc queue size and/or the netdev_max_backlog doesn't solve your packet drops, consider increasing your NIC's RX/TX ring buffer sizes using ethtool.")
    print("- A 'high drop rate' is something >= 0.5% or 1.0% (or a continuously increasing drop rate every few seconds/milliseconds)")
    print("- If none of the above solve your problems, you may be running out of bandwidth or you need a better NIC (or maybe there's a [temporary] problem with your ISP/hosting provider's network).")
    print("- You can also try to increase the kernel's TCP and UDP buffer sizes to absorb any excessive packet drops.")
    print("- Make sure NOT to increase any queues/buffer sizes by crazy amounts to avoid causing bufferbloating on your network (especially the TX queues/buffer sizes).")
    print("")


def main():
    """Command-line entry point: print live drop rates for one interface.

    Parses the interface name and the update interval, checks that the
    interface exists under /sys/class/net, warns when its duplex is not
    'full', prints tuning advice, and then prints one line per interval
    with the RX, TX and qdisc drop rates. Each rate is shown both for
    the last interval and for the cumulative counters.

    The loop runs until interrupted with Ctrl+C. The process exits with
    status 1 when the interface does not exist and with argparse's usage
    error when the interval is negative.
    """
    parser = argparse.ArgumentParser(description="Monitor Network Drop Rates (Live vs Cumulative)")
    parser.add_argument("interface", help="The network interface (e.g., eth0)")
    parser.add_argument("-i", "--interval", type=int, default=1, help="Update interval in seconds")
    args = parser.parse_args()

    # time.sleep() raises ValueError on negative values; reject them up
    # front with a proper usage error instead of a traceback mid-run.
    if args.interval < 0:
        parser.error("--interval must not be negative")

    iface = args.interface
    if not os.path.exists(f"/sys/class/net/{iface}"):
        print(f"Error: Interface '{iface}' not found.", file=sys.stderr)
        sys.exit(1)

    curr_duplex = get_duplex(iface)
    if curr_duplex != "full":
        print(f"WARNING: The current duplex of your interface is '{curr_duplex}'. The duplex should be 'full' on wired bare-metal servers to ensure that no packet loss will happen. Consider checking if the auto-negotiation behavior of your network is faulty.")
        print("The duplex is usually 'unknown' or unset on VMs using virtio-net and also on wireless interfaces.")
        print("")

    _print_recommendations()

    print(f"Monitoring {iface} (Interval: {args.interval}s)")
    print("Rates displayed as: Current Interval / Total Since Boot")
    separator = "-" * 85
    print(separator)
    # Header formatting
    headers = f"{'Time':<10} | {'RX Drop % (Now/Tot)':<22} | {'TX Drop % (Now/Tot)':<22} | {'Qdisc % (Now/Tot)':<22}"
    print(headers)
    print(separator)

    try:
        # Initialize baseline for delta calculations
        prev_stats = get_interface_stats(iface)
        while True:
            time.sleep(args.interval)
            curr_stats = get_interface_stats(iface)

            # --- 1. Calculate Deltas (Traffic in the last interval) ---
            # Clamp at zero: a counter that went backwards (reset, or a
            # failed read reported as 0) must not yield a negative rate.
            deltas = {
                key: max(0, curr_stats[key] - prev_stats[key])
                for key in curr_stats
            }

            # Delta Rates
            rx_rate_now = calculate_rate(deltas['rx_dropped'], deltas['rx_packets'])
            tx_rate_now = calculate_rate(deltas['tx_dropped'], deltas['tx_packets'])
            # Qdisc is usually on TX path
            qdisc_rate_now = calculate_rate(deltas['qdisc_dropped'], deltas['tx_packets'])

            # --- 2. Calculate Totals (Traffic since boot) ---
            # We use the raw counters directly from curr_stats
            rx_rate_tot = calculate_rate(curr_stats['rx_dropped'], curr_stats['rx_packets'])
            tx_rate_tot = calculate_rate(curr_stats['tx_dropped'], curr_stats['tx_packets'])
            qdisc_rate_tot = calculate_rate(curr_stats['qdisc_dropped'], curr_stats['tx_packets'])

            # --- 3. Formatting Output ---
            # Creates strings like "0.05% / 1.20%"
            rx_str = f"{rx_rate_now:.2f}% / {rx_rate_tot:.2f}%"
            tx_str = f"{tx_rate_now:.2f}% / {tx_rate_tot:.2f}%"
            qd_str = f"{qdisc_rate_now:.2f}% / {qdisc_rate_tot:.2f}%"
            timestamp = time.strftime("%H:%M:%S")
            print(f"{timestamp:<10} | {rx_str:<22} | {tx_str:<22} | {qd_str:<22}")

            # Update previous stats for the next loop
            prev_stats = curr_stats
    except KeyboardInterrupt:
        print("\nStopping calculation.")
# Start the monitor only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment