Skip to content

Instantly share code, notes, and snippets.

@oddmario
Last active February 8, 2026 12:07
Show Gist options
  • Select an option

  • Save oddmario/bf1058f0083e237d8a896e3c300c9285 to your computer and use it in GitHub Desktop.

Select an option

Save oddmario/bf1058f0083e237d8a896e3c300c9285 to your computer and use it in GitHub Desktop.
net_drops is a Python script that lets you monitor RX, TX and qdisc packet drops on Linux systems in real time.
# https://gist.github.com/oddmario/bf1058f0083e237d8a896e3c300c9285
import os
import time
import subprocess
import re
import argparse
import sys
def get_sysfs_value(iface, metric):
    """Return one integer counter from /sys/class/net/<iface>/statistics.

    Falls back to 0 when the file cannot be opened or does not contain an
    integer.
    """
    counter_file = f"/sys/class/net/{iface}/statistics/{metric}"
    try:
        with open(counter_file, 'r') as handle:
            raw_text = handle.read()
        return int(raw_text.strip())
    except (IOError, ValueError):
        return 0
def get_duplex(iface):
    """Return the duplex mode stored in /sys/class/net/<iface>/duplex.

    The file content is returned with surrounding whitespace removed. An
    empty string is returned when the file cannot be read.
    """
    duplex_file = f"/sys/class/net/{iface}/duplex"
    try:
        with open(duplex_file, 'r') as handle:
            mode = handle.read().strip()
    except (IOError, ValueError):
        return ""
    return mode
def get_qdisc_drops(iface):
    """Return the sum of all qdisc drop counters that ``tc`` prints for *iface*.

    Runs ``tc -s qdisc show dev <iface>`` and adds up every number that
    follows the word ``dropped`` in its standard output. Returns 0 when the
    command exits with a non-zero status, or when anything goes wrong while
    running it or parsing its output.
    """
    command = ['tc', '-s', 'qdisc', 'show', 'dev', iface]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
        if completed.returncode != 0:
            return 0
        # Every "dropped <number>" occurrence contributes to the total.
        # NOTE(review): if a root qdisc already reports the sum of its
        # children, this double counts - confirm against real tc output.
        drop_counts = re.findall(r'dropped\s+(\d+)', completed.stdout)
        return sum(int(count) for count in drop_counts)
    except Exception:
        # Best effort: a missing or failing tc must not stop the monitor.
        return 0
def get_interface_stats(iface):
    """Gather the packet and drop counters that make up one sample of *iface*.

    Returns a dict with the keys ``rx_packets``, ``rx_dropped``,
    ``tx_packets`` and ``tx_dropped`` (each read through get_sysfs_value)
    plus ``qdisc_dropped`` (read through get_qdisc_drops).
    """
    sysfs_metrics = ('rx_packets', 'rx_dropped', 'tx_packets', 'tx_dropped')
    sample = {metric: get_sysfs_value(iface, metric) for metric in sysfs_metrics}
    sample['qdisc_dropped'] = get_qdisc_drops(iface)
    return sample
def calculate_rate(dropped_count, good_packet_count):
    """Return the drop percentage for one pair of counters.

    The percentage is ``dropped / (good + dropped) * 100``, i.e. the share
    of dropped packets among all events.

    Args:
        dropped_count: Number of dropped packets.
        good_packet_count: Number of packets that were not dropped.

    Returns:
        The drop rate as a float percentage. 0.0 is returned when there
        were no events at all, or when either count is negative.
    """
    # A negative count can arise when the caller passes the difference of
    # two counter readings and the counter went backwards in between (for
    # example after a reset, or after a failed read that fell back to 0).
    # Such a sample is not a real measurement, so report no drops instead
    # of a negative or inflated percentage.
    if dropped_count < 0 or good_packet_count < 0:
        return 0.0
    total_events = good_packet_count + dropped_count
    if total_events == 0:
        return 0.0
    return (dropped_count / total_events) * 100
def main():
    """Parse the command line and print live drop rates for one interface.

    Command line:
        interface       Name of the network interface to monitor.
        -i/--interval   Seconds between samples (positive integer, default 1).

    Behavior:
        * Exits with a usage error when the interval is not positive.
        * Exits with status 1 when /sys/class/net/<interface> does not exist.
        * Prints a warning when the interface duplex is not "full", followed
          by tuning recommendations and a table header.
        * Then samples the counters every interval and prints, for RX, TX and
          qdisc drops, the rate over the last interval and the rate computed
          from the raw cumulative counters. Runs until interrupted with
          Ctrl-C (KeyboardInterrupt).
    """
    parser = argparse.ArgumentParser(description="Monitor Network Drop Rates (Live vs Cumulative)")
    parser.add_argument("interface", help="The network interface (e.g., eth0)")
    parser.add_argument("-i", "--interval", type=int, default=1, help="Update interval in seconds")
    args = parser.parse_args()

    # time.sleep() raises ValueError for negative values and a zero interval
    # would make the loop spin without pause, so reject both up front with a
    # regular usage error instead of a traceback.
    if args.interval <= 0:
        parser.error("interval must be a positive number of seconds")

    iface = args.interface
    if not os.path.exists(f"/sys/class/net/{iface}"):
        # Diagnostics go to stderr so they do not mix with the table output.
        print(f"Error: Interface '{iface}' not found.", file=sys.stderr)
        sys.exit(1)

    duplex = get_duplex(iface)
    if duplex != "full":
        print(f"WARNING: The current duplex of your interface is '{duplex}'. The duplex should be 'full' on wired bare-metal servers to ensure that no packet loss will happen. Consider checking if the auto-negotiation behavior of your network is faulty.")
        print("The duplex is usually 'unknown' or unset on VMs using virtio-net and also on wireless interfaces.")
        print("")

    print("Recommendations:")
    print("- If you see a high TX drop rate, consider increasing the txqueuelen of your interface.")
    print("- If you see a high RX drop rate, consider increasing your kernel net.core.netdev_max_backlog.")
    print("")
    print("- If you see a high qdisc drop rate, consider increasing your qdisc's max packets queue limit (note that for some qdiscs like fq and fq_codel, it's normal to have a few drops every now and then because such qdiscs intentionally drop packets to work properly; as long as the drops aren't huge like thousands per second).")
    print("You can check if you're hitting your qdisc max packets queue size by running `watch -n 0.1 \"tc -s qdisc show dev [iface name] | grep backlog\"`")
    print("The output will be like 'backlog 9084b 2p requeues 0'. This means that there are currently 2 packets in the qdisc queue")
    print("If you're using the fq qdisc: If you're seeing qdisc drops but your currently-queued fq packets aren't exceeding your fq queue size limit, then maybe the drops are happening because the flow_limit of certain connections is being hit. Note that increasing the flow_limit of fq is usually NOT a good idea because it's one of the main features of fq which help avoiding bufferbloat by limiting the number of packets per connection.")
    print("")
    print("- If increasing the txqueuelen, qdisc queue size and/or the netdev_max_backlog doesn't solve your packet drops, consider increasing your NIC's RX/TX ring buffer sizes using ethtool.")
    print("- A 'high drop rate' is something >= 0.5% or 1.0% (or a continuously increasing drop rate every few seconds/milliseconds)")
    print("- If none of the above solve your problems, you may be running out of bandwidth or you need a better NIC (or maybe there's a [temporary] problem with your ISP/hosting provider's network).")
    print("- You can also try to increase the kernel's TCP and UDP buffer sizes to absorb any excessive packet drops.")
    print("- Make sure NOT to increase any queues/buffer sizes by crazy amounts to avoid causing bufferbloating on your network (especially the TX queues/buffer sizes).")
    print("")
    print(f"Monitoring {iface} (Interval: {args.interval}s)")
    print("Rates displayed as: Current Interval / Total Since Boot")
    print("-" * 85)

    # Header formatting
    headers = f"{'Time':<10} | {'RX Drop % (Now/Tot)':<22} | {'TX Drop % (Now/Tot)':<22} | {'Qdisc % (Now/Tot)':<22}"
    print(headers)
    print("-" * 85)

    try:
        # Baseline sample; every later sample is compared with the previous one.
        prev_stats = get_interface_stats(iface)
        while True:
            time.sleep(args.interval)
            curr_stats = get_interface_stats(iface)

            # --- 1. Deltas: traffic seen during the last interval ---
            d_rx_pkts = curr_stats['rx_packets'] - prev_stats['rx_packets']
            d_rx_drop = curr_stats['rx_dropped'] - prev_stats['rx_dropped']
            d_tx_pkts = curr_stats['tx_packets'] - prev_stats['tx_packets']
            d_tx_drop = curr_stats['tx_dropped'] - prev_stats['tx_dropped']
            d_qdisc_drop = curr_stats['qdisc_dropped'] - prev_stats['qdisc_dropped']

            rx_rate_now = calculate_rate(d_rx_drop, d_rx_pkts)
            tx_rate_now = calculate_rate(d_tx_drop, d_tx_pkts)
            # Qdisc is usually on TX path, so TX packets are the denominator.
            qdisc_rate_now = calculate_rate(d_qdisc_drop, d_tx_pkts)

            # --- 2. Totals: computed from the raw cumulative counters ---
            rx_rate_tot = calculate_rate(curr_stats['rx_dropped'], curr_stats['rx_packets'])
            tx_rate_tot = calculate_rate(curr_stats['tx_dropped'], curr_stats['tx_packets'])
            qdisc_rate_tot = calculate_rate(curr_stats['qdisc_dropped'], curr_stats['tx_packets'])

            # --- 3. Output: cells look like "0.05% / 1.20%" ---
            rx_str = f"{rx_rate_now:.2f}% / {rx_rate_tot:.2f}%"
            tx_str = f"{tx_rate_now:.2f}% / {tx_rate_tot:.2f}%"
            qd_str = f"{qdisc_rate_now:.2f}% / {qdisc_rate_tot:.2f}%"
            timestamp = time.strftime("%H:%M:%S")
            print(f"{timestamp:<10} | {rx_str:<22} | {tx_str:<22} | {qd_str:<22}")

            # The current sample becomes the baseline for the next iteration.
            prev_stats = curr_stats
    except KeyboardInterrupt:
        print("\nStopping calculation.")
# Start the monitor only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment