Skip to content

Instantly share code, notes, and snippets.

@teleboas
Forked from ponceto/haproxy.cfg
Created December 20, 2025 08:32
Show Gist options
  • Select an option

  • Save teleboas/a97287e0447f8932f7f2fa634d55111d to your computer and use it in GitHub Desktop.

Select an option

Save teleboas/a97287e0447f8932f7f2fa634d55111d to your computer and use it in GitHub Desktop.
HAProxy full configuration with throttling for AI bots
#-----------------------------------------------------------------------------
# global: please note the localpeer declaration
#-----------------------------------------------------------------------------
global
    # Name of this node in the `peers` section; it must match the `peer self`
    # entry declared there so the stick-tables are attached to the local peer.
    localpeer self

    # Syslog targets: everything to facility local0, and messages of level
    # notice or more severe to facility local1.
    log /dev/log local0
    log /dev/log local1 notice

    # Process isolation and privileges.
    chroot /var/lib/haproxy
    user haproxy
    group haproxy
    daemon

    # Runtime API socket (admin level), with a 30s idle timeout.
    stats socket /run/haproxy/admin.sock mode 660 level admin
    stats timeout 30s

    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets

    # The cipher list above enables DHE-RSA suites. Set the ephemeral DH key
    # size explicitly so that HAProxy releases whose built-in default is
    # 1024 bits do not negotiate a weak group (and do not warn at startup).
    tune.ssl.default-dh-param 2048
#-----------------------------------------------------------------------------
# defaults: those good old defaults
#-----------------------------------------------------------------------------
defaults
    # Inherit the log targets declared in the `global` section.
    log     global
    mode    http
    option  httplog
    option  dontlognull

    # Timeouts, written with explicit units (same values as 5000 / 50000 ms).
    # NOTE(review): no `timeout http-request` is set here, so slow delivery of
    # request headers is only bounded by `timeout client` - confirm this is
    # intended.
    timeout connect 5s
    timeout client  50s
    timeout server  50s

    # Custom pages served for HAProxy-generated error responses.
    errorfile 400 /etc/haproxy/errors/400.http
    errorfile 403 /etc/haproxy/errors/403.http
    errorfile 408 /etc/haproxy/errors/408.http
    errorfile 500 /etc/haproxy/errors/500.http
    errorfile 502 /etc/haproxy/errors/502.http
    errorfile 503 /etc/haproxy/errors/503.http
    errorfile 504 /etc/haproxy/errors/504.http
#-----------------------------------------------------------------------------
# peers: setup the peers but above all the tables used to track bad bots
#-----------------------------------------------------------------------------
peers cluster
    # Single peer entry; its name matches `localpeer self` in `global`.
    peer self localhost:9999

    # Per-source-IP request counter: 5-minute sliding request rate,
    # entries expire after 10 minutes.
    table bad-bots-by-ip type ip size 512k expire 10m store http_req_rate(5m)

    # Per-User-Agent request counter: keys are strings of at most 128
    # characters, same rate window and expiry as the table above.
    table bad-bots-by-ua type string len 128 size 512k expire 10m store http_req_rate(5m)
#-----------------------------------------------------------------------------
# frontend: http frontend
#-----------------------------------------------------------------------------
frontend fe-http
    # Plain-HTTP listeners, one per address family.
    bind *:80
    bind :::80

    # --- Request classification (ACL names favour readability) -------------
    acl has-acme-challenge path_beg /.well-known/acme-challenge/
    acl has-leading-site-1 hdr_beg(host) -i site-1.
    acl has-leading-site-2 hdr_beg(host) -i site-2.
    acl dom-any-domain     hdr_len(host) gt 0
    acl dom-example-com    hdr_dom(host) -i example.com

    # --- Bad-bot detection -------------------------------------------------
    # Setup:
    #   1) Download https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/haproxy-block-ai-bots.txt
    #   2) Install it as /etc/haproxy/bad-bots.txt
    #   3) Tune the per-IP and per-User-Agent limits below
    #      (default: 450 requests per 5 minutes)
    acl is-bad-bot            hdr_sub(user-agent) -i -f /etc/haproxy/bad-bots.txt
    acl bad-bot-ip-over-limit sc_http_req_rate(0) gt 450
    acl bad-bot-ua-over-limit sc_http_req_rate(1) gt 450

    # --- Tracking and throttling -------------------------------------------
    # Only requests flagged as bad bots are tracked: sc0 counts by source IP,
    # sc1 counts by User-Agent. Exceeding either limit yields a 429.
    http-request track-sc0 src table cluster/bad-bots-by-ip if is-bad-bot
    http-request track-sc1 req.hdr(user-agent) table cluster/bad-bots-by-ua if is-bad-bot
    http-request deny deny_status 429 if bad-bot-ip-over-limit || bad-bot-ua-over-limit

    # --- Forced upgrade to https -------------------------------------------
    # ACME challenges are exempt. Comment these out to allow plain-HTTP routing.
    redirect scheme https if dom-example-com has-leading-site-1 !has-acme-challenge
    redirect scheme https if dom-example-com has-leading-site-2 !has-acme-challenge

    # --- Backend selection -------------------------------------------------
    use_backend be-certbot-http            if dom-any-domain has-acme-challenge
    use_backend be-site-1-example-com-http if dom-example-com has-leading-site-1
    use_backend be-site-2-example-com-http if dom-example-com has-leading-site-2

    # Anything not matched above.
    default_backend be-default-http
#-----------------------------------------------------------------------------
# frontend: https frontend
#-----------------------------------------------------------------------------
frontend fe-https
    # TLS listeners, one per address family; certificates are loaded from the
    # /etc/ssl/haproxy/ directory and both HTTP/2 and HTTP/1.1 are offered.
    bind *:443 ssl crt /etc/ssl/haproxy/ alpn h2,http/1.1
    bind :::443 ssl crt /etc/ssl/haproxy/ alpn h2,http/1.1

    # --- Request classification (ACL names favour readability) -------------
    acl has-acme-challenge path_beg /.well-known/acme-challenge/
    acl has-leading-site-1 hdr_beg(host) -i site-1.
    acl has-leading-site-2 hdr_beg(host) -i site-2.
    acl dom-any-domain     hdr_len(host) gt 0
    acl dom-example-com    hdr_dom(host) -i example.com

    # --- Bad-bot detection -------------------------------------------------
    # Setup:
    #   1) Download https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/haproxy-block-ai-bots.txt
    #   2) Install it as /etc/haproxy/bad-bots.txt
    #   3) Tune the per-IP and per-User-Agent limits below
    #      (default: 450 requests per 5 minutes)
    acl is-bad-bot            hdr_sub(user-agent) -i -f /etc/haproxy/bad-bots.txt
    acl bad-bot-ip-over-limit sc_http_req_rate(0) gt 450
    acl bad-bot-ua-over-limit sc_http_req_rate(1) gt 450

    # --- Tracking and throttling -------------------------------------------
    # Only requests flagged as bad bots are tracked: sc0 counts by source IP,
    # sc1 counts by User-Agent. Exceeding either limit yields a 429.
    http-request track-sc0 src table cluster/bad-bots-by-ip if is-bad-bot
    http-request track-sc1 req.hdr(user-agent) table cluster/bad-bots-by-ua if is-bad-bot
    http-request deny deny_status 429 if bad-bot-ip-over-limit || bad-bot-ua-over-limit

    # --- Backend selection -------------------------------------------------
    use_backend be-certbot-https            if dom-any-domain has-acme-challenge
    use_backend be-site-1-example-com-https if dom-example-com has-leading-site-1
    use_backend be-site-2-example-com-https if dom-example-com has-leading-site-2

    # Anything not matched above.
    default_backend be-default-https
#-----------------------------------------------------------------------------
# backend: certbot
#-----------------------------------------------------------------------------
backend be-certbot-http
    # Receives ACME challenge requests from fe-http.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # No `check` keyword here: this server is not health-checked.
    server certbot certbot.home.arpa:80
backend be-certbot-https
    # Receives ACME challenge requests from fe-https.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # No `check` keyword here: this server is not health-checked.
    # NOTE(review): `verify none` disables validation of the server
    # certificate - confirm this is acceptable for this network.
    server certbot certbot.home.arpa:443 ssl verify none
#-----------------------------------------------------------------------------
# backend: default
#-----------------------------------------------------------------------------
backend be-default-http
    # Fallback target of fe-http when no `use_backend` rule matches.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked plain-HTTP server.
    server default default.home.arpa:80 check
backend be-default-https
    # Fallback target of fe-https when no `use_backend` rule matches.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked server reached over TLS.
    # NOTE(review): `verify none` disables validation of the server
    # certificate - confirm this is acceptable for this network.
    server default default.home.arpa:443 check ssl verify none
#-----------------------------------------------------------------------------
# backend: site-1.example.com
#-----------------------------------------------------------------------------
backend be-site-1-example-com-http
    # Selected by fe-http for hosts starting with "site-1." on example.com.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked plain-HTTP server.
    server site-1 site-1.home.arpa:80 check
backend be-site-1-example-com-https
    # Selected by fe-https for hosts starting with "site-1." on example.com.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked server reached over TLS.
    # NOTE(review): `verify none` disables validation of the server
    # certificate - confirm this is acceptable for this network.
    server site-1 site-1.home.arpa:443 check ssl verify none
#-----------------------------------------------------------------------------
# backend: site-2.example.com
#-----------------------------------------------------------------------------
backend be-site-2-example-com-http
    # Selected by fe-http for hosts starting with "site-2." on example.com.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked plain-HTTP server.
    server site-2 site-2.home.arpa:80 check
backend be-site-2-example-com-https
    # Selected by fe-https for hosts starting with "site-2." on example.com.
    mode http
    # Pass the client address upstream in X-Forwarded-For.
    option forwardfor
    # Health-checked server reached over TLS.
    # NOTE(review): `verify none` disables validation of the server
    # certificate - confirm this is acceptable for this network.
    server site-2 site-2.home.arpa:443 check ssl verify none
#-----------------------------------------------------------------------------
# End-Of-File
#-----------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment