ClickHouse queries
-- Distribution queue --
SELECT
    database,
    table,
    data_files as pending_files,
    formatReadableSize(data_compressed_bytes) as pending_size,
    error_count,
    substring(last_exception, 1, 80) as last_error
FROM clusterAllReplicas('{cluster}', system.distribution_queue)
ORDER BY data_compressed_bytes DESC;
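-- If the queue above keeps growing, a flush can be forced manually.
-- Sketch only: 'data.logs' is a placeholder name for the Distributed table;
-- run this on the node whose queue is backed up.
SYSTEM FLUSH DISTRIBUTED data.logs;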
-- Replication queue --
SELECT
    hostName() as node,
    database,
    table,
    countIf(type = 'GET_PART') as get_parts,
    countIf(type = 'MERGE_PARTS') as merges,
    min(create_time) as oldest_task,
    dateDiff('second', min(create_time), now()) as max_delay_sec
FROM clusterAllReplicas('{cluster}', system.replication_queue)
WHERE database = 'data'
GROUP BY hostName(), database, table
HAVING count() > 0
ORDER BY max_delay_sec DESC;
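-- Companion check (a sketch, assuming the same 'data' database): overall
-- replica lag and read-only replicas from system.replicas, instead of the
-- per-task detail above.
SELECT
    hostName() as node,
    database,
    table,
    is_readonly,
    queue_size,
    absolute_delay
FROM clusterAllReplicas('{cluster}', system.replicas)
WHERE database = 'data' AND (absolute_delay > 10 OR is_readonly)
ORDER BY absolute_delay DESC;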
-- New parts inserts --
SELECT
    round(sum(rows) / 60, 0) as rows_per_sec,
    avg(rows) as rows_per_part,
    count(*) as parts
FROM clusterAllReplicas('{cluster}', system.part_log)
WHERE database = 'data'
    AND table = 'logs_shard'
    AND event_type = 'NewPart'
    AND event_time > now() - INTERVAL 1 MINUTE;
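-- Variation on the query above (same assumed table): break the last minute's
-- inserts down per node to spot a skewed insert distribution.
SELECT
    hostName() as node,
    count(*) as parts,
    round(sum(rows) / 60, 0) as rows_per_sec,
    avg(rows) as rows_per_part
FROM clusterAllReplicas('{cluster}', system.part_log)
WHERE database = 'data'
    AND table = 'logs_shard'
    AND event_type = 'NewPart'
    AND event_time > now() - INTERVAL 1 MINUTE
GROUP BY node
ORDER BY parts DESC;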
-- Merges IO write --
SELECT
    hostName() as host,
    count(*) as merges,
    round(sum(bytes_written_uncompressed / elapsed) / 1024 / 1024, 0) as write_mb_sec
FROM clusterAllReplicas('{cluster}', system.merges)
WHERE database = 'data' AND table = 'logs_shard' AND elapsed > 2
GROUP BY host;
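-- Companion check (sketch): how busy the background merge pool is on each
-- node. The metric name below is the one used by recent ClickHouse versions;
-- older releases expose it as 'BackgroundPoolTask'.
SELECT
    hostName() as node,
    metric,
    value
FROM clusterAllReplicas('{cluster}', system.metrics)
WHERE metric = 'BackgroundMergesAndMutationsPoolTask';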
-- Partitions count --
SELECT
    partition,
    count(*) as parts,
    formatReadableSize(sum(bytes_on_disk)) as size
FROM clusterAllReplicas('{cluster}', system.parts)
WHERE database = 'data' AND table = 'logs_shard' AND active
GROUP BY partition
ORDER BY parts DESC;
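-- If one partition accumulates too many parts, a merge can be forced.
-- Sketch only: '202601' is a placeholder partition ID, and OPTIMIZE ... FINAL
-- is expensive, so avoid running it casually on large tables.
OPTIMIZE TABLE data.logs_shard PARTITION ID '202601' FINAL;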
-- Query performance analysis: identifies bottlenecks (CPU/Disk/Network) --
SELECT
    -- Basic info
    query_id,
    query_duration_ms as wall_time_ms,
    -- CPU metrics (ms)
    round(ProfileEvents['OSCPUVirtualTimeMicroseconds'] / 1000, 1) as cpu_ms,
    round(ProfileEvents['UserTimeMicroseconds'] / 1000, 1) as user_cpu_ms,
    round(ProfileEvents['SystemTimeMicroseconds'] / 1000, 1) as sys_cpu_ms,
    -- I/O metrics (ms)
    round(ProfileEvents['DiskReadElapsedMicroseconds'] / 1000, 1) as disk_read_ms,
    round(ProfileEvents['ThreadPoolReaderPageCacheHitElapsedMicroseconds'] / 1000, 1) as cache_read_ms,
    round(ProfileEvents['SynchronousReadWaitMicroseconds'] / 1000, 1) as io_wait_ms,
    -- Network metrics (ms)
    round(ProfileEvents['NetworkReceiveElapsedMicroseconds'] / 1000, 1) as network_recv_ms,
    round(ProfileEvents['NetworkSendElapsedMicroseconds'] / 1000, 1) as network_send_ms,
    -- Data volume
    formatReadableQuantity(read_rows) as rows_read,
    formatReadableSize(read_bytes) as uncompressed_bytes,
    formatReadableSize(ProfileEvents['ReadCompressedBytes']) as compressed_bytes,
    formatReadableSize(memory_usage) as peak_memory,
    -- Bottleneck share of wall time (%)
    round(ProfileEvents['DiskReadElapsedMicroseconds'] / 1000 / query_duration_ms * 100, 1) as disk_pct,
    round(ProfileEvents['NetworkReceiveElapsedMicroseconds'] / 1000 / query_duration_ms * 100, 1) as network_pct,
    -- Auto-detect the dominant bottleneck
    multiIf(
        ProfileEvents['DiskReadElapsedMicroseconds'] / 1000 > query_duration_ms * 0.5, 'DISK-BOUND',
        ProfileEvents['NetworkReceiveElapsedMicroseconds'] / 1000 > query_duration_ms * 0.5, 'NETWORK-BOUND',
        ProfileEvents['SynchronousReadWaitMicroseconds'] / 1000 > query_duration_ms * 0.3, 'IO-WAIT-BOUND',
        'CPU-BOUND'
    ) as bottleneck
FROM clusterAllReplicas('{cluster}', system.query_log)
WHERE type = 'QueryFinish'
    AND query_duration_ms > 100 -- only queries slower than 100 ms
    AND event_time >= now() - INTERVAL 10 MINUTE
    -- Optional: filter by a specific query
    -- AND query LIKE '%your_table%'
ORDER BY event_time DESC
LIMIT 10;
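-- Rollup variant (sketch): aggregate the same window per query shape via
-- normalized_query_hash, to find which statement pattern dominates rather
-- than inspecting individual runs.
SELECT
    normalized_query_hash,
    any(normalizeQuery(query)) as query_pattern,
    count(*) as runs,
    round(avg(query_duration_ms), 0) as avg_ms,
    max(query_duration_ms) as max_ms,
    formatReadableSize(max(memory_usage)) as max_memory
FROM clusterAllReplicas('{cluster}', system.query_log)
WHERE type = 'QueryFinish'
    AND query_duration_ms > 100
    AND event_time >= now() - INTERVAL 10 MINUTE
GROUP BY normalized_query_hash
ORDER BY avg_ms DESC
LIMIT 10;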