-
ANYoperator trên array columns (campaign_ids,tags)- Không thể sử dụng B-tree index hiệu quả
- Full scan hoặc bitmap index scan tốn kém
-
JOIN với condition phức tạp
c.id = ANY(cuh.campaign_ids)→ Nested loop có thể xảy ra- Với 150k records/ngày, cost rất cao
-
CAST operations
cast(c.display_names as varchar)cast(cuh.data_changed as varchar)- Tốn CPU, không cache được
-
Range query + pagination
updated_onrange +id < :lastIndex- Có thể gây index scan không tối ưu
-
Với dataset 100GB + 40GB index
- Memory pressure cao
- Cache hit rate thấp nếu query không tối ưu
-- GIN index cho array columns
CREATE INDEX idx_cuh_tags_gin ON campaign_usage_history USING GIN (tags);
CREATE INDEX idx_cuh_campaign_ids_gin ON campaign_usage_history USING GIN (campaign_ids);
-- Composite B-tree index cho range query + pagination
CREATE INDEX idx_cuh_active_updated_id ON campaign_usage_history (active, updated_on DESC, id DESC)
WHERE active = TRUE;
-- Index cho JOIN condition
CREATE INDEX idx_campaigns_active_id ON campaigns (active, id)
WHERE active = TRUE;
-- Index cho customer lookup
CREATE INDEX idx_customers_active_id ON customers (active, id)
WHERE active = TRUE;-- Step 1: Filter campaign_usage_history trước (subquery)
WITH filtered_cuh AS (
SELECT
id,
type,
applied_subject_type,
campaign_ids,
order_source_id,
created_on,
created_by,
session_id,
status,
data_changed::varchar AS data_changed,
data_request::varchar AS data_request,
redeemed_location,
applied_subject_id
FROM campaign_usage_history
WHERE active = TRUE
AND tags @> ARRAY[:orgId]::bigint[] -- GIN index friendly
AND campaign_ids @> ARRAY[:campaignId]::bigint[]
AND updated_on >= :fromUpdatedOn
AND updated_on <= :toUpdatedOn
AND (:lastIndex = 0 OR id < :lastIndex)
ORDER BY id DESC
LIMIT 1000 -- Thêm LIMIT để giảm JOIN cost
)
-- Step 2: JOIN với filtered data
SELECT
cuh.id,
cuh.type,
cuh.applied_subject_type,
c.id AS campaign_id,
c.code AS campaign_code,
c.name AS campaign_name,
c.display_names::varchar AS display_names,
cuh.order_source_id,
cuh.created_on AS date_of_use,
cuh.created_by,
cuh.session_id,
cuh.status,
cuh.data_changed,
cuh.data_request,
cuh.redeemed_location,
c2.original_id,
c2.full_name,
c2.email,
c2.phone_number
FROM filtered_cuh cuh
JOIN campaigns c ON c.id = ANY(cuh.campaign_ids) AND c.active = TRUE
LEFT JOIN customers c2 ON c2.id = cuh.applied_subject_id AND c2.active = TRUE
ORDER BY cuh.id DESC;Lý do hiệu quả hơn:
- Filter data sớm nhất có thể
- GIN index với
@>operator nhanh hơnANY - LIMIT giảm số rows phải JOIN
- Chuyển CAST ra CTE để chỉ làm 1 lần
-- Thêm cột để tránh JOIN
ALTER TABLE campaign_usage_history
ADD COLUMN campaign_code varchar(100),
ADD COLUMN campaign_name varchar(255);
-- Trigger hoặc application logic update khi campaign thay đổi
-- Hoặc dùng materialized view-- Partition by updated_on (monthly)
CREATE TABLE campaign_usage_history (
id BIGSERIAL,
updated_on TIMESTAMP NOT NULL,
-- ... other columns
) PARTITION BY RANGE (updated_on);
-- Tạo partition cho từng tháng
CREATE TABLE campaign_usage_history_2024_12
PARTITION OF campaign_usage_history
FOR VALUES FROM ('2024-12-01') TO ('2025-01-01');
CREATE TABLE campaign_usage_history_2025_01
PARTITION OF campaign_usage_history
FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');Lợi ích:
- Query chỉ scan partition cần thiết
- Old data có thể move sang slower storage
- Maintenance (VACUUM) nhanh hơn
CREATE MATERIALIZED VIEW mv_campaign_usage_report AS
SELECT
cuh.id,
cuh.type,
c.id AS campaign_id,
c.code AS campaign_code,
c.name AS campaign_name,
cuh.created_on AS date_of_use,
cuh.status,
c2.full_name,
c2.email,
DATE_TRUNC('day', cuh.updated_on) AS report_date
FROM campaign_usage_history cuh
JOIN campaigns c ON c.id = ANY(cuh.campaign_ids) AND c.active = TRUE
LEFT JOIN customers c2 ON c2.id = cuh.applied_subject_id AND c2.active = TRUE
WHERE cuh.active = TRUE;
CREATE INDEX idx_mv_report_date ON mv_campaign_usage_report (report_date, campaign_id);
-- Refresh hàng ngày hoặc theo schedule
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_campaign_usage_report;// Redis caching layer
async function getCampaignUsageHistory(params: QueryParams) {
const cacheKey = `campaign_usage:${params.orgId}:${params.campaignId}:${params.fromDate}:${params.lastIndex}`;
// Check cache first
const cached = await redis.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
// Query database
const result = await db.query(optimizedQuery, params);
// Cache for 5 minutes (adjust based on freshness requirement)
await redis.setex(cacheKey, 300, JSON.stringify(result));
return result;
}Planning Time: 5-10ms
Execution Time: 5000-15000ms
Shared Buffers Hit: 40-50%
Planning Time: 2-5ms
Execution Time: 100-500ms
Shared Buffers Hit: 85-95%
-
Critical (Làm ngay):
- Tạo GIN indexes cho
tagsvàcampaign_ids - Rewrite query với CTE và
@>operator - Thêm LIMIT trong subquery
- Tạo GIN indexes cho
-
High Priority:
- Implement partitioning by
updated_on - Thêm composite B-tree index
- Application-level caching
- Implement partitioning by
-
Medium Priority:
- Materialized view cho reporting
- Denormalization nếu business logic cho phép
-
Monitor:
-- Check query performance
EXPLAIN (ANALYZE, BUFFERS) <your_query>;
-- Monitor index usage
SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan ASC;-- Database configuration cho large dataset
ALTER SYSTEM SET shared_buffers = '16GB'; -- 25% of RAM
ALTER SYSTEM SET effective_cache_size = '48GB'; -- 75% of RAM
ALTER SYSTEM SET maintenance_work_mem = '2GB';
ALTER SYSTEM SET work_mem = '256MB';
ALTER SYSTEM SET random_page_cost = 1.1; -- SSD
-- Regular maintenance
VACUUM ANALYZE campaign_usage_history;
REINDEX TABLE CONCURRENTLY campaign_usage_history;