sql-patterns
Compare original and translation side by side
🇺🇸
Original
English🇨🇳
Translation
ChineseSQL-Patterns
SQL模式
Advanced SQL patterns for data engineering beyond basic SELECT/JOIN.
面向数据工程的高级SQL模式,超越基础的SELECT/JOIN操作。
Common Table Expressions (CTEs)
通用表表达式(CTEs)
sql
-- Chain transformations readably
WITH
active_users AS (
SELECT user_id, email
FROM users
WHERE status = 'active'
),
user_orders AS (
SELECT u.user_id, COUNT(*) as order_count
FROM active_users u
JOIN orders o ON u.user_id = o.user_id
GROUP BY u.user_id
)
SELECT * FROM user_orders WHERE order_count > 5;sql
-- Chain transformations readably
WITH
active_users AS (
SELECT user_id, email
FROM users
WHERE status = 'active'
),
user_orders AS (
SELECT u.user_id, COUNT(*) as order_count
FROM active_users u
JOIN orders o ON u.user_id = o.user_id
GROUP BY u.user_id
)
SELECT * FROM user_orders WHERE order_count > 5;Window Functions
窗口函数
sql
-- Row numbering within groups
SELECT *,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY created_at DESC) as rn
FROM products;
-- Running totals
SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) as cumulative_revenue
FROM daily_sales;
-- Percent of total
SELECT
category,
sales,
sales * 100.0 / SUM(sales) OVER () as pct_of_total
FROM category_sales;
-- Lead/Lag for time series
SELECT
date,
value,
LAG(value, 1) OVER (ORDER BY date) as prev_value,
value - LAG(value, 1) OVER (ORDER BY date) as change
FROM metrics;
-- Ranking with ties
SELECT *,
RANK() OVER (ORDER BY score DESC) as rank, -- 1,2,2,4
DENSE_RANK() OVER (ORDER BY score DESC) as drank -- 1,2,2,3
FROM scores;sql
-- Row numbering within groups
SELECT *,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY created_at DESC) as rn
FROM products;
-- Running totals
SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) as cumulative_revenue
FROM daily_sales;
-- Percent of total
SELECT
category,
sales,
sales * 100.0 / SUM(sales) OVER () as pct_of_total
FROM category_sales;
-- Lead/Lag for time series
SELECT
date,
value,
LAG(value, 1) OVER (ORDER BY date) as prev_value,
value - LAG(value, 1) OVER (ORDER BY date) as change
FROM metrics;
-- Ranking with ties
SELECT *,
RANK() OVER (ORDER BY score DESC) as rank, -- 1,2,2,4
DENSE_RANK() OVER (ORDER BY score DESC) as drank -- 1,2,2,3
FROM scores;Recursive CTEs
递归CTEs
sql
-- Hierarchical data (org chart, categories)
WITH RECURSIVE org_tree AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 1 as depth
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: subordinates
SELECT e.id, e.name, e.manager_id, t.depth + 1
FROM employees e
JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree;
-- Generate date series
WITH RECURSIVE dates AS (
SELECT DATE '2024-01-01' as dt
UNION ALL
SELECT dt + INTERVAL '1 day'
FROM dates
WHERE dt < DATE '2024-12-31'
)
SELECT * FROM dates;sql
-- Hierarchical data (org chart, categories)
WITH RECURSIVE org_tree AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 1 as depth
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: subordinates
SELECT e.id, e.name, e.manager_id, t.depth + 1
FROM employees e
JOIN org_tree t ON e.manager_id = t.id
)
SELECT * FROM org_tree;
-- Generate date series
WITH RECURSIVE dates AS (
SELECT DATE '2024-01-01' as dt
UNION ALL
SELECT dt + INTERVAL '1 day'
FROM dates
WHERE dt < DATE '2024-12-31'
)
SELECT * FROM dates;CASE Expressions
CASE表达式
sql
-- Simple CASE
SELECT
CASE status
WHEN 'A' THEN 'Active'
WHEN 'I' THEN 'Inactive'
ELSE 'Unknown'
END as status_label
FROM users;
-- Searched CASE for ranges
SELECT
CASE
WHEN age < 18 THEN 'Minor'
WHEN age < 65 THEN 'Adult'
ELSE 'Senior'
END as age_group
FROM users;
-- Conditional aggregation
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'active') as active_count, -- PostgreSQL
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active_count -- ANSI
FROM users;sql
-- Simple CASE
SELECT
CASE status
WHEN 'A' THEN 'Active'
WHEN 'I' THEN 'Inactive'
ELSE 'Unknown'
END as status_label
FROM users;
-- Searched CASE for ranges
SELECT
CASE
WHEN age < 18 THEN 'Minor'
WHEN age < 65 THEN 'Adult'
ELSE 'Senior'
END as age_group
FROM users;
-- Conditional aggregation
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'active') as active_count, -- PostgreSQL
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active_count -- ANSI
FROM users;UPSERT Patterns
UPSERT模式
sql
-- PostgreSQL: INSERT ON CONFLICT
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON CONFLICT (sku) DO UPDATE SET
quantity = EXCLUDED.quantity,
updated_at = EXCLUDED.updated_at;
-- MySQL: INSERT ON DUPLICATE KEY
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON DUPLICATE KEY UPDATE
quantity = VALUES(quantity),
updated_at = VALUES(updated_at);
-- SQLite: INSERT OR REPLACE
INSERT OR REPLACE INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, datetime('now'));sql
-- PostgreSQL: INSERT ON CONFLICT
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON CONFLICT (sku) DO UPDATE SET
quantity = EXCLUDED.quantity,
updated_at = EXCLUDED.updated_at;
-- MySQL: INSERT ON DUPLICATE KEY
INSERT INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, NOW())
ON DUPLICATE KEY UPDATE
quantity = VALUES(quantity),
updated_at = VALUES(updated_at);
-- SQLite: INSERT OR REPLACE
INSERT OR REPLACE INTO inventory (sku, quantity, updated_at)
VALUES ('ABC123', 100, datetime('now'));Efficient Pagination
高效分页
sql
-- BAD: OFFSET for large pages
SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 10000;
-- GOOD: Keyset pagination
SELECT * FROM orders
WHERE id > 10000 -- last seen id
ORDER BY id
LIMIT 20;sql
-- BAD: OFFSET for large pages
SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 10000;
-- GOOD: Keyset pagination
SELECT * FROM orders
WHERE id > 10000 -- last seen id
ORDER BY id
LIMIT 20;Batch Operations
批量操作
sql
-- Batch DELETE with limit (avoid long locks)
DELETE FROM logs
WHERE created_at < NOW() - INTERVAL '90 days'
LIMIT 10000;
-- Batch UPDATE
UPDATE orders
SET status = 'archived'
WHERE id IN (
SELECT id FROM orders
WHERE status = 'completed'
AND completed_at < NOW() - INTERVAL '1 year'
LIMIT 1000
);sql
-- Batch DELETE with limit (avoid long locks)
DELETE FROM logs
WHERE created_at < NOW() - INTERVAL '90 days'
LIMIT 10000;
-- Batch UPDATE
UPDATE orders
SET status = 'archived'
WHERE id IN (
SELECT id FROM orders
WHERE status = 'completed'
AND completed_at < NOW() - INTERVAL '1 year'
LIMIT 1000
);Index-Friendly Queries
索引友好的查询
sql
-- BAD: Function on indexed column
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- GOOD: Store lowercase or use expression index
SELECT * FROM users WHERE email_lower = 'test@example.com';
-- Or: CREATE INDEX idx_email_lower ON users (LOWER(email));
-- BAD: Leading wildcard
SELECT * FROM products WHERE name LIKE '%widget%';
-- GOOD: Full-text search or prefix match
SELECT * FROM products WHERE name LIKE 'widget%';sql
-- BAD: Function on indexed column
SELECT * FROM users WHERE LOWER(email) = 'test@example.com';
-- GOOD: Store lowercase or use expression index
SELECT * FROM users WHERE email_lower = 'test@example.com';
-- Or: CREATE INDEX idx_email_lower ON users (LOWER(email));
-- BAD: Leading wildcard
SELECT * FROM products WHERE name LIKE '%widget%';
-- GOOD: Full-text search or prefix match
SELECT * FROM products WHERE name LIKE 'widget%';NULL Handling
NULL值处理
sql
-- COALESCE for defaults
SELECT COALESCE(nickname, first_name, 'Anonymous') as display_name
FROM users;
-- NULLIF to convert values to NULL
SELECT NULLIF(status, '') as status -- empty string -> NULL
FROM records;
-- IS DISTINCT FROM (NULL-safe comparison)
SELECT * FROM a
WHERE a.value IS DISTINCT FROM b.value; -- treats NULL != NULL as falsesql
-- COALESCE for defaults
SELECT COALESCE(nickname, first_name, 'Anonymous') as display_name
FROM users;
-- NULLIF to convert values to NULL
SELECT NULLIF(status, '') as status -- empty string -> NULL
FROM records;
-- IS DISTINCT FROM (NULL-safe comparison)
SELECT * FROM a
WHERE a.value IS DISTINCT FROM b.value; -- treats NULL != NULL as falseLATERAL Joins
LATERAL连接
sql
-- Top N per group
SELECT d.name, t.product, t.revenue
FROM departments d
CROSS JOIN LATERAL (
SELECT product, revenue
FROM sales
WHERE sales.dept_id = d.id
ORDER BY revenue DESC
LIMIT 3
) t;sql
-- Top N per group
SELECT d.name, t.product, t.revenue
FROM departments d
CROSS JOIN LATERAL (
SELECT product, revenue
FROM sales
WHERE sales.dept_id = d.id
ORDER BY revenue DESC
LIMIT 3
) t;Materialized Views
物化视图
sql
-- Create for expensive aggregations
CREATE MATERIALIZED VIEW daily_stats AS
SELECT
DATE_TRUNC('day', created_at) as date,
COUNT(*) as total_orders,
SUM(amount) as revenue
FROM orders
GROUP BY 1;
-- Refresh periodically
REFRESH MATERIALIZED VIEW CONCURRENTLY daily_stats;sql
-- Create for expensive aggregations
CREATE MATERIALIZED VIEW daily_stats AS
SELECT
DATE_TRUNC('day', created_at) as date,
COUNT(*) as total_orders,
SUM(amount) as revenue
FROM orders
GROUP BY 1;
-- Refresh periodically
REFRESH MATERIALIZED VIEW CONCURRENTLY daily_stats;Query Optimization Checklist
查询优化检查清单
- Check EXPLAIN ANALYZE - Look for sequential scans on large tables
- Add missing indexes - Columns in WHERE, JOIN, ORDER BY
- **Avoid SELECT *** - Fetch only needed columns
- Use EXISTS over IN - For correlated subqueries
- Batch large operations - Avoid long-running transactions
- Partition large tables - By date or category
- Use connection pooling - Avoid connection overhead
- 检查EXPLAIN ANALYZE - 查看大表上的全表扫描
- 添加缺失的索引 - WHERE、JOIN、ORDER BY子句中的列
- **避免使用SELECT *** - 仅获取需要的列
- 使用EXISTS替代IN - 针对关联子查询
- 批量处理大型操作 - 避免长时间运行的事务
- 对大表进行分区 - 按日期或类别
- 使用连接池 - 避免连接开销