PostgreSQL WITH 子句(公共表表达式)
什么是 WITH?
WITH 子句,也称为公共表表达式(CTE),允许您定义临时命名结果集,可以在 SELECT、INSERT、UPDATE 或 DELETE 语句中引用。CTE 使复杂查询更易读和可维护。
基本语法
sql
WITH cte_name AS (
SELECT column1, column2, ...
FROM table_name
WHERE condition
)
SELECT * FROM cte_name;简单 CTE 示例
示例 1:基本 CTE
sql
-- 定义高价值客户的 CTE
WITH high_value_customers AS (
SELECT customer_id, customer_name, total_spent
FROM customers
WHERE total_spent > 10000
)
SELECT * FROM high_value_customers
ORDER BY total_spent DESC;示例 2:带聚合的 CTE
sql
-- 计算月度销售额
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) as month,
SUM(total_amount) as total_sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
)
SELECT * FROM monthly_sales
ORDER BY month DESC;多个 CTE
您可以在单个查询中定义多个 CTE:
sql
WITH
customer_orders AS (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
),
customer_revenue AS (
SELECT customer_id, SUM(total_amount) as total_revenue
FROM orders
GROUP BY customer_id
)
SELECT
c.customer_name,
co.order_count,
cr.total_revenue
FROM customers c
JOIN customer_orders co ON c.customer_id = co.customer_id
JOIN customer_revenue cr ON c.customer_id = cr.customer_id
WHERE co.order_count > 5;递归 CTE
递归 CTE 允许您查询层次结构或树形结构数据:
基本递归 CTE 语法
sql
WITH RECURSIVE cte_name AS (
-- 基本情况(锚成员)
SELECT ...
UNION ALL
-- 递归情况(递归成员)
SELECT ...
FROM cte_name
WHERE condition
)
SELECT * FROM cte_name;示例 1:数字序列
sql
-- 生成从 1 到 10 的数字
WITH RECURSIVE numbers AS (
SELECT 1 as n
UNION ALL
SELECT n + 1
FROM numbers
WHERE n < 10
)
SELECT * FROM numbers;示例 2:员工层次结构
sql
-- 获取经理下的所有员工
WITH RECURSIVE employee_hierarchy AS (
-- 基本情况:从 CEO 开始
SELECT employee_id, employee_name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归情况:获取下属
SELECT e.employee_id, e.employee_name, e.manager_id, eh.level + 1
FROM employees e
JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT * FROM employee_hierarchy
ORDER BY level, employee_name;示例 3:类别树
sql
-- 获取所有子类别
WITH RECURSIVE category_tree AS (
-- 基本情况:根类别
SELECT category_id, category_name, parent_id, 0 as depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- 递归情况:子类别
SELECT c.category_id, c.category_name, c.parent_id, ct.depth + 1
FROM categories c
JOIN category_tree ct ON c.parent_id = ct.category_id
)
SELECT
REPEAT(' ', depth) || category_name as category_hierarchy,
depth
FROM category_tree
ORDER BY category_id;CTE 与 INSERT、UPDATE、DELETE
CTE 与 INSERT
sql
-- 将顶级客户插入 VIP 表
WITH top_customers AS (
SELECT customer_id, SUM(total_amount) as total_spent
FROM orders
GROUP BY customer_id
HAVING SUM(total_amount) > 50000
)
INSERT INTO vip_customers (customer_id, tier)
SELECT customer_id, 'Gold'
FROM top_customers;CTE 与 UPDATE
sql
-- 根据销售业绩更新产品价格
WITH product_sales AS (
SELECT product_id, SUM(quantity) as total_sold
FROM order_items
WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY product_id
)
UPDATE products p
SET price = price * 1.1
FROM product_sales ps
WHERE p.product_id = ps.product_id
AND ps.total_sold > 100;CTE 与 DELETE
sql
-- 删除不活跃的客户
WITH inactive_customers AS (
SELECT customer_id
FROM customers c
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.order_date >= CURRENT_DATE - INTERVAL '2 years'
)
)
DELETE FROM customers
WHERE customer_id IN (SELECT customer_id FROM inactive_customers);实际应用示例
示例 1:销售分析
sql
-- 比较当月与上月销售额
WITH current_month AS (
SELECT SUM(total_amount) as sales
FROM orders
WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE)
),
previous_month AS (
SELECT SUM(total_amount) as sales
FROM orders
WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
)
SELECT
cm.sales as current_sales,
pm.sales as previous_sales,
ROUND(((cm.sales - pm.sales) / pm.sales * 100), 2) as growth_percentage
FROM current_month cm, previous_month pm;示例 2:客户细分
sql
-- 按购买行为细分客户
WITH customer_stats AS (
SELECT
customer_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent,
AVG(total_amount) as avg_order_value,
MAX(order_date) as last_order_date
FROM orders
GROUP BY customer_id
)
SELECT
customer_id,
CASE
WHEN total_spent > 10000 AND order_count > 20 THEN 'VIP'
WHEN total_spent > 5000 OR order_count > 10 THEN 'Regular'
WHEN last_order_date < CURRENT_DATE - INTERVAL '6 months' THEN 'Inactive'
ELSE 'New'
END as customer_segment,
order_count,
total_spent,
avg_order_value
FROM customer_stats;示例 3:累计总计
sql
-- 计算每日销售额的累计总计
WITH daily_sales AS (
SELECT
DATE(order_date) as sale_date,
SUM(total_amount) as daily_total
FROM orders
GROUP BY DATE(order_date)
)
SELECT
sale_date,
daily_total,
SUM(daily_total) OVER (ORDER BY sale_date) as running_total
FROM daily_sales
ORDER BY sale_date;性能优化
1. 物化 CTE
默认情况下,CTE 是优化屏障。使用 MATERIALIZED 或 NOT MATERIALIZED:
sql
-- 强制物化(PostgreSQL 12+)
WITH MATERIALIZED high_value_orders AS (
SELECT * FROM orders WHERE total_amount > 1000
)
SELECT * FROM high_value_orders;
-- 防止物化(内联 CTE)
WITH NOT MATERIALIZED recent_orders AS (
SELECT * FROM orders WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)
SELECT * FROM recent_orders;2. 何时使用 CTE
CTE 最适合:
- 提高查询可读性
- 递归查询
- 多次引用同一子查询
- 分解复杂逻辑
3. 性能提示
sql
-- 好:CTE 被多次引用
WITH customer_orders AS (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
)
SELECT
(SELECT AVG(order_count) FROM customer_orders) as avg_orders,
(SELECT MAX(order_count) FROM customer_orders) as max_orders;
-- 考虑:对于单次使用,子查询可能更好
SELECT customer_id, order_count
FROM (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
) subquery;高级模式
模式 1:数据去重
sql
-- 删除重复项,保留最新记录
WITH ranked_records AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
FROM user_signups
)
DELETE FROM user_signups
WHERE id IN (
SELECT id FROM ranked_records WHERE rn > 1
);模式 2:间隙分析
sql
-- 查找缺失的发票号码
WITH RECURSIVE invoice_range AS (
SELECT MIN(invoice_number) as num FROM invoices
UNION ALL
SELECT num + 1
FROM invoice_range
WHERE num < (SELECT MAX(invoice_number) FROM invoices)
)
SELECT ir.num as missing_invoice
FROM invoice_range ir
LEFT JOIN invoices i ON ir.num = i.invoice_number
WHERE i.invoice_number IS NULL;模式 3:数据透视
sql
-- 按产品类别透视月度销售额
WITH monthly_category_sales AS (
SELECT
DATE_TRUNC('month', o.order_date) as month,
p.category,
SUM(oi.quantity * oi.price) as sales
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
GROUP BY DATE_TRUNC('month', o.order_date), p.category
)
SELECT
month,
SUM(CASE WHEN category = 'Electronics' THEN sales ELSE 0 END) as electronics,
SUM(CASE WHEN category = 'Clothing' THEN sales ELSE 0 END) as clothing,
SUM(CASE WHEN category = 'Books' THEN sales ELSE 0 END) as books
FROM monthly_category_sales
GROUP BY month
ORDER BY month;常见错误和解决方案
错误 1:无限递归
sql
-- 错误:没有终止条件
WITH RECURSIVE infinite AS (
SELECT 1 as n
UNION ALL
SELECT n + 1 FROM infinite -- 永不停止!
)
SELECT * FROM infinite;
-- 正确:添加终止条件
WITH RECURSIVE finite AS (
SELECT 1 as n
UNION ALL
SELECT n + 1 FROM finite WHERE n < 100
)
SELECT * FROM finite;错误 2:在定义之前引用 CTE
sql
-- 错误:cte2 在定义之前引用 cte1
WITH
cte2 AS (SELECT * FROM cte1),
cte1 AS (SELECT * FROM table1)
SELECT * FROM cte2;
-- 正确:按顺序定义
WITH
cte1 AS (SELECT * FROM table1),
cte2 AS (SELECT * FROM cte1)
SELECT * FROM cte2;错误 3:不必要的复杂性
sql
-- 过于复杂
WITH cte1 AS (SELECT * FROM orders),
cte2 AS (SELECT * FROM cte1 WHERE status = 'completed')
SELECT * FROM cte2;
-- 更简单
SELECT * FROM orders WHERE status = 'completed';最佳实践
- 使用描述性名称:清楚地命名 CTE 以指示其目的
- 分解复杂查询:使用多个 CTE 提高可读性
- 考虑性能:适当时使用 MATERIALIZED/NOT MATERIALIZED
- 限制递归深度:始终包含终止条件
- 记录复杂逻辑:为复杂的递归 CTE 添加注释
- 测试递归 CTE:验证终止和正确性
- 用于可读性:CTE 擅长使查询更易维护
总结
WITH 子句(CTE)是查询组织的强大工具:
- 定义临时命名结果集
- 提高查询可读性和可维护性
- 支持层次数据的递归查询
- 可与 SELECT、INSERT、UPDATE、DELETE 一起使用
- 可以在单个查询中定义多个 CTE
- MATERIALIZED/NOT MATERIALIZED 控制优化
- 对于复杂的数据转换和分析至关重要
掌握 CTE 对于编写清晰、可维护的 PostgreSQL 查询至关重要。