Skip to content

PostgreSQL WITH 子句(公共表表达式)

什么是 WITH?

WITH 子句,也称为公共表表达式(CTE),允许您定义临时命名结果集,可以在 SELECT、INSERT、UPDATE 或 DELETE 语句中引用。CTE 使复杂查询更易读和可维护。

基本语法

sql
WITH cte_name AS (
    SELECT column1, column2, ...
    FROM table_name
    WHERE condition
)
SELECT * FROM cte_name;

简单 CTE 示例

示例 1:基本 CTE

sql
-- 定义高价值客户的 CTE
WITH high_value_customers AS (
    SELECT customer_id, customer_name, total_spent
    FROM customers
    WHERE total_spent > 10000
)
SELECT * FROM high_value_customers
ORDER BY total_spent DESC;

示例 2:带聚合的 CTE

sql
-- 计算月度销售额
WITH monthly_sales AS (
    SELECT 
        DATE_TRUNC('month', order_date) as month,
        SUM(total_amount) as total_sales
    FROM orders
    GROUP BY DATE_TRUNC('month', order_date)
)
SELECT * FROM monthly_sales
ORDER BY month DESC;

多个 CTE

您可以在单个查询中定义多个 CTE:

sql
WITH 
customer_orders AS (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
),
customer_revenue AS (
    SELECT customer_id, SUM(total_amount) as total_revenue
    FROM orders
    GROUP BY customer_id
)
SELECT 
    c.customer_name,
    co.order_count,
    cr.total_revenue
FROM customers c
JOIN customer_orders co ON c.customer_id = co.customer_id
JOIN customer_revenue cr ON c.customer_id = cr.customer_id
WHERE co.order_count > 5;

递归 CTE

递归 CTE 允许您查询层次结构或树形结构数据:

基本递归 CTE 语法

sql
WITH RECURSIVE cte_name AS (
    -- 基本情况(锚成员)
    SELECT ...
    UNION ALL
    -- 递归情况(递归成员)
    SELECT ...
    FROM cte_name
    WHERE condition
)
SELECT * FROM cte_name;

示例 1:数字序列

sql
-- 生成从 1 到 10 的数字
WITH RECURSIVE numbers AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 10
)
SELECT * FROM numbers;

示例 2:员工层次结构

sql
-- 获取经理下的所有员工
WITH RECURSIVE employee_hierarchy AS (
    -- 基本情况:从 CEO 开始
    SELECT employee_id, employee_name, manager_id, 1 as level
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- 递归情况:获取下属
    SELECT e.employee_id, e.employee_name, e.manager_id, eh.level + 1
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT * FROM employee_hierarchy
ORDER BY level, employee_name;

示例 3:类别树

sql
-- 获取所有子类别
WITH RECURSIVE category_tree AS (
    -- 基本情况:根类别
    SELECT category_id, category_name, parent_id, 0 as depth
    FROM categories
    WHERE parent_id IS NULL
    
    UNION ALL
    
    -- 递归情况:子类别
    SELECT c.category_id, c.category_name, c.parent_id, ct.depth + 1
    FROM categories c
    JOIN category_tree ct ON c.parent_id = ct.category_id
)
SELECT 
    REPEAT('  ', depth) || category_name as category_hierarchy,
    depth
FROM category_tree
ORDER BY category_id;

CTE 与 INSERT、UPDATE、DELETE

CTE 与 INSERT

sql
-- 将顶级客户插入 VIP 表
WITH top_customers AS (
    SELECT customer_id, SUM(total_amount) as total_spent
    FROM orders
    GROUP BY customer_id
    HAVING SUM(total_amount) > 50000
)
INSERT INTO vip_customers (customer_id, tier)
SELECT customer_id, 'Gold'
FROM top_customers;

CTE 与 UPDATE

sql
-- 根据销售业绩更新产品价格
WITH product_sales AS (
    SELECT product_id, SUM(quantity) as total_sold
    FROM order_items
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY product_id
)
UPDATE products p
SET price = price * 1.1
FROM product_sales ps
WHERE p.product_id = ps.product_id
  AND ps.total_sold > 100;

CTE 与 DELETE

sql
-- 删除不活跃的客户
WITH inactive_customers AS (
    SELECT customer_id
    FROM customers c
    WHERE NOT EXISTS (
        SELECT 1 FROM orders o
        WHERE o.customer_id = c.customer_id
          AND o.order_date >= CURRENT_DATE - INTERVAL '2 years'
    )
)
DELETE FROM customers
WHERE customer_id IN (SELECT customer_id FROM inactive_customers);

实际应用示例

示例 1:销售分析

sql
-- 比较当月与上月销售额
WITH current_month AS (
    SELECT SUM(total_amount) as sales
    FROM orders
    WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE)
),
previous_month AS (
    SELECT SUM(total_amount) as sales
    FROM orders
    WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
)
SELECT 
    cm.sales as current_sales,
    pm.sales as previous_sales,
    ROUND(((cm.sales - pm.sales) / pm.sales * 100), 2) as growth_percentage
FROM current_month cm, previous_month pm;

示例 2:客户细分

sql
-- 按购买行为细分客户
WITH customer_stats AS (
    SELECT 
        customer_id,
        COUNT(*) as order_count,
        SUM(total_amount) as total_spent,
        AVG(total_amount) as avg_order_value,
        MAX(order_date) as last_order_date
    FROM orders
    GROUP BY customer_id
)
SELECT 
    customer_id,
    CASE 
        WHEN total_spent > 10000 AND order_count > 20 THEN 'VIP'
        WHEN total_spent > 5000 OR order_count > 10 THEN 'Regular'
        WHEN last_order_date < CURRENT_DATE - INTERVAL '6 months' THEN 'Inactive'
        ELSE 'New'
    END as customer_segment,
    order_count,
    total_spent,
    avg_order_value
FROM customer_stats;

示例 3:累计总计

sql
-- 计算每日销售额的累计总计
WITH daily_sales AS (
    SELECT 
        DATE(order_date) as sale_date,
        SUM(total_amount) as daily_total
    FROM orders
    GROUP BY DATE(order_date)
)
SELECT 
    sale_date,
    daily_total,
    SUM(daily_total) OVER (ORDER BY sale_date) as running_total
FROM daily_sales
ORDER BY sale_date;

性能优化

1. 物化 CTE

默认情况下,CTE 是优化屏障。使用 MATERIALIZED 或 NOT MATERIALIZED:

sql
-- 强制物化(PostgreSQL 12+)
WITH MATERIALIZED high_value_orders AS (
    SELECT * FROM orders WHERE total_amount > 1000
)
SELECT * FROM high_value_orders;

-- 防止物化(内联 CTE)
WITH NOT MATERIALIZED recent_orders AS (
    SELECT * FROM orders WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)
SELECT * FROM recent_orders;

2. 何时使用 CTE

CTE 最适合:

  • 提高查询可读性
  • 递归查询
  • 多次引用同一子查询
  • 分解复杂逻辑

3. 性能提示

sql
-- 好:CTE 被多次引用
WITH customer_orders AS (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
)
SELECT 
    (SELECT AVG(order_count) FROM customer_orders) as avg_orders,
    (SELECT MAX(order_count) FROM customer_orders) as max_orders;

-- 考虑:对于单次使用,子查询可能更好
SELECT customer_id, order_count
FROM (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
) subquery;

高级模式

模式 1:数据去重

sql
-- 删除重复项,保留最新记录
WITH ranked_records AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
    FROM user_signups
)
DELETE FROM user_signups
WHERE id IN (
    SELECT id FROM ranked_records WHERE rn > 1
);

模式 2:间隙分析

sql
-- 查找缺失的发票号码
WITH RECURSIVE invoice_range AS (
    SELECT MIN(invoice_number) as num FROM invoices
    UNION ALL
    SELECT num + 1
    FROM invoice_range
    WHERE num < (SELECT MAX(invoice_number) FROM invoices)
)
SELECT ir.num as missing_invoice
FROM invoice_range ir
LEFT JOIN invoices i ON ir.num = i.invoice_number
WHERE i.invoice_number IS NULL;

模式 3:数据透视

sql
-- 按产品类别透视月度销售额
WITH monthly_category_sales AS (
    SELECT 
        DATE_TRUNC('month', o.order_date) as month,
        p.category,
        SUM(oi.quantity * oi.price) as sales
    FROM orders o
    JOIN order_items oi ON o.order_id = oi.order_id
    JOIN products p ON oi.product_id = p.product_id
    GROUP BY DATE_TRUNC('month', o.order_date), p.category
)
SELECT 
    month,
    SUM(CASE WHEN category = 'Electronics' THEN sales ELSE 0 END) as electronics,
    SUM(CASE WHEN category = 'Clothing' THEN sales ELSE 0 END) as clothing,
    SUM(CASE WHEN category = 'Books' THEN sales ELSE 0 END) as books
FROM monthly_category_sales
GROUP BY month
ORDER BY month;

常见错误和解决方案

错误 1:无限递归

sql
-- 错误:没有终止条件
WITH RECURSIVE infinite AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1 FROM infinite  -- 永不停止!
)
SELECT * FROM infinite;

-- 正确:添加终止条件
WITH RECURSIVE finite AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1 FROM finite WHERE n < 100
)
SELECT * FROM finite;

错误 2:在定义之前引用 CTE

sql
-- 错误:cte2 在定义之前引用 cte1
WITH 
cte2 AS (SELECT * FROM cte1),
cte1 AS (SELECT * FROM table1)
SELECT * FROM cte2;

-- 正确:按顺序定义
WITH 
cte1 AS (SELECT * FROM table1),
cte2 AS (SELECT * FROM cte1)
SELECT * FROM cte2;

错误 3:不必要的复杂性

sql
-- 过于复杂
WITH cte1 AS (SELECT * FROM orders),
     cte2 AS (SELECT * FROM cte1 WHERE status = 'completed')
SELECT * FROM cte2;

-- 更简单
SELECT * FROM orders WHERE status = 'completed';

最佳实践

  1. 使用描述性名称:清楚地命名 CTE 以指示其目的
  2. 分解复杂查询:使用多个 CTE 提高可读性
  3. 考虑性能:适当时使用 MATERIALIZED/NOT MATERIALIZED
  4. 限制递归深度:始终包含终止条件
  5. 记录复杂逻辑:为复杂的递归 CTE 添加注释
  6. 测试递归 CTE:验证终止和正确性
  7. 用于可读性:CTE 擅长使查询更易维护

总结

WITH 子句(CTE)是查询组织的强大工具:

  • 定义临时命名结果集
  • 提高查询可读性和可维护性
  • 支持层次数据的递归查询
  • 可与 SELECT、INSERT、UPDATE、DELETE 一起使用
  • 可以在单个查询中定义多个 CTE
  • MATERIALIZED/NOT MATERIALIZED 控制优化
  • 对于复杂的数据转换和分析至关重要

掌握 CTE 对于编写清晰、可维护的 PostgreSQL 查询至关重要。