Skip to content

PostgreSQL GROUP BY

概述

GROUP BY子句将具有相同值的行分组到汇总行中。它通常与聚合函数(COUNT、SUM、AVG、MAX、MIN)一起使用,对每组行执行计算。

基本语法

sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;

基本GROUP BY

单列分组

sql
-- 按国家统计用户数
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country;

-- 按用户汇总订单
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id;

-- 按类别计算平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;

多列分组

sql
-- 按国家和城市分组
SELECT country, city, COUNT(*) AS user_count
FROM users
GROUP BY country, city;

-- 按多列分组
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category, brand;

-- 按状态和日期分组
SELECT 
    status,
    DATE(created_at) AS order_date,
    COUNT(*) AS order_count
FROM orders
GROUP BY status, DATE(created_at);

聚合函数与GROUP BY

COUNT

sql
-- 每个类别的产品数
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category;

-- 统计不同的值
SELECT category, COUNT(DISTINCT brand) AS brand_count
FROM products
GROUP BY category;

-- 统计非NULL值
SELECT category, COUNT(description) AS products_with_description
FROM products
GROUP BY category;

SUM

sql
-- 按产品的总销量
SELECT product_id, SUM(quantity) AS total_sold
FROM order_items
GROUP BY product_id;

-- 按类别的总收入
SELECT 
    category,
    SUM(price * quantity) AS total_revenue
FROM products p
JOIN order_items oi ON p.id = oi.product_id
GROUP BY category;

AVG

sql
-- 按类别的平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;

-- 按用户的平均订单价值
SELECT user_id, AVG(total) AS avg_order_value
FROM orders
GROUP BY user_id;

-- 带四舍五入的平均值
SELECT 
    category,
    ROUND(AVG(price), 2) AS avg_price
FROM products
GROUP BY category;

MIN和MAX

sql
-- 按类别的价格范围
SELECT 
    category,
    MIN(price) AS min_price,
    MAX(price) AS max_price,
    MAX(price) - MIN(price) AS price_range
FROM products
GROUP BY category;

-- 首次和最后订单日期
SELECT 
    user_id,
    MIN(order_date) AS first_order,
    MAX(order_date) AS last_order
FROM orders
GROUP BY user_id;

多个聚合

sql
-- 综合统计
SELECT 
    category,
    COUNT(*) AS product_count,
    SUM(stock) AS total_stock,
    AVG(price) AS avg_price,
    MIN(price) AS min_price,
    MAX(price) AS max_price,
    STDDEV(price) AS price_stddev
FROM products
GROUP BY category;

GROUP BY与WHERE

WHERE在分组前过滤行。

sql
-- 只分组活跃用户
SELECT country, COUNT(*) AS active_users
FROM users
WHERE status = 'active'
GROUP BY country;

-- 分组2024年的订单
SELECT 
    user_id,
    COUNT(*) AS order_count,
    SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id;

-- 过滤和分组
SELECT 
    category,
    AVG(price) AS avg_price
FROM products
WHERE stock > 0 AND price > 10
GROUP BY category;

GROUP BY与HAVING

HAVING在聚合后过滤分组。

sql
-- 产品数超过10的类别
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
HAVING COUNT(*) > 10;

-- 消费超过1000元的用户
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(total) > 1000;

-- 平均价格超过50元的类别
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;

-- 多个HAVING条件
SELECT 
    category,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING COUNT(*) > 5 AND AVG(price) < 100;

GROUP BY与ORDER BY

sql
-- 分组并按计数排序
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
ORDER BY product_count DESC;

-- 分组并按聚合排序
SELECT 
    user_id,
    SUM(total) AS total_spent
FROM orders
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 10;

-- 按多个聚合排序
SELECT 
    category,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category
ORDER BY product_count DESC, avg_price DESC;

GROUP BY与JOIN

sql
-- 分组连接的表
SELECT 
    u.name,
    COUNT(o.id) AS order_count,
    SUM(o.total) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- 多个连接的分组
SELECT 
    c.category_name,
    COUNT(DISTINCT p.id) AS product_count,
    COUNT(DISTINCT oi.order_id) AS order_count,
    SUM(oi.quantity) AS total_sold
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY c.id, c.category_name;

GROUP BY与表达式

sql
-- 按计算值分组
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    COUNT(*) AS order_count
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);

-- 按日期截断分组
SELECT 
    DATE_TRUNC('month', order_date) AS month,
    SUM(total) AS monthly_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;

-- 按CASE表达式分组
SELECT 
    CASE 
        WHEN price < 10 THEN '经济型'
        WHEN price < 50 THEN '中档'
        ELSE '高档'
    END AS price_category,
    COUNT(*) AS product_count
FROM products
GROUP BY 
    CASE 
        WHEN price < 10 THEN '经济型'
        WHEN price < 50 THEN '中档'
        ELSE '高档'
    END;

GROUP BY与日期/时间

sql
-- 按年分组
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    COUNT(*) AS order_count,
    SUM(total) AS total_revenue
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);

-- 按月分组
SELECT 
    TO_CHAR(order_date, 'YYYY-MM') AS month,
    COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'YYYY-MM')
ORDER BY month;

-- 按星期几分组
SELECT 
    TO_CHAR(order_date, 'Day') AS day_of_week,
    COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'Day'), EXTRACT(DOW FROM order_date)
ORDER BY EXTRACT(DOW FROM order_date);

-- 按小时分组
SELECT 
    EXTRACT(HOUR FROM created_at) AS hour,
    COUNT(*) AS event_count
FROM events
GROUP BY EXTRACT(HOUR FROM created_at)
ORDER BY hour;

GROUPING SETS

GROUPING SETS允许在单个查询中指定多个分组集。

sql
-- 多个分组级别
SELECT 
    category,
    brand,
    COUNT(*) AS product_count
FROM products
GROUP BY GROUPING SETS (
    (category, brand),
    (category),
    ()
);

-- 等同于多个GROUP BY查询的UNION
SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand
UNION ALL
SELECT category, NULL, COUNT(*) FROM products GROUP BY category
UNION ALL
SELECT NULL, NULL, COUNT(*) FROM products;

ROLLUP

ROLLUP创建小计和总计。

sql
-- 层次总计
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    SUM(price) AS total_value
FROM products
GROUP BY ROLLUP (category, brand);

-- 年和月的汇总
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    EXTRACT(MONTH FROM order_date) AS month,
    SUM(total) AS revenue
FROM orders
GROUP BY ROLLUP (
    EXTRACT(YEAR FROM order_date),
    EXTRACT(MONTH FROM order_date)
);

CUBE

CUBE创建所有可能的分组组合。

sql
-- 所有组合
SELECT 
    category,
    brand,
    COUNT(*) AS product_count
FROM products
GROUP BY CUBE (category, brand);

-- 结果包括:
-- (category, brand)
-- (category, NULL)
-- (NULL, brand)
-- (NULL, NULL)

GROUPING函数

GROUPING函数标识哪些列被聚合。

sql
-- 识别聚合行
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    GROUPING(category) AS is_category_total,
    GROUPING(brand) AS is_brand_total
FROM products
GROUP BY ROLLUP (category, brand);

-- 使用GROUPING添加标签
SELECT 
    CASE WHEN GROUPING(category) = 1 THEN '所有类别' ELSE category END AS category,
    CASE WHEN GROUPING(brand) = 1 THEN '所有品牌' ELSE brand END AS brand,
    COUNT(*) AS product_count
FROM products
GROUP BY ROLLUP (category, brand);

GROUP BY与子查询

sql
-- 按子查询结果分组
SELECT 
    price_range,
    COUNT(*) AS product_count
FROM (
    SELECT 
        CASE 
            WHEN price < 10 THEN '经济型'
            WHEN price < 50 THEN '中档'
            ELSE '高档'
        END AS price_range
    FROM products
) AS categorized
GROUP BY price_range;

-- 聚合的聚合
SELECT 
    AVG(order_count) AS avg_orders_per_user
FROM (
    SELECT user_id, COUNT(*) AS order_count
    FROM orders
    GROUP BY user_id
) AS user_orders;

性能考虑

sql
-- 在GROUP BY列上创建索引
CREATE INDEX idx_products_category ON products(category);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- 使用EXPLAIN分析
EXPLAIN ANALYZE
SELECT category, COUNT(*)
FROM products
GROUP BY category;

-- 尽可能避免在表达式上分组
-- 不好:GROUP BY UPPER(category)
-- 好:规范化数据,然后GROUP BY category

常见模式

每组的前N条

sql
-- 每个类别价格最高的3个产品
WITH ranked AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rank
    FROM products
)
SELECT * FROM ranked WHERE rank <= 3;

累计总计

sql
-- 按日期的累计总计
SELECT 
    order_date,
    SUM(total) AS daily_total,
    SUM(SUM(total)) OVER (ORDER BY order_date) AS running_total
FROM orders
GROUP BY order_date
ORDER BY order_date;

占总数的百分比

sql
-- 类别百分比
SELECT 
    category,
    COUNT(*) AS product_count,
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM products
GROUP BY category;

最佳实践

  1. 在GROUP BY中包含所有非聚合列

    sql
    -- 好
    SELECT category, brand, COUNT(*)
    FROM products
    GROUP BY category, brand;
    
    -- 错误:brand不在GROUP BY中
    SELECT category, brand, COUNT(*)
    FROM products
    GROUP BY category;
  2. 对聚合条件使用HAVING

    sql
    -- 好:对聚合使用HAVING
    SELECT category, COUNT(*)
    FROM products
    GROUP BY category
    HAVING COUNT(*) > 10;
    
    -- 好:对行过滤使用WHERE
    SELECT category, COUNT(*)
    FROM products
    WHERE price > 10
    GROUP BY category;
  3. 执行顺序

    FROM → WHERE → GROUP BY → HAVING → SELECT → ORDER BY → LIMIT
  4. 使用有意义的别名

    sql
    SELECT 
        category,
        COUNT(*) AS product_count,
        AVG(price) AS average_price
    FROM products
    GROUP BY category;

小结

GROUP BY子句要点:

  • 分组行:具有相同值的行
  • 与聚合一起使用:COUNT、SUM、AVG、MIN、MAX
  • WHERE:分组前过滤
  • HAVING:分组后过滤
  • GROUPING SETS:多个分组
  • ROLLUP:层次总计
  • CUBE:所有组合
  • 性能:为GROUP BY列创建索引