PostgreSQL GROUP BY
概述
GROUP BY子句将具有相同值的行分组到汇总行中。它通常与聚合函数(COUNT、SUM、AVG、MAX、MIN)一起使用,对每组行执行计算。
基本语法
sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;基本GROUP BY
单列分组
sql
-- 按国家统计用户数
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country;
-- 按用户汇总订单
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id;
-- 按类别计算平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;多列分组
sql
-- 按国家和城市分组
SELECT country, city, COUNT(*) AS user_count
FROM users
GROUP BY country, city;
-- 按多列分组
SELECT
category,
brand,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category, brand;
-- 按状态和日期分组
SELECT
status,
DATE(created_at) AS order_date,
COUNT(*) AS order_count
FROM orders
GROUP BY status, DATE(created_at);聚合函数与GROUP BY
COUNT
sql
-- 每个类别的产品数
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category;
-- 统计不同的值
SELECT category, COUNT(DISTINCT brand) AS brand_count
FROM products
GROUP BY category;
-- 统计非NULL值
SELECT category, COUNT(description) AS products_with_description
FROM products
GROUP BY category;SUM
sql
-- 按产品的总销量
SELECT product_id, SUM(quantity) AS total_sold
FROM order_items
GROUP BY product_id;
-- 按类别的总收入
SELECT
category,
SUM(price * quantity) AS total_revenue
FROM products p
JOIN order_items oi ON p.id = oi.product_id
GROUP BY category;AVG
sql
-- 按类别的平均价格
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;
-- 按用户的平均订单价值
SELECT user_id, AVG(total) AS avg_order_value
FROM orders
GROUP BY user_id;
-- 带四舍五入的平均值
SELECT
category,
ROUND(AVG(price), 2) AS avg_price
FROM products
GROUP BY category;MIN和MAX
sql
-- 按类别的价格范围
SELECT
category,
MIN(price) AS min_price,
MAX(price) AS max_price,
MAX(price) - MIN(price) AS price_range
FROM products
GROUP BY category;
-- 首次和最后订单日期
SELECT
user_id,
MIN(order_date) AS first_order,
MAX(order_date) AS last_order
FROM orders
GROUP BY user_id;多个聚合
sql
-- 综合统计
SELECT
category,
COUNT(*) AS product_count,
SUM(stock) AS total_stock,
AVG(price) AS avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price,
STDDEV(price) AS price_stddev
FROM products
GROUP BY category;GROUP BY与WHERE
WHERE在分组前过滤行。
sql
-- 只分组活跃用户
SELECT country, COUNT(*) AS active_users
FROM users
WHERE status = 'active'
GROUP BY country;
-- 分组2024年的订单
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id;
-- 过滤和分组
SELECT
category,
AVG(price) AS avg_price
FROM products
WHERE stock > 0 AND price > 10
GROUP BY category;GROUP BY与HAVING
HAVING在聚合后过滤分组。
sql
-- 产品数超过10的类别
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
HAVING COUNT(*) > 10;
-- 消费超过1000元的用户
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(total) > 1000;
-- 平均价格超过50元的类别
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;
-- 多个HAVING条件
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING COUNT(*) > 5 AND AVG(price) < 100;GROUP BY与ORDER BY
sql
-- 分组并按计数排序
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
ORDER BY product_count DESC;
-- 分组并按聚合排序
SELECT
user_id,
SUM(total) AS total_spent
FROM orders
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 10;
-- 按多个聚合排序
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category
ORDER BY product_count DESC, avg_price DESC;GROUP BY与JOIN
sql
-- 分组连接的表
SELECT
u.name,
COUNT(o.id) AS order_count,
SUM(o.total) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- 多个连接的分组
SELECT
c.category_name,
COUNT(DISTINCT p.id) AS product_count,
COUNT(DISTINCT oi.order_id) AS order_count,
SUM(oi.quantity) AS total_sold
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY c.id, c.category_name;GROUP BY与表达式
sql
-- 按计算值分组
SELECT
EXTRACT(YEAR FROM order_date) AS year,
COUNT(*) AS order_count
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);
-- 按日期截断分组
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total) AS monthly_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;
-- 按CASE表达式分组
SELECT
CASE
WHEN price < 10 THEN '经济型'
WHEN price < 50 THEN '中档'
ELSE '高档'
END AS price_category,
COUNT(*) AS product_count
FROM products
GROUP BY
CASE
WHEN price < 10 THEN '经济型'
WHEN price < 50 THEN '中档'
ELSE '高档'
END;GROUP BY与日期/时间
sql
-- 按年分组
SELECT
EXTRACT(YEAR FROM order_date) AS year,
COUNT(*) AS order_count,
SUM(total) AS total_revenue
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);
-- 按月分组
SELECT
TO_CHAR(order_date, 'YYYY-MM') AS month,
COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'YYYY-MM')
ORDER BY month;
-- 按星期几分组
SELECT
TO_CHAR(order_date, 'Day') AS day_of_week,
COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'Day'), EXTRACT(DOW FROM order_date)
ORDER BY EXTRACT(DOW FROM order_date);
-- 按小时分组
SELECT
EXTRACT(HOUR FROM created_at) AS hour,
COUNT(*) AS event_count
FROM events
GROUP BY EXTRACT(HOUR FROM created_at)
ORDER BY hour;GROUPING SETS
GROUPING SETS允许在单个查询中指定多个分组集。
sql
-- 多个分组级别
SELECT
category,
brand,
COUNT(*) AS product_count
FROM products
GROUP BY GROUPING SETS (
(category, brand),
(category),
()
);
-- 等同于多个GROUP BY查询的UNION
SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand
UNION ALL
SELECT category, NULL, COUNT(*) FROM products GROUP BY category
UNION ALL
SELECT NULL, NULL, COUNT(*) FROM products;ROLLUP
ROLLUP创建小计和总计。
sql
-- 层次总计
SELECT
category,
brand,
COUNT(*) AS product_count,
SUM(price) AS total_value
FROM products
GROUP BY ROLLUP (category, brand);
-- 年和月的汇总
SELECT
EXTRACT(YEAR FROM order_date) AS year,
EXTRACT(MONTH FROM order_date) AS month,
SUM(total) AS revenue
FROM orders
GROUP BY ROLLUP (
EXTRACT(YEAR FROM order_date),
EXTRACT(MONTH FROM order_date)
);CUBE
CUBE创建所有可能的分组组合。
sql
-- 所有组合
SELECT
category,
brand,
COUNT(*) AS product_count
FROM products
GROUP BY CUBE (category, brand);
-- 结果包括:
-- (category, brand)
-- (category, NULL)
-- (NULL, brand)
-- (NULL, NULL)GROUPING函数
GROUPING函数标识哪些列被聚合。
sql
-- 识别聚合行
SELECT
category,
brand,
COUNT(*) AS product_count,
GROUPING(category) AS is_category_total,
GROUPING(brand) AS is_brand_total
FROM products
GROUP BY ROLLUP (category, brand);
-- 使用GROUPING添加标签
SELECT
CASE WHEN GROUPING(category) = 1 THEN '所有类别' ELSE category END AS category,
CASE WHEN GROUPING(brand) = 1 THEN '所有品牌' ELSE brand END AS brand,
COUNT(*) AS product_count
FROM products
GROUP BY ROLLUP (category, brand);GROUP BY与子查询
sql
-- 按子查询结果分组
SELECT
price_range,
COUNT(*) AS product_count
FROM (
SELECT
CASE
WHEN price < 10 THEN '经济型'
WHEN price < 50 THEN '中档'
ELSE '高档'
END AS price_range
FROM products
) AS categorized
GROUP BY price_range;
-- 聚合的聚合
SELECT
AVG(order_count) AS avg_orders_per_user
FROM (
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
) AS user_orders;性能考虑
sql
-- 在GROUP BY列上创建索引
CREATE INDEX idx_products_category ON products(category);
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 使用EXPLAIN分析
EXPLAIN ANALYZE
SELECT category, COUNT(*)
FROM products
GROUP BY category;
-- 尽可能避免在表达式上分组
-- 不好:GROUP BY UPPER(category)
-- 好:规范化数据,然后GROUP BY category常见模式
每组的前N条
sql
-- 每个类别价格最高的3个产品
WITH ranked AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rank
FROM products
)
SELECT * FROM ranked WHERE rank <= 3;累计总计
sql
-- 按日期的累计总计
SELECT
order_date,
SUM(total) AS daily_total,
SUM(SUM(total)) OVER (ORDER BY order_date) AS running_total
FROM orders
GROUP BY order_date
ORDER BY order_date;占总数的百分比
sql
-- 类别百分比
SELECT
category,
COUNT(*) AS product_count,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM products
GROUP BY category;最佳实践
在GROUP BY中包含所有非聚合列
sql-- 好 SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand; -- 错误:brand不在GROUP BY中 SELECT category, brand, COUNT(*) FROM products GROUP BY category;对聚合条件使用HAVING
sql-- 好:对聚合使用HAVING SELECT category, COUNT(*) FROM products GROUP BY category HAVING COUNT(*) > 10; -- 好:对行过滤使用WHERE SELECT category, COUNT(*) FROM products WHERE price > 10 GROUP BY category;执行顺序
FROM → WHERE → GROUP BY → HAVING → SELECT → ORDER BY → LIMIT使用有意义的别名
sqlSELECT category, COUNT(*) AS product_count, AVG(price) AS average_price FROM products GROUP BY category;
小结
GROUP BY子句要点:
- 分组行:具有相同值的行
- 与聚合一起使用:COUNT、SUM、AVG、MIN、MAX
- WHERE:分组前过滤
- HAVING:分组后过滤
- GROUPING SETS:多个分组
- ROLLUP:层次总计
- CUBE:所有组合
- 性能:为GROUP BY列创建索引