Skip to content

GROUP BY 分组

GROUP BY 子句用于将查询结果按一个或多个列进行分组,通常与聚合函数一起使用。本章将详细介绍 GROUP BY 的使用方法。

基本语法

sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;

简单分组

sql
-- 统计每个城市的用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;

结果:

cityuser_count
北京150
上海120
广州80

与聚合函数配合

COUNT - 计数

sql
-- 每个城市的用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;

SUM - 求和

sql
-- 每个用户的订单总金额
SELECT 
    user_id,
    SUM(total_amount) AS total_spent
FROM orders
GROUP BY user_id;

AVG - 平均值

sql
-- 每个城市用户的平均年龄
SELECT 
    city,
    AVG(age) AS avg_age
FROM users
GROUP BY city;

MAX/MIN - 最大/最小值

sql
-- 每个分类中最贵和最便宜的商品
SELECT 
    category,
    MAX(price) AS max_price,
    MIN(price) AS min_price
FROM products
GROUP BY category;

多列分组

sql
-- 按城市和性别分组
SELECT 
    city,
    gender,
    COUNT(*) AS user_count
FROM users
GROUP BY city, gender
ORDER BY city, gender;

结果:

citygenderuser_count
北京80
北京70
上海65
上海55

HAVING 子句

HAVING 用于过滤分组后的结果(WHERE 用于过滤分组前的行)。

基本用法

sql
-- 查找用户数超过 100 的城市
SELECT 
    city,
    COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;

WHERE vs HAVING

sql
-- WHERE:在分组前过滤
SELECT city, COUNT(*) AS user_count
FROM users
WHERE age >= 18  -- 先过滤年龄
GROUP BY city
HAVING COUNT(*) > 50;  -- 再过滤分组结果

执行顺序:

  1. FROM - 选择表
  2. WHERE - 过滤行
  3. GROUP BY - 分组
  4. HAVING - 过滤分组
  5. SELECT - 选择列
  6. ORDER BY - 排序

多个聚合函数

sql
-- 每个用户的订单统计
SELECT 
    user_id,
    COUNT(*) AS order_count,
    SUM(total_amount) AS total_spent,
    AVG(total_amount) AS avg_order_value,
    MAX(total_amount) AS max_order,
    MIN(total_amount) AS min_order
FROM orders
GROUP BY user_id;

按表达式分组

sql
-- 按年份分组统计订单
SELECT 
    YEAR(order_date) AS year,
    COUNT(*) AS order_count,
    SUM(total_amount) AS total_sales
FROM orders
GROUP BY YEAR(order_date)
ORDER BY year;

GROUP_CONCAT(MySQL)

sql
-- 将分组内的值连接成字符串
SELECT 
    user_id,
    GROUP_CONCAT(product_name) AS products
FROM order_items
GROUP BY user_id;

WITH ROLLUP

sql
-- 添加小计和总计行
SELECT 
    city,
    gender,
    COUNT(*) AS user_count
FROM users
GROUP BY city, gender WITH ROLLUP;

实战示例

示例 1:销售统计

sql
-- 按月统计销售额
SELECT 
    DATE_FORMAT(order_date, '%Y-%m') AS month,
    COUNT(*) AS order_count,
    SUM(total_amount) AS total_sales,
    AVG(total_amount) AS avg_order_value
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month;

示例 2:用户行为分析

sql
-- 分析用户购买行为
SELECT 
    u.id,
    u.username,
    COUNT(o.id) AS order_count,
    SUM(o.total_amount) AS total_spent,
    AVG(o.total_amount) AS avg_order_value,
    MAX(o.order_date) AS last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username
HAVING order_count > 0
ORDER BY total_spent DESC
LIMIT 20;

示例 3:商品分类统计

sql
-- 统计每个分类的商品信息
SELECT 
    category,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price,
    SUM(stock) AS total_stock,
    SUM(sales_count) AS total_sales
FROM products
WHERE is_active = TRUE
GROUP BY category
HAVING product_count >= 5
ORDER BY total_sales DESC;

示例 4:时间段分析

sql
-- 按小时统计订单分布
SELECT 
    HOUR(created_at) AS hour,
    COUNT(*) AS order_count,
    SUM(total_amount) AS total_sales
FROM orders
WHERE DATE(created_at) = CURDATE()
GROUP BY HOUR(created_at)
ORDER BY hour;

常见错误

错误 1:SELECT 中包含非分组列

sql
-- 错误:name 不在 GROUP BY 中
SELECT city, name, COUNT(*)
FROM users
GROUP BY city;

-- 正确:只选择分组列或聚合函数
SELECT city, COUNT(*)
FROM users
GROUP BY city;

-- 或者将 name 也加入 GROUP BY
SELECT city, name, COUNT(*)
FROM users
GROUP BY city, name;

错误 2:WHERE 中使用聚合函数

sql
-- 错误:WHERE 不能使用聚合函数
SELECT city, COUNT(*) AS user_count
FROM users
WHERE COUNT(*) > 100
GROUP BY city;

-- 正确:使用 HAVING
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;

错误 3:混淆 WHERE 和 HAVING

sql
-- 不推荐:在 HAVING 中过滤非聚合条件
SELECT city, COUNT(*)
FROM users
GROUP BY city
HAVING city = '北京';

-- 推荐:在 WHERE 中过滤
SELECT city, COUNT(*)
FROM users
WHERE city = '北京'
GROUP BY city;

性能优化

1. 为分组列创建索引

sql
-- 为常用分组列创建索引
CREATE INDEX idx_city ON users(city);
CREATE INDEX idx_category ON products(category);

2. 先过滤再分组

sql
-- 好:先用 WHERE 过滤
SELECT city, COUNT(*)
FROM users
WHERE status = 'active'
GROUP BY city;

-- 不好:分组后再过滤
SELECT city, COUNT(*)
FROM users
GROUP BY city
HAVING city IN ('北京', '上海');

3. 使用覆盖索引

sql
-- 创建覆盖索引
CREATE INDEX idx_city_age ON users(city, age);

-- 查询只使用索引列
SELECT city, COUNT(*), AVG(age)
FROM users
GROUP BY city;

高级用法

条件聚合

sql
-- 使用 CASE 进行条件聚合
SELECT 
    city,
    COUNT(*) AS total_users,
    SUM(CASE WHEN age < 30 THEN 1 ELSE 0 END) AS young_users,
    SUM(CASE WHEN age >= 30 THEN 1 ELSE 0 END) AS older_users
FROM users
GROUP BY city;

多维分组

sql
-- 按多个维度分组
SELECT 
    YEAR(order_date) AS year,
    MONTH(order_date) AS month,
    status,
    COUNT(*) AS order_count,
    SUM(total_amount) AS total_sales
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date), status
ORDER BY year, month, status;

最佳实践

  1. 只在 SELECT 中使用分组列或聚合函数
  2. 使用 WHERE 过滤行,使用 HAVING 过滤分组
  3. 为分组列创建索引
  4. 先过滤再分组以提高性能
  5. 使用有意义的别名

小结

  • GROUP BY:按列分组数据
  • 聚合函数:COUNT、SUM、AVG、MAX、MIN
  • HAVING:过滤分组后的结果
  • 多列分组:按多个列分组
  • 性能优化:创建索引、先过滤再分组

下一步: 学习 ALTER 修改表