GROUP BY 分组
GROUP BY 子句用于将查询结果按一个或多个列进行分组,通常与聚合函数一起使用。本章将详细介绍 GROUP BY 的使用方法。
基本语法
sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;简单分组
sql
-- 统计每个城市的用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;结果:
| city | user_count |
|---|---|
| 北京 | 150 |
| 上海 | 120 |
| 广州 | 80 |
与聚合函数配合
COUNT - 计数
sql
-- 每个城市的用户数
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city;SUM - 求和
sql
-- 每个用户的订单总金额
SELECT
user_id,
SUM(total_amount) AS total_spent
FROM orders
GROUP BY user_id;AVG - 平均值
sql
-- 每个城市用户的平均年龄
SELECT
city,
AVG(age) AS avg_age
FROM users
GROUP BY city;MAX/MIN - 最大/最小值
sql
-- 每个分类中最贵和最便宜的商品
SELECT
category,
MAX(price) AS max_price,
MIN(price) AS min_price
FROM products
GROUP BY category;多列分组
sql
-- 按城市和性别分组
SELECT
city,
gender,
COUNT(*) AS user_count
FROM users
GROUP BY city, gender
ORDER BY city, gender;结果:
| city | gender | user_count |
|---|---|---|
| 北京 | 男 | 80 |
| 北京 | 女 | 70 |
| 上海 | 男 | 65 |
| 上海 | 女 | 55 |
HAVING 子句
HAVING 用于过滤分组后的结果(WHERE 用于过滤分组前的行)。
基本用法
sql
-- 查找用户数超过 100 的城市
SELECT
city,
COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;WHERE vs HAVING
sql
-- WHERE:在分组前过滤
SELECT city, COUNT(*) AS user_count
FROM users
WHERE age >= 18 -- 先过滤年龄
GROUP BY city
HAVING COUNT(*) > 50; -- 再过滤分组结果执行顺序:
- FROM - 选择表
- WHERE - 过滤行
- GROUP BY - 分组
- HAVING - 过滤分组
- SELECT - 选择列
- ORDER BY - 排序
多个聚合函数
sql
-- 每个用户的订单统计
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total_amount) AS total_spent,
AVG(total_amount) AS avg_order_value,
MAX(total_amount) AS max_order,
MIN(total_amount) AS min_order
FROM orders
GROUP BY user_id;按表达式分组
sql
-- 按年份分组统计订单
SELECT
YEAR(order_date) AS year,
COUNT(*) AS order_count,
SUM(total_amount) AS total_sales
FROM orders
GROUP BY YEAR(order_date)
ORDER BY year;GROUP_CONCAT(MySQL)
sql
-- 将分组内的值连接成字符串
SELECT
user_id,
GROUP_CONCAT(product_name) AS products
FROM order_items
GROUP BY user_id;WITH ROLLUP
sql
-- 添加小计和总计行
SELECT
city,
gender,
COUNT(*) AS user_count
FROM users
GROUP BY city, gender WITH ROLLUP;实战示例
示例 1:销售统计
sql
-- 按月统计销售额
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
COUNT(*) AS order_count,
SUM(total_amount) AS total_sales,
AVG(total_amount) AS avg_order_value
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month;示例 2:用户行为分析
sql
-- 分析用户购买行为
SELECT
u.id,
u.username,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent,
AVG(o.total_amount) AS avg_order_value,
MAX(o.order_date) AS last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username
HAVING order_count > 0
ORDER BY total_spent DESC
LIMIT 20;示例 3:商品分类统计
sql
-- 统计每个分类的商品信息
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price,
SUM(stock) AS total_stock,
SUM(sales_count) AS total_sales
FROM products
WHERE is_active = TRUE
GROUP BY category
HAVING product_count >= 5
ORDER BY total_sales DESC;示例 4:时间段分析
sql
-- 按小时统计订单分布
SELECT
HOUR(created_at) AS hour,
COUNT(*) AS order_count,
SUM(total_amount) AS total_sales
FROM orders
WHERE DATE(created_at) = CURDATE()
GROUP BY HOUR(created_at)
ORDER BY hour;常见错误
错误 1:SELECT 中包含非分组列
sql
-- 错误:name 不在 GROUP BY 中
SELECT city, name, COUNT(*)
FROM users
GROUP BY city;
-- 正确:只选择分组列或聚合函数
SELECT city, COUNT(*)
FROM users
GROUP BY city;
-- 或者将 name 也加入 GROUP BY
SELECT city, name, COUNT(*)
FROM users
GROUP BY city, name;错误 2:WHERE 中使用聚合函数
sql
-- 错误:WHERE 不能使用聚合函数
SELECT city, COUNT(*) AS user_count
FROM users
WHERE COUNT(*) > 100
GROUP BY city;
-- 正确:使用 HAVING
SELECT city, COUNT(*) AS user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;错误 3:混淆 WHERE 和 HAVING
sql
-- 不推荐:在 HAVING 中过滤非聚合条件
SELECT city, COUNT(*)
FROM users
GROUP BY city
HAVING city = '北京';
-- 推荐:在 WHERE 中过滤
SELECT city, COUNT(*)
FROM users
WHERE city = '北京'
GROUP BY city;性能优化
1. 为分组列创建索引
sql
-- 为常用分组列创建索引
CREATE INDEX idx_city ON users(city);
CREATE INDEX idx_category ON products(category);2. 先过滤再分组
sql
-- 好:先用 WHERE 过滤
SELECT city, COUNT(*)
FROM users
WHERE status = 'active'
GROUP BY city;
-- 不好:分组后再过滤
SELECT city, COUNT(*)
FROM users
GROUP BY city
HAVING city IN ('北京', '上海');3. 使用覆盖索引
sql
-- 创建覆盖索引
CREATE INDEX idx_city_age ON users(city, age);
-- 查询只使用索引列
SELECT city, COUNT(*), AVG(age)
FROM users
GROUP BY city;高级用法
条件聚合
sql
-- 使用 CASE 进行条件聚合
SELECT
city,
COUNT(*) AS total_users,
SUM(CASE WHEN age < 30 THEN 1 ELSE 0 END) AS young_users,
SUM(CASE WHEN age >= 30 THEN 1 ELSE 0 END) AS older_users
FROM users
GROUP BY city;多维分组
sql
-- 按多个维度分组
SELECT
YEAR(order_date) AS year,
MONTH(order_date) AS month,
status,
COUNT(*) AS order_count,
SUM(total_amount) AS total_sales
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date), status
ORDER BY year, month, status;最佳实践
- 只在 SELECT 中使用分组列或聚合函数
- 使用 WHERE 过滤行,使用 HAVING 过滤分组
- 为分组列创建索引
- 先过滤再分组以提高性能
- 使用有意义的别名
小结
- GROUP BY:按列分组数据
- 聚合函数:COUNT、SUM、AVG、MAX、MIN
- HAVING:过滤分组后的结果
- 多列分组:按多个列分组
- 性能优化:创建索引、先过滤再分组
下一步: 学习 ALTER 修改表