MySQL GROUP BY
Overview
The GROUP BY clause groups rows that have the same values into summary rows. It is often used with aggregate functions like COUNT, SUM, AVG, MIN, MAX to produce grouped summaries.
GROUP BY Syntax
sql
SELECT column1, aggregate_function(column2)
FROM table_name
WHERE condition
GROUP BY column1
[HAVING condition];Basic GROUP BY
Single Column
sql
-- Group by single column
SELECT status, COUNT(*) FROM orders GROUP BY status;
-- Group by country
SELECT country, COUNT(*) FROM users GROUP BY country;
-- Count per category
SELECT category_id, COUNT(*) AS product_count FROM products GROUP BY category_id;Multiple Columns
sql
-- Group by multiple columns
SELECT status, country, COUNT(*) FROM users GROUP BY status, country;
-- Detailed grouping
SELECT
category,
subcategory,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category, subcategory;Aggregate Functions
COUNT
sql
-- Count all rows
SELECT COUNT(*) FROM orders;
-- Count non-NULL values
SELECT COUNT(shipping_date) FROM orders;
-- Count distinct values
SELECT COUNT(DISTINCT country) FROM users;
-- Count per group
SELECT category, COUNT(*) FROM products GROUP BY category;SUM
sql
-- Sum of values
SELECT SUM(quantity) FROM order_items;
-- Sum per group
SELECT user_id, SUM(total) FROM orders GROUP BY user_id;
-- Sum with condition
SELECT
status,
SUM(CASE WHEN type = 'online' THEN amount ELSE 0 END) AS online_total,
SUM(CASE WHEN type = 'offline' THEN amount ELSE 0 END) AS offline_total
FROM payments
GROUP BY status;AVG
sql
-- Average value
SELECT AVG(price) FROM products;
-- Average per group
SELECT category, AVG(price) FROM products GROUP BY category;
-- Round average
SELECT category, ROUND(AVG(price), 2) FROM products GROUP BY category;MIN and MAX
sql
-- Minimum value
SELECT MIN(price) FROM products;
SELECT MIN(created_at) FROM orders;
-- Maximum value
SELECT MAX(price) FROM products;
SELECT MAX(created_at) FROM orders;
-- First and last date
SELECT
user_id,
MIN(created_at) AS first_order,
MAX(created_at) AS last_order
FROM orders
GROUP BY user_id;HAVING Clause
Basic HAVING
sql
-- Filter groups with HAVING
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
HAVING COUNT(*) > 5;
-- Having with aggregate
SELECT
category,
AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;HAVING vs WHERE
| Aspect |-------------|-------|--------| | Filters | Rows before grouping | Groups after grouping | | With GROUP BY | Before group | After group | | Aggregate functions | Cannot use | Can use | | Performance | Faster | Slower |
sql
-- WHERE filters rows before grouping
SELECT category, COUNT(*)
FROM products
WHERE price > 10
GROUP BY category;
-- HAVING filters groups after grouping
SELECT category, COUNT(*)
FROM products
GROUP BY category
HAVING COUNT(*) > 10;Complex HAVING
sql
-- Multiple conditions
SELECT
user_id,
COUNT(*) AS orders,
SUM(total) AS revenue
FROM orders
GROUP BY user_id
HAVING COUNT(*) > 5
AND SUM(total) > 1000
AND user_id IS NOT NULL;
-- With OR
SELECT
status,
COUNT(*)
FROM orders
GROUP BY status
HAVING COUNT(*) > 100 OR AVG(total) > 500;GROUP BY with JOIN
sql
-- Group with JOIN
SELECT
c.name AS category,
COUNT(p.id) AS product_count,
AVG(p.price) AS avg_price
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
GROUP BY c.id, c.name;
-- Complex JOIN with GROUP BY
SELECT
u.country,
COUNT(DISTINCT o.id) AS total_orders,
SUM(o.total) AS total_revenue,
AVG(o.total) AS avg_order_value
FROM users u
INNER JOIN orders o ON u.id = o.user_id
GROUP BY u.country;ROLLUP
Basic ROLLUP
sql
-- Generate subtotals and grand total
SELECT
category,
subcategory,
COUNT(*) AS product_count
FROM products
GROUP BY category, subcategory WITH ROLLUP;
-- Result includes:
-- - Details by category and subcategory
-- - Subtotals by category
-- - Grand totalGROUPING() Function
sql
-- Identify NULL from ROLLUP
SELECT
GROUPING(category) AS is_category_total,
GROUPING(subcategory) AS is_subcategory_total,
category,
subcategory,
COUNT(*)
FROM products
GROUP BY category, subcategory WITH ROLLUP;
-- 1 = generated by ROLLUP (supertotal row)
-- 0 = normal grouping rowCUBE
CUBE Syntax
sql
-- Generate all combinations
SELECT
category,
subcategory,
COUNT(*)
FROM products
GROUP BY category, subcategory WITH CUBE;
-- Creates summaries for:
-- - All combinations of category and subcategory
-- - Each category total
-- - Each subcategory total
-- - Grand totalGROUPING SETS
Basic GROUPING SETS
sql
-- Specific groupings
SELECT
category,
subcategory,
brand,
COUNT(*)
FROM products
GROUP BY GROUPING SETS (
(category),
(subcategory),
(brand),
() -- Grand total
);Equivalent ROLLUP and CUBE
sql
-- ROLLUP equivalent
GROUP BY a, b, c WITH ROLLUP
-- Equivalent to:
GROUP BY GROUPING SETS (
(a, b, c),
(a, b),
(a),
()
)
-- CUBE equivalent
GROUP BY a, b, c WITH CUBE
-- Equivalent to:
GROUP BY GROUPING SETS (
(a, b, c),
(a, b),
(a, c),
(b, c),
(a),
(b),
(c),
()
)DISTINCT with GROUP BY
sql
-- Count distinct values per group
SELECT
status,
COUNT(DISTINCT user_id) AS unique_users,
COUNT(*) AS total_orders
FROM orders
GROUP BY status;
-- Complex distinct count
SELECT
DATE(created_at) AS date,
COUNT(DISTINCT user_id) AS active_users,
COUNT(DISTINCT product_id) AS unique_products
FROM orders
GROUP BY DATE(created_at);Practical Examples
Sales Report
sql
-- Daily sales summary
SELECT
DATE(created_at) AS sale_date,
COUNT(*) AS orders,
SUM(total) AS revenue,
AVG(total) AS avg_order
FROM orders
WHERE created_at >= '2024-01-01'
GROUP BY DATE(created_at)
ORDER BY sale_date DESC;User Statistics
sql
-- Users by activity level
SELECT
CASE
WHEN login_count = 0 THEN 'Never'
WHEN login_count < 10 THEN 'Rare'
WHEN login_count < 50 THEN 'Regular'
ELSE 'Frequent'
END AS activity_level,
COUNT(*) AS user_count
FROM users
GROUP BY activity_level;Product Analytics
sql
-- Products by category with stats
SELECT
c.name AS category,
COUNT(p.id) AS products,
MIN(p.price) AS min_price,
MAX(p.price) AS max_price,
ROUND(AVG(p.price), 2) AS avg_price,
SUM(p.stock) AS total_stock
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
GROUP BY c.id, c.name
HAVING COUNT(p.id) > 0
ORDER BY products DESC;Performance Considerations
Index Usage
sql
-- Index on GROUP BY columns helps
CREATE INDEX idx_orders_user_id ON orders(user_id);
SELECT user_id, COUNT(*) FROM orders GROUP BY user_id;
-- Index for WHERE + GROUP BY
CREATE INDEX idx_products_category_price ON products(category, price);
SELECT category, AVG(price) FROM products WHERE price > 10 GROUP BY category;Optimization Tips
sql
-- Filter before grouping
SELECT category, COUNT(*)
FROM products
WHERE is_active = 1 -- Filter first
GROUP BY category;
-- Use appropriate data types
-- Numeric grouping is faster than string grouping
-- Avoid SELECT * with GROUP BY
-- Select only necessary columnsTroubleshooting
Common Issues
sql
-- Column not in GROUP BY or aggregate
-- Error: Expression #X of SELECT list is not in GROUP BY
-- Solution: Add column to GROUP BY or use aggregate
-- Unexpected results
-- Check: GROUP BY columns, aggregate functions
-- Verify: Filter conditions in WHERE vs HAVINGDebugging
sql
-- Check group formation
SELECT category, COUNT(*) FROM products GROUP BY category;
-- Verify aggregate values
SELECT
category,
COUNT(*) AS cnt,
SUM(price) AS total,
AVG(price) AS avg
FROM products
GROUP BY category;Summary
GROUP BY clause provides:
- Grouping: Group rows by column values
- Aggregates: COUNT, SUM, AVG, MIN, MAX
- HAVING: Filter groups after grouping
- Advanced Grouping: ROLLUP, CUBE, GROUPING SETS
- Performance: Use indexes on grouping columns
- Use Cases: Reports, analytics, summaries
Previous: ORDER BY
Next: JOINs