Skip to content

MySQL GROUP BY

Overview

The GROUP BY clause groups rows that have the same values into summary rows. It is often used with aggregate functions like COUNT, SUM, AVG, MIN, MAX to produce grouped summaries.

GROUP BY Syntax

sql
SELECT column1, aggregate_function(column2)
FROM table_name
WHERE condition
GROUP BY column1
[HAVING condition];

Basic GROUP BY

Single Column

sql
-- Group by single column
SELECT status, COUNT(*) FROM orders GROUP BY status;

-- Group by country
SELECT country, COUNT(*) FROM users GROUP BY country;

-- Count per category
SELECT category_id, COUNT(*) AS product_count FROM products GROUP BY category_id;

Multiple Columns

sql
-- Group by multiple columns
SELECT status, country, COUNT(*) FROM users GROUP BY status, country;

-- Detailed grouping
SELECT 
    category, 
    subcategory, 
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products 
GROUP BY category, subcategory;

Aggregate Functions

COUNT

sql
-- Count all rows
SELECT COUNT(*) FROM orders;

-- Count non-NULL values
SELECT COUNT(shipping_date) FROM orders;

-- Count distinct values
SELECT COUNT(DISTINCT country) FROM users;

-- Count per group
SELECT category, COUNT(*) FROM products GROUP BY category;

SUM

sql
-- Sum of values
SELECT SUM(quantity) FROM order_items;

-- Sum per group
SELECT user_id, SUM(total) FROM orders GROUP BY user_id;

-- Sum with condition
SELECT 
    status,
    SUM(CASE WHEN type = 'online' THEN amount ELSE 0 END) AS online_total,
    SUM(CASE WHEN type = 'offline' THEN amount ELSE 0 END) AS offline_total
FROM payments
GROUP BY status;

AVG

sql
-- Average value
SELECT AVG(price) FROM products;

-- Average per group
SELECT category, AVG(price) FROM products GROUP BY category;

-- Round average
SELECT category, ROUND(AVG(price), 2) FROM products GROUP BY category;

MIN and MAX

sql
-- Minimum value
SELECT MIN(price) FROM products;
SELECT MIN(created_at) FROM orders;

-- Maximum value
SELECT MAX(price) FROM products;
SELECT MAX(created_at) FROM orders;

-- First and last date
SELECT 
    user_id,
    MIN(created_at) AS first_order,
    MAX(created_at) AS last_order
FROM orders
GROUP BY user_id;

HAVING Clause

Basic HAVING

sql
-- Filter groups with HAVING
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
HAVING COUNT(*) > 5;

-- Having with aggregate
SELECT 
    category,
    AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;

HAVING vs WHERE

| Aspect |-------------|-------|--------| | Filters | Rows before grouping | Groups after grouping | | With GROUP BY | Before group | After group | | Aggregate functions | Cannot use | Can use | | Performance | Faster | Slower |

sql
-- WHERE filters rows before grouping
SELECT category, COUNT(*)
FROM products
WHERE price > 10
GROUP BY category;

-- HAVING filters groups after grouping
SELECT category, COUNT(*)
FROM products
GROUP BY category
HAVING COUNT(*) > 10;

Complex HAVING

sql
-- Multiple conditions
SELECT 
    user_id,
    COUNT(*) AS orders,
    SUM(total) AS revenue
FROM orders
GROUP BY user_id
HAVING COUNT(*) > 5 
   AND SUM(total) > 1000
   AND user_id IS NOT NULL;

-- With OR
SELECT 
    status,
    COUNT(*)
FROM orders
GROUP BY status
HAVING COUNT(*) > 100 OR AVG(total) > 500;

GROUP BY with JOIN

sql
-- Group with JOIN
SELECT 
    c.name AS category,
    COUNT(p.id) AS product_count,
    AVG(p.price) AS avg_price
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
GROUP BY c.id, c.name;

-- Complex JOIN with GROUP BY
SELECT 
    u.country,
    COUNT(DISTINCT o.id) AS total_orders,
    SUM(o.total) AS total_revenue,
    AVG(o.total) AS avg_order_value
FROM users u
INNER JOIN orders o ON u.id = o.user_id
GROUP BY u.country;

ROLLUP

Basic ROLLUP

sql
-- Generate subtotals and grand total
SELECT 
    category,
    subcategory,
    COUNT(*) AS product_count
FROM products
GROUP BY category, subcategory WITH ROLLUP;

-- Result includes:
-- - Details by category and subcategory
-- - Subtotals by category
-- - Grand total

GROUPING() Function

sql
-- Identify NULL from ROLLUP
SELECT 
    GROUPING(category) AS is_category_total,
    GROUPING(subcategory) AS is_subcategory_total,
    category,
    subcategory,
    COUNT(*)
FROM products
GROUP BY category, subcategory WITH ROLLUP;

-- 1 = generated by ROLLUP (supertotal row)
-- 0 = normal grouping row

CUBE

CUBE Syntax

sql
-- Generate all combinations
SELECT 
    category,
    subcategory,
    COUNT(*)
FROM products
GROUP BY category, subcategory WITH CUBE;

-- Creates summaries for:
-- - All combinations of category and subcategory
-- - Each category total
-- - Each subcategory total
-- - Grand total

GROUPING SETS

Basic GROUPING SETS

sql
-- Specific groupings
SELECT 
    category,
    subcategory,
    brand,
    COUNT(*)
FROM products
GROUP BY GROUPING SETS (
    (category),
    (subcategory),
    (brand),
    ()  -- Grand total
);

Equivalent ROLLUP and CUBE

sql
-- ROLLUP equivalent
GROUP BY a, b, c WITH ROLLUP
-- Equivalent to:
GROUP BY GROUPING SETS (
    (a, b, c),
    (a, b),
    (a),
    ()
)

-- CUBE equivalent
GROUP BY a, b, c WITH CUBE
-- Equivalent to:
GROUP BY GROUPING SETS (
    (a, b, c),
    (a, b),
    (a, c),
    (b, c),
    (a),
    (b),
    (c),
    ()
)

DISTINCT with GROUP BY

sql
-- Count distinct values per group
SELECT 
    status,
    COUNT(DISTINCT user_id) AS unique_users,
    COUNT(*) AS total_orders
FROM orders
GROUP BY status;

-- Complex distinct count
SELECT 
    DATE(created_at) AS date,
    COUNT(DISTINCT user_id) AS active_users,
    COUNT(DISTINCT product_id) AS unique_products
FROM orders
GROUP BY DATE(created_at);

Practical Examples

Sales Report

sql
-- Daily sales summary
SELECT 
    DATE(created_at) AS sale_date,
    COUNT(*) AS orders,
    SUM(total) AS revenue,
    AVG(total) AS avg_order
FROM orders
WHERE created_at >= '2024-01-01'
GROUP BY DATE(created_at)
ORDER BY sale_date DESC;

User Statistics

sql
-- Users by activity level
SELECT 
    CASE 
        WHEN login_count = 0 THEN 'Never'
        WHEN login_count < 10 THEN 'Rare'
        WHEN login_count < 50 THEN 'Regular'
        ELSE 'Frequent'
    END AS activity_level,
    COUNT(*) AS user_count
FROM users
GROUP BY activity_level;

Product Analytics

sql
-- Products by category with stats
SELECT 
    c.name AS category,
    COUNT(p.id) AS products,
    MIN(p.price) AS min_price,
    MAX(p.price) AS max_price,
    ROUND(AVG(p.price), 2) AS avg_price,
    SUM(p.stock) AS total_stock
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
GROUP BY c.id, c.name
HAVING COUNT(p.id) > 0
ORDER BY products DESC;

Performance Considerations

Index Usage

sql
-- Index on GROUP BY columns helps
CREATE INDEX idx_orders_user_id ON orders(user_id);
SELECT user_id, COUNT(*) FROM orders GROUP BY user_id;

-- Index for WHERE + GROUP BY
CREATE INDEX idx_products_category_price ON products(category, price);
SELECT category, AVG(price) FROM products WHERE price > 10 GROUP BY category;

Optimization Tips

sql
-- Filter before grouping
SELECT category, COUNT(*)
FROM products
WHERE is_active = 1        -- Filter first
GROUP BY category;

-- Use appropriate data types
-- Numeric grouping is faster than string grouping

-- Avoid SELECT * with GROUP BY
-- Select only necessary columns

Troubleshooting

Common Issues

sql
-- Column not in GROUP BY or aggregate
-- Error: Expression #X of SELECT list is not in GROUP BY
-- Solution: Add column to GROUP BY or use aggregate

-- Unexpected results
-- Check: GROUP BY columns, aggregate functions
-- Verify: Filter conditions in WHERE vs HAVING

Debugging

sql
-- Check group formation
SELECT category, COUNT(*) FROM products GROUP BY category;

-- Verify aggregate values
SELECT 
    category,
    COUNT(*) AS cnt,
    SUM(price) AS total,
    AVG(price) AS avg
FROM products
GROUP BY category;

Summary

GROUP BY clause provides:

  • Grouping: Group rows by column values
  • Aggregates: COUNT, SUM, AVG, MIN, MAX
  • HAVING: Filter groups after grouping
  • Advanced Grouping: ROLLUP, CUBE, GROUPING SETS
  • Performance: Use indexes on grouping columns
  • Use Cases: Reports, analytics, summaries

Previous: ORDER BY

Next: JOINs

Content is for learning and research only.