PostgreSQL GROUP BY
Overview
The GROUP BY clause groups rows that have the same values in specified columns into summary rows. It is typically used with aggregate functions (COUNT, SUM, AVG, MAX, MIN) to perform calculations on each group of rows.
Basic Syntax
sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;Basic GROUP BY
Single Column Grouping
sql
-- Count users by country
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country;
-- Sum orders by user
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id;
-- Average price by category
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;Multiple Column Grouping
sql
-- Group by country and city
SELECT country, city, COUNT(*) AS user_count
FROM users
GROUP BY country, city;
-- Group by multiple columns
SELECT
category,
brand,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category, brand;
-- Group by status and date
SELECT
status,
DATE(created_at) AS order_date,
COUNT(*) AS order_count
FROM orders
GROUP BY status, DATE(created_at);Aggregate Functions with GROUP BY
COUNT
sql
-- Count products per category
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category;
-- Count distinct values
SELECT category, COUNT(DISTINCT brand) AS brand_count
FROM products
GROUP BY category;
-- Count non-NULL values
SELECT category, COUNT(description) AS products_with_description
FROM products
GROUP BY category;SUM
sql
-- Total sales by product
SELECT product_id, SUM(quantity) AS total_sold
FROM order_items
GROUP BY product_id;
-- Total revenue by category
SELECT
category,
SUM(price * quantity) AS total_revenue
FROM products p
JOIN order_items oi ON p.id = oi.product_id
GROUP BY category;AVG
sql
-- Average price by category
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;
-- Average order value by user
SELECT user_id, AVG(total) AS avg_order_value
FROM orders
GROUP BY user_id;
-- Average with rounding
SELECT
category,
ROUND(AVG(price), 2) AS avg_price
FROM products
GROUP BY category;MIN and MAX
sql
-- Price range by category
SELECT
category,
MIN(price) AS min_price,
MAX(price) AS max_price,
MAX(price) - MIN(price) AS price_range
FROM products
GROUP BY category;
-- First and last order dates
SELECT
user_id,
MIN(order_date) AS first_order,
MAX(order_date) AS last_order
FROM orders
GROUP BY user_id;Multiple Aggregates
sql
-- Comprehensive statistics
SELECT
category,
COUNT(*) AS product_count,
SUM(stock) AS total_stock,
AVG(price) AS avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price,
STDDEV(price) AS price_stddev
FROM products
GROUP BY category;GROUP BY with WHERE
WHERE filters rows before grouping.
sql
-- Group only active users
SELECT country, COUNT(*) AS active_users
FROM users
WHERE status = 'active'
GROUP BY country;
-- Group orders from 2024
SELECT
user_id,
COUNT(*) AS order_count,
SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id;
-- Filter and group
SELECT
category,
AVG(price) AS avg_price
FROM products
WHERE stock > 0 AND price > 10
GROUP BY category;GROUP BY with HAVING
HAVING filters groups after aggregation.
sql
-- Categories with more than 10 products
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
HAVING COUNT(*) > 10;
-- Users who spent more than $1000
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(total) > 1000;
-- Categories with average price > $50
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;
-- Multiple HAVING conditions
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING COUNT(*) > 5 AND AVG(price) < 100;GROUP BY with ORDER BY
sql
-- Group and sort by count
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
ORDER BY product_count DESC;
-- Group and sort by aggregate
SELECT
user_id,
SUM(total) AS total_spent
FROM orders
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 10;
-- Sort by multiple aggregates
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price
FROM products
GROUP BY category
ORDER BY product_count DESC, avg_price DESC;GROUP BY with JOIN
sql
-- Group joined tables
SELECT
u.name,
COUNT(o.id) AS order_count,
SUM(o.total) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- Multiple joins with grouping
SELECT
c.category_name,
COUNT(DISTINCT p.id) AS product_count,
COUNT(DISTINCT oi.order_id) AS order_count,
SUM(oi.quantity) AS total_sold
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY c.id, c.category_name;GROUP BY with Expressions
sql
-- Group by calculated value
SELECT
EXTRACT(YEAR FROM order_date) AS year,
COUNT(*) AS order_count
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);
-- Group by date truncation
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total) AS monthly_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;
-- Group by CASE expression
SELECT
CASE
WHEN price < 10 THEN 'Budget'
WHEN price < 50 THEN 'Mid-range'
ELSE 'Premium'
END AS price_category,
COUNT(*) AS product_count
FROM products
GROUP BY
CASE
WHEN price < 10 THEN 'Budget'
WHEN price < 50 THEN 'Mid-range'
ELSE 'Premium'
END;GROUP BY with Date/Time
sql
-- Group by year
SELECT
EXTRACT(YEAR FROM order_date) AS year,
COUNT(*) AS order_count,
SUM(total) AS total_revenue
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);
-- Group by month
SELECT
TO_CHAR(order_date, 'YYYY-MM') AS month,
COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'YYYY-MM')
ORDER BY month;
-- Group by day of week
SELECT
TO_CHAR(order_date, 'Day') AS day_of_week,
COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'Day'), EXTRACT(DOW FROM order_date)
ORDER BY EXTRACT(DOW FROM order_date);
-- Group by hour
SELECT
EXTRACT(HOUR FROM created_at) AS hour,
COUNT(*) AS event_count
FROM events
GROUP BY EXTRACT(HOUR FROM created_at)
ORDER BY hour;GROUPING SETS
GROUPING SETS allows you to specify multiple grouping sets in a single query.
sql
-- Multiple grouping levels
SELECT
category,
brand,
COUNT(*) AS product_count
FROM products
GROUP BY GROUPING SETS (
(category, brand),
(category),
()
);
-- Equivalent to UNION of multiple GROUP BY queries
SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand
UNION ALL
SELECT category, NULL, COUNT(*) FROM products GROUP BY category
UNION ALL
SELECT NULL, NULL, COUNT(*) FROM products;ROLLUP
ROLLUP creates subtotals and grand totals.
sql
-- Hierarchical totals
SELECT
category,
brand,
COUNT(*) AS product_count,
SUM(price) AS total_value
FROM products
GROUP BY ROLLUP (category, brand);
-- Year and month rollup
SELECT
EXTRACT(YEAR FROM order_date) AS year,
EXTRACT(MONTH FROM order_date) AS month,
SUM(total) AS revenue
FROM orders
GROUP BY ROLLUP (
EXTRACT(YEAR FROM order_date),
EXTRACT(MONTH FROM order_date)
);CUBE
CUBE creates all possible combinations of groupings.
sql
-- All combinations
SELECT
category,
brand,
COUNT(*) AS product_count
FROM products
GROUP BY CUBE (category, brand);
-- Results include:
-- (category, brand)
-- (category, NULL)
-- (NULL, brand)
-- (NULL, NULL)GROUPING Function
The GROUPING function identifies which columns are aggregated.
sql
-- Identify aggregated rows
SELECT
category,
brand,
COUNT(*) AS product_count,
GROUPING(category) AS is_category_total,
GROUPING(brand) AS is_brand_total
FROM products
GROUP BY ROLLUP (category, brand);
-- Use GROUPING for labels
SELECT
CASE WHEN GROUPING(category) = 1 THEN 'All Categories' ELSE category END AS category,
CASE WHEN GROUPING(brand) = 1 THEN 'All Brands' ELSE brand END AS brand,
COUNT(*) AS product_count
FROM products
GROUP BY ROLLUP (category, brand);GROUP BY with Subqueries
sql
-- Group by subquery result
SELECT
price_range,
COUNT(*) AS product_count
FROM (
SELECT
CASE
WHEN price < 10 THEN 'Budget'
WHEN price < 50 THEN 'Mid-range'
ELSE 'Premium'
END AS price_range
FROM products
) AS categorized
GROUP BY price_range;
-- Aggregate of aggregates
SELECT
AVG(order_count) AS avg_orders_per_user
FROM (
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
) AS user_orders;Performance Considerations
sql
-- Create indexes on GROUP BY columns
CREATE INDEX idx_products_category ON products(category);
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- Use EXPLAIN to analyze
EXPLAIN ANALYZE
SELECT category, COUNT(*)
FROM products
GROUP BY category;
-- Avoid grouping on expressions when possible
-- Bad: GROUP BY UPPER(category)
-- Good: Normalize data, then GROUP BY categoryCommon Patterns
Top N per Group
sql
-- Top 3 products per category by price
WITH ranked AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rank
FROM products
)
SELECT * FROM ranked WHERE rank <= 3;Running Totals
sql
-- Running total by date
SELECT
order_date,
SUM(total) AS daily_total,
SUM(SUM(total)) OVER (ORDER BY order_date) AS running_total
FROM orders
GROUP BY order_date
ORDER BY order_date;Percentage of Total
sql
-- Category percentage
SELECT
category,
COUNT(*) AS product_count,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM products
GROUP BY category;Best Practices
Include all non-aggregated columns in GROUP BY
sql-- Good SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand; -- Error: brand not in GROUP BY SELECT category, brand, COUNT(*) FROM products GROUP BY category;Use HAVING for aggregate conditions
sql-- Good: HAVING for aggregates SELECT category, COUNT(*) FROM products GROUP BY category HAVING COUNT(*) > 10; -- Good: WHERE for row filtering SELECT category, COUNT(*) FROM products WHERE price > 10 GROUP BY category;Order of execution
FROM → WHERE → GROUP BY → HAVING → SELECT → ORDER BY → LIMITUse meaningful aliases
sqlSELECT category, COUNT(*) AS product_count, AVG(price) AS average_price FROM products GROUP BY category;
Summary
GROUP BY clause key points:
- Groups rows with same values
- Used with aggregates: COUNT, SUM, AVG, MIN, MAX
- WHERE: Filters before grouping
- HAVING: Filters after grouping
- GROUPING SETS: Multiple groupings
- ROLLUP: Hierarchical totals
- CUBE: All combinations
- Performance: Index GROUP BY columns