Skip to content

PostgreSQL GROUP BY

Overview

The GROUP BY clause groups rows that have the same values in specified columns into summary rows. It is typically used with aggregate functions (COUNT, SUM, AVG, MAX, MIN) to perform calculations on each group of rows.

Basic Syntax

sql
SELECT column1, aggregate_function(column2)
FROM table_name
GROUP BY column1;

Basic GROUP BY

Single Column Grouping

sql
-- Count users by country
SELECT country, COUNT(*) AS user_count
FROM users
GROUP BY country;

-- Sum orders by user
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id;

-- Average price by category
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;

Multiple Column Grouping

sql
-- Group by country and city
SELECT country, city, COUNT(*) AS user_count
FROM users
GROUP BY country, city;

-- Group by multiple columns
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category, brand;

-- Group by status and date
SELECT 
    status,
    DATE(created_at) AS order_date,
    COUNT(*) AS order_count
FROM orders
GROUP BY status, DATE(created_at);

Aggregate Functions with GROUP BY

COUNT

sql
-- Count products per category
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category;

-- Count distinct values
SELECT category, COUNT(DISTINCT brand) AS brand_count
FROM products
GROUP BY category;

-- Count non-NULL values
SELECT category, COUNT(description) AS products_with_description
FROM products
GROUP BY category;

SUM

sql
-- Total sales by product
SELECT product_id, SUM(quantity) AS total_sold
FROM order_items
GROUP BY product_id;

-- Total revenue by category
SELECT 
    category,
    SUM(price * quantity) AS total_revenue
FROM products p
JOIN order_items oi ON p.id = oi.product_id
GROUP BY category;

AVG

sql
-- Average price by category
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category;

-- Average order value by user
SELECT user_id, AVG(total) AS avg_order_value
FROM orders
GROUP BY user_id;

-- Average with rounding
SELECT 
    category,
    ROUND(AVG(price), 2) AS avg_price
FROM products
GROUP BY category;

MIN and MAX

sql
-- Price range by category
SELECT 
    category,
    MIN(price) AS min_price,
    MAX(price) AS max_price,
    MAX(price) - MIN(price) AS price_range
FROM products
GROUP BY category;

-- First and last order dates
SELECT 
    user_id,
    MIN(order_date) AS first_order,
    MAX(order_date) AS last_order
FROM orders
GROUP BY user_id;

Multiple Aggregates

sql
-- Comprehensive statistics
SELECT 
    category,
    COUNT(*) AS product_count,
    SUM(stock) AS total_stock,
    AVG(price) AS avg_price,
    MIN(price) AS min_price,
    MAX(price) AS max_price,
    STDDEV(price) AS price_stddev
FROM products
GROUP BY category;

GROUP BY with WHERE

WHERE filters rows before grouping.

sql
-- Group only active users
SELECT country, COUNT(*) AS active_users
FROM users
WHERE status = 'active'
GROUP BY country;

-- Group orders from 2024
SELECT 
    user_id,
    COUNT(*) AS order_count,
    SUM(total) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id;

-- Filter and group
SELECT 
    category,
    AVG(price) AS avg_price
FROM products
WHERE stock > 0 AND price > 10
GROUP BY category;

GROUP BY with HAVING

HAVING filters groups after aggregation.

sql
-- Categories with more than 10 products
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
HAVING COUNT(*) > 10;

-- Users who spent more than $1000
SELECT user_id, SUM(total) AS total_spent
FROM orders
GROUP BY user_id
HAVING SUM(total) > 1000;

-- Categories with average price > $50
SELECT category, AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 50;

-- Multiple HAVING conditions
SELECT 
    category,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category
HAVING COUNT(*) > 5 AND AVG(price) < 100;

GROUP BY with ORDER BY

sql
-- Group and sort by count
SELECT category, COUNT(*) AS product_count
FROM products
GROUP BY category
ORDER BY product_count DESC;

-- Group and sort by aggregate
SELECT 
    user_id,
    SUM(total) AS total_spent
FROM orders
GROUP BY user_id
ORDER BY total_spent DESC
LIMIT 10;

-- Sort by multiple aggregates
SELECT 
    category,
    COUNT(*) AS product_count,
    AVG(price) AS avg_price
FROM products
GROUP BY category
ORDER BY product_count DESC, avg_price DESC;

GROUP BY with JOIN

sql
-- Group joined tables
SELECT 
    u.name,
    COUNT(o.id) AS order_count,
    SUM(o.total) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- Multiple joins with grouping
SELECT 
    c.category_name,
    COUNT(DISTINCT p.id) AS product_count,
    COUNT(DISTINCT oi.order_id) AS order_count,
    SUM(oi.quantity) AS total_sold
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY c.id, c.category_name;

GROUP BY with Expressions

sql
-- Group by calculated value
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    COUNT(*) AS order_count
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);

-- Group by date truncation
SELECT 
    DATE_TRUNC('month', order_date) AS month,
    SUM(total) AS monthly_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;

-- Group by CASE expression
SELECT 
    CASE 
        WHEN price < 10 THEN 'Budget'
        WHEN price < 50 THEN 'Mid-range'
        ELSE 'Premium'
    END AS price_category,
    COUNT(*) AS product_count
FROM products
GROUP BY 
    CASE 
        WHEN price < 10 THEN 'Budget'
        WHEN price < 50 THEN 'Mid-range'
        ELSE 'Premium'
    END;

GROUP BY with Date/Time

sql
-- Group by year
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    COUNT(*) AS order_count,
    SUM(total) AS total_revenue
FROM orders
GROUP BY EXTRACT(YEAR FROM order_date);

-- Group by month
SELECT 
    TO_CHAR(order_date, 'YYYY-MM') AS month,
    COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'YYYY-MM')
ORDER BY month;

-- Group by day of week
SELECT 
    TO_CHAR(order_date, 'Day') AS day_of_week,
    COUNT(*) AS order_count
FROM orders
GROUP BY TO_CHAR(order_date, 'Day'), EXTRACT(DOW FROM order_date)
ORDER BY EXTRACT(DOW FROM order_date);

-- Group by hour
SELECT 
    EXTRACT(HOUR FROM created_at) AS hour,
    COUNT(*) AS event_count
FROM events
GROUP BY EXTRACT(HOUR FROM created_at)
ORDER BY hour;

GROUPING SETS

GROUPING SETS allows you to specify multiple grouping sets in a single query.

sql
-- Multiple grouping levels
SELECT 
    category,
    brand,
    COUNT(*) AS product_count
FROM products
GROUP BY GROUPING SETS (
    (category, brand),
    (category),
    ()
);

-- Equivalent to UNION of multiple GROUP BY queries
SELECT category, brand, COUNT(*) FROM products GROUP BY category, brand
UNION ALL
SELECT category, NULL, COUNT(*) FROM products GROUP BY category
UNION ALL
SELECT NULL, NULL, COUNT(*) FROM products;

ROLLUP

ROLLUP creates subtotals and grand totals.

sql
-- Hierarchical totals
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    SUM(price) AS total_value
FROM products
GROUP BY ROLLUP (category, brand);

-- Year and month rollup
SELECT 
    EXTRACT(YEAR FROM order_date) AS year,
    EXTRACT(MONTH FROM order_date) AS month,
    SUM(total) AS revenue
FROM orders
GROUP BY ROLLUP (
    EXTRACT(YEAR FROM order_date),
    EXTRACT(MONTH FROM order_date)
);

CUBE

CUBE creates all possible combinations of groupings.

sql
-- All combinations
SELECT 
    category,
    brand,
    COUNT(*) AS product_count
FROM products
GROUP BY CUBE (category, brand);

-- Results include:
-- (category, brand)
-- (category, NULL)
-- (NULL, brand)
-- (NULL, NULL)

GROUPING Function

The GROUPING function identifies which columns are aggregated.

sql
-- Identify aggregated rows
SELECT 
    category,
    brand,
    COUNT(*) AS product_count,
    GROUPING(category) AS is_category_total,
    GROUPING(brand) AS is_brand_total
FROM products
GROUP BY ROLLUP (category, brand);

-- Use GROUPING for labels
SELECT 
    CASE WHEN GROUPING(category) = 1 THEN 'All Categories' ELSE category END AS category,
    CASE WHEN GROUPING(brand) = 1 THEN 'All Brands' ELSE brand END AS brand,
    COUNT(*) AS product_count
FROM products
GROUP BY ROLLUP (category, brand);

GROUP BY with Subqueries

sql
-- Group by subquery result
SELECT 
    price_range,
    COUNT(*) AS product_count
FROM (
    SELECT 
        CASE 
            WHEN price < 10 THEN 'Budget'
            WHEN price < 50 THEN 'Mid-range'
            ELSE 'Premium'
        END AS price_range
    FROM products
) AS categorized
GROUP BY price_range;

-- Aggregate of aggregates
SELECT 
    AVG(order_count) AS avg_orders_per_user
FROM (
    SELECT user_id, COUNT(*) AS order_count
    FROM orders
    GROUP BY user_id
) AS user_orders;

Performance Considerations

sql
-- Create indexes on GROUP BY columns
CREATE INDEX idx_products_category ON products(category);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- Use EXPLAIN to analyze
EXPLAIN ANALYZE
SELECT category, COUNT(*)
FROM products
GROUP BY category;

-- Avoid grouping on expressions when possible
-- Bad: GROUP BY UPPER(category)
-- Good: Normalize data, then GROUP BY category

Common Patterns

Top N per Group

sql
-- Top 3 products per category by price
WITH ranked AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) AS rank
    FROM products
)
SELECT * FROM ranked WHERE rank <= 3;

Running Totals

sql
-- Running total by date
SELECT 
    order_date,
    SUM(total) AS daily_total,
    SUM(SUM(total)) OVER (ORDER BY order_date) AS running_total
FROM orders
GROUP BY order_date
ORDER BY order_date;

Percentage of Total

sql
-- Category percentage
SELECT 
    category,
    COUNT(*) AS product_count,
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM products
GROUP BY category;

Best Practices

  1. Include all non-aggregated columns in GROUP BY

    sql
    -- Good
    SELECT category, brand, COUNT(*)
    FROM products
    GROUP BY category, brand;
    
    -- Error: brand not in GROUP BY
    SELECT category, brand, COUNT(*)
    FROM products
    GROUP BY category;
  2. Use HAVING for aggregate conditions

    sql
    -- Good: HAVING for aggregates
    SELECT category, COUNT(*)
    FROM products
    GROUP BY category
    HAVING COUNT(*) > 10;
    
    -- Good: WHERE for row filtering
    SELECT category, COUNT(*)
    FROM products
    WHERE price > 10
    GROUP BY category;
  3. Order of execution

    FROM → WHERE → GROUP BY → HAVING → SELECT → ORDER BY → LIMIT
  4. Use meaningful aliases

    sql
    SELECT 
        category,
        COUNT(*) AS product_count,
        AVG(price) AS average_price
    FROM products
    GROUP BY category;

Summary

GROUP BY clause key points:

  • Groups rows with same values
  • Used with aggregates: COUNT, SUM, AVG, MIN, MAX
  • WHERE: Filters before grouping
  • HAVING: Filters after grouping
  • GROUPING SETS: Multiple groupings
  • ROLLUP: Hierarchical totals
  • CUBE: All combinations
  • Performance: Index GROUP BY columns

Content is for learning and research only.