Skip to content

PostgreSQL WITH Clause (Common Table Expressions)

What is WITH?

The WITH clause, also known as Common Table Expressions (CTEs), allows you to define temporary named result sets that can be referenced within a SELECT, INSERT, UPDATE, or DELETE statement. CTEs make complex queries more readable and maintainable.

Basic Syntax

sql
WITH cte_name AS (
    SELECT column1, column2, ...
    FROM table_name
    WHERE condition
)
SELECT * FROM cte_name;

Simple CTE Examples

Example 1: Basic CTE

sql
-- Define a CTE for high-value customers
WITH high_value_customers AS (
    SELECT customer_id, customer_name, total_spent
    FROM customers
    WHERE total_spent > 10000
)
SELECT * FROM high_value_customers
ORDER BY total_spent DESC;

Example 2: CTE with Aggregation

sql
-- Calculate monthly sales
WITH monthly_sales AS (
    SELECT 
        DATE_TRUNC('month', order_date) as month,
        SUM(total_amount) as total_sales
    FROM orders
    GROUP BY DATE_TRUNC('month', order_date)
)
SELECT * FROM monthly_sales
ORDER BY month DESC;

Multiple CTEs

You can define multiple CTEs in a single query:

sql
WITH 
customer_orders AS (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
),
customer_revenue AS (
    SELECT customer_id, SUM(total_amount) as total_revenue
    FROM orders
    GROUP BY customer_id
)
SELECT 
    c.customer_name,
    co.order_count,
    cr.total_revenue
FROM customers c
JOIN customer_orders co ON c.customer_id = co.customer_id
JOIN customer_revenue cr ON c.customer_id = cr.customer_id
WHERE co.order_count > 5;

Recursive CTEs

Recursive CTEs allow you to query hierarchical or tree-structured data:

Basic Recursive CTE Syntax

sql
WITH RECURSIVE cte_name AS (
    -- Base case (anchor member)
    SELECT ...
    UNION ALL
    -- Recursive case (recursive member)
    SELECT ...
    FROM cte_name
    WHERE condition
)
SELECT * FROM cte_name;

Example 1: Number Sequence

sql
-- Generate numbers from 1 to 10
WITH RECURSIVE numbers AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1
    FROM numbers
    WHERE n < 10
)
SELECT * FROM numbers;

Example 2: Employee Hierarchy

sql
-- Get all employees under a manager
WITH RECURSIVE employee_hierarchy AS (
    -- Base case: Start with the CEO
    SELECT employee_id, employee_name, manager_id, 1 as level
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- Recursive case: Get subordinates
    SELECT e.employee_id, e.employee_name, e.manager_id, eh.level + 1
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT * FROM employee_hierarchy
ORDER BY level, employee_name;

Example 3: Category Tree

sql
-- Get all subcategories
WITH RECURSIVE category_tree AS (
    -- Base case: Root categories
    SELECT category_id, category_name, parent_id, 0 as depth
    FROM categories
    WHERE parent_id IS NULL
    
    UNION ALL
    
    -- Recursive case: Child categories
    SELECT c.category_id, c.category_name, c.parent_id, ct.depth + 1
    FROM categories c
    JOIN category_tree ct ON c.parent_id = ct.category_id
)
SELECT 
    REPEAT('  ', depth) || category_name as category_hierarchy,
    depth
FROM category_tree
ORDER BY category_id;

CTE with INSERT, UPDATE, DELETE

CTE with INSERT

sql
-- Insert top customers into a VIP table
WITH top_customers AS (
    SELECT customer_id, SUM(total_amount) as total_spent
    FROM orders
    GROUP BY customer_id
    HAVING SUM(total_amount) > 50000
)
INSERT INTO vip_customers (customer_id, tier)
SELECT customer_id, 'Gold'
FROM top_customers;

CTE with UPDATE

sql
-- Update product prices based on sales performance
WITH product_sales AS (
    SELECT product_id, SUM(quantity) as total_sold
    FROM order_items
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY product_id
)
UPDATE products p
SET price = price * 1.1
FROM product_sales ps
WHERE p.product_id = ps.product_id
  AND ps.total_sold > 100;

CTE with DELETE

sql
-- Delete inactive customers
WITH inactive_customers AS (
    SELECT customer_id
    FROM customers c
    WHERE NOT EXISTS (
        SELECT 1 FROM orders o
        WHERE o.customer_id = c.customer_id
          AND o.order_date >= CURRENT_DATE - INTERVAL '2 years'
    )
)
DELETE FROM customers
WHERE customer_id IN (SELECT customer_id FROM inactive_customers);

Practical Examples

Example 1: Sales Analysis

sql
-- Compare current month vs previous month sales
WITH current_month AS (
    SELECT SUM(total_amount) as sales
    FROM orders
    WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE)
),
previous_month AS (
    SELECT SUM(total_amount) as sales
    FROM orders
    WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
)
SELECT 
    cm.sales as current_sales,
    pm.sales as previous_sales,
    ROUND(((cm.sales - pm.sales) / pm.sales * 100), 2) as growth_percentage
FROM current_month cm, previous_month pm;

Example 2: Customer Segmentation

sql
-- Segment customers by purchase behavior
WITH customer_stats AS (
    SELECT 
        customer_id,
        COUNT(*) as order_count,
        SUM(total_amount) as total_spent,
        AVG(total_amount) as avg_order_value,
        MAX(order_date) as last_order_date
    FROM orders
    GROUP BY customer_id
)
SELECT 
    customer_id,
    CASE 
        WHEN total_spent > 10000 AND order_count > 20 THEN 'VIP'
        WHEN total_spent > 5000 OR order_count > 10 THEN 'Regular'
        WHEN last_order_date < CURRENT_DATE - INTERVAL '6 months' THEN 'Inactive'
        ELSE 'New'
    END as customer_segment,
    order_count,
    total_spent,
    avg_order_value
FROM customer_stats;

Example 3: Running Totals

sql
-- Calculate running total of daily sales
WITH daily_sales AS (
    SELECT 
        DATE(order_date) as sale_date,
        SUM(total_amount) as daily_total
    FROM orders
    GROUP BY DATE(order_date)
)
SELECT 
    sale_date,
    daily_total,
    SUM(daily_total) OVER (ORDER BY sale_date) as running_total
FROM daily_sales
ORDER BY sale_date;

Performance Considerations

1. Materialized CTEs

By default, CTEs are optimization fences. Use MATERIALIZED or NOT MATERIALIZED:

sql
-- Force materialization (PostgreSQL 12+)
WITH MATERIALIZED high_value_orders AS (
    SELECT * FROM orders WHERE total_amount > 1000
)
SELECT * FROM high_value_orders;

-- Prevent materialization (inline the CTE)
WITH NOT MATERIALIZED recent_orders AS (
    SELECT * FROM orders WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)
SELECT * FROM recent_orders;

2. When to Use CTEs

CTEs are best for:

  • Improving query readability
  • Recursive queries
  • Multiple references to the same subquery
  • Breaking down complex logic

3. Performance Tips

sql
-- Good: CTE referenced multiple times
WITH customer_orders AS (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
)
SELECT 
    (SELECT AVG(order_count) FROM customer_orders) as avg_orders,
    (SELECT MAX(order_count) FROM customer_orders) as max_orders;

-- Consider: For single use, subquery might be better
SELECT customer_id, order_count
FROM (
    SELECT customer_id, COUNT(*) as order_count
    FROM orders
    GROUP BY customer_id
) subquery;

Advanced Patterns

Pattern 1: Data Deduplication

sql
-- Remove duplicates keeping the latest record
WITH ranked_records AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
    FROM user_signups
)
DELETE FROM user_signups
WHERE id IN (
    SELECT id FROM ranked_records WHERE rn > 1
);

Pattern 2: Gap Analysis

sql
-- Find missing invoice numbers
WITH RECURSIVE invoice_range AS (
    SELECT MIN(invoice_number) as num FROM invoices
    UNION ALL
    SELECT num + 1
    FROM invoice_range
    WHERE num < (SELECT MAX(invoice_number) FROM invoices)
)
SELECT ir.num as missing_invoice
FROM invoice_range ir
LEFT JOIN invoices i ON ir.num = i.invoice_number
WHERE i.invoice_number IS NULL;

Pattern 3: Pivot Data

sql
-- Pivot monthly sales by product category
WITH monthly_category_sales AS (
    SELECT 
        DATE_TRUNC('month', o.order_date) as month,
        p.category,
        SUM(oi.quantity * oi.price) as sales
    FROM orders o
    JOIN order_items oi ON o.order_id = oi.order_id
    JOIN products p ON oi.product_id = p.product_id
    GROUP BY DATE_TRUNC('month', o.order_date), p.category
)
SELECT 
    month,
    SUM(CASE WHEN category = 'Electronics' THEN sales ELSE 0 END) as electronics,
    SUM(CASE WHEN category = 'Clothing' THEN sales ELSE 0 END) as clothing,
    SUM(CASE WHEN category = 'Books' THEN sales ELSE 0 END) as books
FROM monthly_category_sales
GROUP BY month
ORDER BY month;

Common Mistakes and Solutions

Mistake 1: Infinite Recursion

sql
-- Wrong: No termination condition
WITH RECURSIVE infinite AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1 FROM infinite  -- Never stops!
)
SELECT * FROM infinite;

-- Correct: Add termination condition
WITH RECURSIVE finite AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1 FROM finite WHERE n < 100
)
SELECT * FROM finite;

Mistake 2: Referencing CTE Before Definition

sql
-- Wrong: cte2 references cte1 before it's defined
WITH 
cte2 AS (SELECT * FROM cte1),
cte1 AS (SELECT * FROM table1)
SELECT * FROM cte2;

-- Correct: Define in order
WITH 
cte1 AS (SELECT * FROM table1),
cte2 AS (SELECT * FROM cte1)
SELECT * FROM cte2;

Mistake 3: Unnecessary Complexity

sql
-- Overly complex
WITH cte1 AS (SELECT * FROM orders),
     cte2 AS (SELECT * FROM cte1 WHERE status = 'completed')
SELECT * FROM cte2;

-- Simpler
SELECT * FROM orders WHERE status = 'completed';

Best Practices

  1. Use Descriptive Names: Name CTEs clearly to indicate their purpose
  2. Break Down Complex Queries: Use multiple CTEs for readability
  3. Consider Performance: Use MATERIALIZED/NOT MATERIALIZED when appropriate
  4. Limit Recursion Depth: Always include termination conditions
  5. Document Complex Logic: Add comments for complex recursive CTEs
  6. Test Recursive CTEs: Verify termination and correctness
  7. Use for Readability: CTEs excel at making queries more maintainable

Summary

WITH clauses (CTEs) are powerful tools for query organization:

  • Define temporary named result sets
  • Improve query readability and maintainability
  • Support recursive queries for hierarchical data
  • Can be used with SELECT, INSERT, UPDATE, DELETE
  • Multiple CTEs can be defined in a single query
  • MATERIALIZED/NOT MATERIALIZED controls optimization
  • Essential for complex data transformations and analysis

Mastering CTEs is crucial for writing clean, maintainable PostgreSQL queries.

Content is for learning and research only.