PostgreSQL WITH Clause (Common Table Expressions)
What is WITH?
The WITH clause, also known as Common Table Expressions (CTEs), allows you to define temporary named result sets that can be referenced within a SELECT, INSERT, UPDATE, or DELETE statement. CTEs make complex queries more readable and maintainable.
Basic Syntax
sql
WITH cte_name AS (
SELECT column1, column2, ...
FROM table_name
WHERE condition
)
SELECT * FROM cte_name;Simple CTE Examples
Example 1: Basic CTE
sql
-- Define a CTE for high-value customers
WITH high_value_customers AS (
SELECT customer_id, customer_name, total_spent
FROM customers
WHERE total_spent > 10000
)
SELECT * FROM high_value_customers
ORDER BY total_spent DESC;Example 2: CTE with Aggregation
sql
-- Calculate monthly sales
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) as month,
SUM(total_amount) as total_sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
)
SELECT * FROM monthly_sales
ORDER BY month DESC;Multiple CTEs
You can define multiple CTEs in a single query:
sql
WITH
customer_orders AS (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
),
customer_revenue AS (
SELECT customer_id, SUM(total_amount) as total_revenue
FROM orders
GROUP BY customer_id
)
SELECT
c.customer_name,
co.order_count,
cr.total_revenue
FROM customers c
JOIN customer_orders co ON c.customer_id = co.customer_id
JOIN customer_revenue cr ON c.customer_id = cr.customer_id
WHERE co.order_count > 5;Recursive CTEs
Recursive CTEs allow you to query hierarchical or tree-structured data:
Basic Recursive CTE Syntax
sql
WITH RECURSIVE cte_name AS (
-- Base case (anchor member)
SELECT ...
UNION ALL
-- Recursive case (recursive member)
SELECT ...
FROM cte_name
WHERE condition
)
SELECT * FROM cte_name;Example 1: Number Sequence
sql
-- Generate numbers from 1 to 10
WITH RECURSIVE numbers AS (
SELECT 1 as n
UNION ALL
SELECT n + 1
FROM numbers
WHERE n < 10
)
SELECT * FROM numbers;Example 2: Employee Hierarchy
sql
-- Get all employees under a manager
WITH RECURSIVE employee_hierarchy AS (
-- Base case: Start with the CEO
SELECT employee_id, employee_name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: Get subordinates
SELECT e.employee_id, e.employee_name, e.manager_id, eh.level + 1
FROM employees e
JOIN employee_hierarchy eh ON e.manager_id = eh.employee_id
)
SELECT * FROM employee_hierarchy
ORDER BY level, employee_name;Example 3: Category Tree
sql
-- Get all subcategories
WITH RECURSIVE category_tree AS (
-- Base case: Root categories
SELECT category_id, category_name, parent_id, 0 as depth
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- Recursive case: Child categories
SELECT c.category_id, c.category_name, c.parent_id, ct.depth + 1
FROM categories c
JOIN category_tree ct ON c.parent_id = ct.category_id
)
SELECT
REPEAT(' ', depth) || category_name as category_hierarchy,
depth
FROM category_tree
ORDER BY category_id;CTE with INSERT, UPDATE, DELETE
CTE with INSERT
sql
-- Insert top customers into a VIP table
WITH top_customers AS (
SELECT customer_id, SUM(total_amount) as total_spent
FROM orders
GROUP BY customer_id
HAVING SUM(total_amount) > 50000
)
INSERT INTO vip_customers (customer_id, tier)
SELECT customer_id, 'Gold'
FROM top_customers;CTE with UPDATE
sql
-- Update product prices based on sales performance
WITH product_sales AS (
SELECT product_id, SUM(quantity) as total_sold
FROM order_items
WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY product_id
)
UPDATE products p
SET price = price * 1.1
FROM product_sales ps
WHERE p.product_id = ps.product_id
AND ps.total_sold > 100;CTE with DELETE
sql
-- Delete inactive customers
WITH inactive_customers AS (
SELECT customer_id
FROM customers c
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.order_date >= CURRENT_DATE - INTERVAL '2 years'
)
)
DELETE FROM customers
WHERE customer_id IN (SELECT customer_id FROM inactive_customers);Practical Examples
Example 1: Sales Analysis
sql
-- Compare current month vs previous month sales
WITH current_month AS (
SELECT SUM(total_amount) as sales
FROM orders
WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE)
),
previous_month AS (
SELECT SUM(total_amount) as sales
FROM orders
WHERE DATE_TRUNC('month', order_date) = DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
)
SELECT
cm.sales as current_sales,
pm.sales as previous_sales,
ROUND(((cm.sales - pm.sales) / pm.sales * 100), 2) as growth_percentage
FROM current_month cm, previous_month pm;Example 2: Customer Segmentation
sql
-- Segment customers by purchase behavior
WITH customer_stats AS (
SELECT
customer_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent,
AVG(total_amount) as avg_order_value,
MAX(order_date) as last_order_date
FROM orders
GROUP BY customer_id
)
SELECT
customer_id,
CASE
WHEN total_spent > 10000 AND order_count > 20 THEN 'VIP'
WHEN total_spent > 5000 OR order_count > 10 THEN 'Regular'
WHEN last_order_date < CURRENT_DATE - INTERVAL '6 months' THEN 'Inactive'
ELSE 'New'
END as customer_segment,
order_count,
total_spent,
avg_order_value
FROM customer_stats;Example 3: Running Totals
sql
-- Calculate running total of daily sales
WITH daily_sales AS (
SELECT
DATE(order_date) as sale_date,
SUM(total_amount) as daily_total
FROM orders
GROUP BY DATE(order_date)
)
SELECT
sale_date,
daily_total,
SUM(daily_total) OVER (ORDER BY sale_date) as running_total
FROM daily_sales
ORDER BY sale_date;Performance Considerations
1. Materialized CTEs
By default, CTEs are optimization fences. Use MATERIALIZED or NOT MATERIALIZED:
sql
-- Force materialization (PostgreSQL 12+)
WITH MATERIALIZED high_value_orders AS (
SELECT * FROM orders WHERE total_amount > 1000
)
SELECT * FROM high_value_orders;
-- Prevent materialization (inline the CTE)
WITH NOT MATERIALIZED recent_orders AS (
SELECT * FROM orders WHERE order_date >= CURRENT_DATE - INTERVAL '7 days'
)
SELECT * FROM recent_orders;2. When to Use CTEs
CTEs are best for:
- Improving query readability
- Recursive queries
- Multiple references to the same subquery
- Breaking down complex logic
3. Performance Tips
sql
-- Good: CTE referenced multiple times
WITH customer_orders AS (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
)
SELECT
(SELECT AVG(order_count) FROM customer_orders) as avg_orders,
(SELECT MAX(order_count) FROM customer_orders) as max_orders;
-- Consider: For single use, subquery might be better
SELECT customer_id, order_count
FROM (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
) subquery;Advanced Patterns
Pattern 1: Data Deduplication
sql
-- Remove duplicates keeping the latest record
WITH ranked_records AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
FROM user_signups
)
DELETE FROM user_signups
WHERE id IN (
SELECT id FROM ranked_records WHERE rn > 1
);Pattern 2: Gap Analysis
sql
-- Find missing invoice numbers
WITH RECURSIVE invoice_range AS (
SELECT MIN(invoice_number) as num FROM invoices
UNION ALL
SELECT num + 1
FROM invoice_range
WHERE num < (SELECT MAX(invoice_number) FROM invoices)
)
SELECT ir.num as missing_invoice
FROM invoice_range ir
LEFT JOIN invoices i ON ir.num = i.invoice_number
WHERE i.invoice_number IS NULL;Pattern 3: Pivot Data
sql
-- Pivot monthly sales by product category
WITH monthly_category_sales AS (
SELECT
DATE_TRUNC('month', o.order_date) as month,
p.category,
SUM(oi.quantity * oi.price) as sales
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
GROUP BY DATE_TRUNC('month', o.order_date), p.category
)
SELECT
month,
SUM(CASE WHEN category = 'Electronics' THEN sales ELSE 0 END) as electronics,
SUM(CASE WHEN category = 'Clothing' THEN sales ELSE 0 END) as clothing,
SUM(CASE WHEN category = 'Books' THEN sales ELSE 0 END) as books
FROM monthly_category_sales
GROUP BY month
ORDER BY month;Common Mistakes and Solutions
Mistake 1: Infinite Recursion
sql
-- Wrong: No termination condition
WITH RECURSIVE infinite AS (
SELECT 1 as n
UNION ALL
SELECT n + 1 FROM infinite -- Never stops!
)
SELECT * FROM infinite;
-- Correct: Add termination condition
WITH RECURSIVE finite AS (
SELECT 1 as n
UNION ALL
SELECT n + 1 FROM finite WHERE n < 100
)
SELECT * FROM finite;Mistake 2: Referencing CTE Before Definition
sql
-- Wrong: cte2 references cte1 before it's defined
WITH
cte2 AS (SELECT * FROM cte1),
cte1 AS (SELECT * FROM table1)
SELECT * FROM cte2;
-- Correct: Define in order
WITH
cte1 AS (SELECT * FROM table1),
cte2 AS (SELECT * FROM cte1)
SELECT * FROM cte2;Mistake 3: Unnecessary Complexity
sql
-- Overly complex
WITH cte1 AS (SELECT * FROM orders),
cte2 AS (SELECT * FROM cte1 WHERE status = 'completed')
SELECT * FROM cte2;
-- Simpler
SELECT * FROM orders WHERE status = 'completed';Best Practices
- Use Descriptive Names: Name CTEs clearly to indicate their purpose
- Break Down Complex Queries: Use multiple CTEs for readability
- Consider Performance: Use MATERIALIZED/NOT MATERIALIZED when appropriate
- Limit Recursion Depth: Always include termination conditions
- Document Complex Logic: Add comments for complex recursive CTEs
- Test Recursive CTEs: Verify termination and correctness
- Use for Readability: CTEs excel at making queries more maintainable
Summary
WITH clauses (CTEs) are powerful tools for query organization:
- Define temporary named result sets
- Improve query readability and maintainability
- Support recursive queries for hierarchical data
- Can be used with SELECT, INSERT, UPDATE, DELETE
- Multiple CTEs can be defined in a single query
- MATERIALIZED/NOT MATERIALIZED controls optimization
- Essential for complex data transformations and analysis
Mastering CTEs is crucial for writing clean, maintainable PostgreSQL queries.