PostgreSQL JOIN
Overview
JOIN clauses combine rows from two or more tables based on related columns. JOINs are fundamental to relational databases and allow you to retrieve data from multiple tables in a single query.
Types of JOINs
INNER JOIN
Returns only the records that have matching values in both tables.
-- Basic INNER JOIN
SELECT
users.name,
orders.order_number,
orders.amount
FROM users
INNER JOIN orders ON users.id = orders.user_id;
-- Using table aliases
SELECT
u.name,
o.order_number,
o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- Multiple conditions
SELECT
u.name,
o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed';
-- Join with WHERE
SELECT
u.name,
o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;LEFT JOIN (LEFT OUTER JOIN)
Returns all records from the left table and matched records from the right table. If there's no match, NULL values are returned for right table columns.
-- Basic LEFT JOIN
SELECT
users.name,
orders.order_number
FROM users
LEFT JOIN orders ON users.id = orders.user_id;
-- Find users without orders
SELECT
u.name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;
-- Count orders per user (including users with no orders)
SELECT
u.name,
COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;RIGHT JOIN (RIGHT OUTER JOIN)
Returns all records from the right table and matched records from the left table. If there's no match, NULL values are returned for left table columns.
-- Basic RIGHT JOIN
SELECT
users.name,
orders.order_number
FROM users
RIGHT JOIN orders ON users.id = orders.user_id;
-- Find orders without users (orphaned orders)
SELECT
o.order_number
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL;FULL OUTER JOIN
Returns all records when there is a match in either the left or right table. Records without matches will have NULL values for the non-matching side.
-- Basic FULL OUTER JOIN
SELECT
users.name,
orders.order_number
FROM users
FULL OUTER JOIN orders ON users.id = orders.user_id;
-- Find unmatched records from both tables
SELECT
u.name,
o.order_number
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL OR o.id IS NULL;CROSS JOIN
Returns the Cartesian product of both tables (every row from the first table combined with every row from the second table).
-- Basic CROSS JOIN
SELECT
colors.name AS color,
sizes.name AS size
FROM colors
CROSS JOIN sizes;
-- Alternative syntax (implicit CROSS JOIN)
SELECT
c.name AS color,
s.name AS size
FROM colors c, sizes s;
-- CROSS JOIN with WHERE (acts like INNER JOIN)
SELECT
c.name AS color,
s.name AS size
FROM colors c
CROSS JOIN sizes s
WHERE c.id = s.color_id;SELF JOIN
Joins a table to itself. Useful for hierarchical data or comparing rows within the same table.
-- Find employees and their managers
SELECT
e1.name AS employee,
e2.name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;
-- Find users in the same city
SELECT
u1.name AS user1,
u2.name AS user2,
u1.city
FROM users u1
INNER JOIN users u2 ON u1.city = u2.city AND u1.id < u2.id;
-- Find products in the same price range
SELECT
p1.name AS product1,
p2.name AS product2,
p1.price
FROM products p1
INNER JOIN products p2 ON ABS(p1.price - p2.price) < 10 AND p1.id < p2.id;Multiple JOINs
Joining Three or More Tables
-- Join three tables
SELECT
u.name AS customer,
o.order_number,
p.product_name,
oi.quantity,
oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;
-- Mix different JOIN types
SELECT
u.name,
o.order_number,
p.product_name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
LEFT JOIN products p ON oi.product_id = p.id;
-- Complex multi-table join
SELECT
c.name AS customer,
o.order_date,
p.product_name,
cat.category_name,
oi.quantity,
oi.price
FROM customers c
INNER JOIN orders o ON c.id = o.customer_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
INNER JOIN categories cat ON p.category_id = cat.id
WHERE o.order_date >= '2024-01-01';JOIN with Aggregates
-- Total spent per user
SELECT
u.name,
COUNT(o.id) AS order_count,
COALESCE(SUM(o.amount), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- Products never ordered
SELECT
p.product_name,
p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;
-- Average order value by category
SELECT
c.category_name,
COUNT(DISTINCT o.id) AS order_count,
AVG(o.amount) AS avg_order_value
FROM categories c
INNER JOIN products p ON c.id = p.category_id
INNER JOIN order_items oi ON p.id = oi.product_id
INNER JOIN orders o ON oi.order_id = o.id
GROUP BY c.id, c.category_name
ORDER BY avg_order_value DESC;
-- Top customers by spending
SELECT
u.name,
u.email,
COUNT(o.id) AS order_count,
SUM(o.amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
HAVING SUM(o.amount) > 1000
ORDER BY total_spent DESC
LIMIT 10;JOIN with Subqueries
-- Join with subquery
SELECT
u.name,
recent_orders.order_count
FROM users u
INNER JOIN (
SELECT
user_id,
COUNT(*) AS order_count
FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY user_id
) AS recent_orders ON u.id = recent_orders.user_id;
-- Complex example with multiple subqueries
SELECT
u.name,
u.email,
stats.total_orders,
stats.total_spent,
recent.recent_orders
FROM users u
INNER JOIN (
SELECT
user_id,
COUNT(*) AS total_orders,
SUM(amount) AS total_spent
FROM orders
WHERE status = 'completed'
GROUP BY user_id
) AS stats ON u.id = stats.user_id
LEFT JOIN (
SELECT
user_id,
COUNT(*) AS recent_orders
FROM orders
WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY user_id
) AS recent ON u.id = recent.user_id
WHERE stats.total_spent > 1000;JOIN with USING Clause
When join columns have the same name, you can use the USING clause for cleaner syntax.
-- Instead of ON
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.id;
-- Use USING
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o USING (id);
-- Multiple columns
SELECT *
FROM table1 t1
INNER JOIN table2 t2 USING (id, category_id);NATURAL JOIN
Automatically joins tables based on columns with the same name. Use with caution as it can be unpredictable.
-- NATURAL JOIN (not recommended for production)
SELECT *
FROM users
NATURAL JOIN orders;
-- Equivalent to
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.id
AND u.created_at = o.created_at
-- ... and all other matching column namesPerformance Tips
-- 1. Create indexes on JOIN columns
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_order_items_product_id ON order_items(product_id);
-- 2. Use EXPLAIN to analyze queries
EXPLAIN ANALYZE
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 3. Avoid SELECT * in JOINs
-- Good: Select only needed columns
SELECT u.name, u.email, o.order_number, o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- Avoid: Selecting all columns
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 4. Filter early with WHERE
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active' -- Filter before joining when possible
AND o.created_at >= '2024-01-01';
-- 5. Use appropriate JOIN type
-- Use INNER JOIN when you only need matching records
-- Use LEFT JOIN when you need all records from the left tableCommon Patterns
Find Records Without Matches
-- Users without orders
SELECT u.name, u.email
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;
-- Products never sold
SELECT p.product_name, p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;Many-to-Many Relationships
-- Students and their courses (through enrollments table)
SELECT
s.student_name,
c.course_name,
e.enrollment_date,
e.grade
FROM students s
INNER JOIN enrollments e ON s.id = e.student_id
INNER JOIN courses c ON e.course_id = c.id
ORDER BY s.student_name, c.course_name;Hierarchical Data
-- Organization hierarchy (employees and managers)
SELECT
e.name AS employee,
e.title AS employee_title,
m.name AS manager,
m.title AS manager_title,
d.department_name
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id
INNER JOIN departments d ON e.department_id = d.id
ORDER BY d.department_name, m.name, e.name;Latest Record per Group
-- Latest order for each user
SELECT
u.name,
o.order_number,
o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN (
SELECT user_id, MAX(order_date) AS latest_date
FROM orders
GROUP BY user_id
) AS latest ON o.user_id = latest.user_id AND o.order_date = latest.latest_date;Best Practices
Use explicit JOIN syntax
sql-- Good: Explicit JOIN SELECT * FROM users u INNER JOIN orders o ON u.id = o.user_id; -- Avoid: Implicit JOIN SELECT * FROM users u, orders o WHERE u.id = o.user_id;Use table aliases
sql-- Good: Clear aliases SELECT u.name, o.order_number FROM users u INNER JOIN orders o ON u.id = o.user_id;Choose the right JOIN type
- Use INNER JOIN when you only need matching records
- Use LEFT JOIN when you need all records from the left table
- Use FULL OUTER JOIN when you need all records from both tables
- Avoid CROSS JOIN unless you specifically need a Cartesian product
Index foreign key columns
sqlCREATE INDEX idx_orders_user_id ON orders(user_id);Filter early
sql-- Apply WHERE conditions before or during JOIN when possible SELECT u.name, o.order_number FROM users u INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed' WHERE u.status = 'active';
Summary
JOIN types and their uses:
- INNER JOIN: Returns only matching records from both tables
- LEFT JOIN: Returns all records from left table + matches from right
- RIGHT JOIN: Returns all records from right table + matches from left
- FULL OUTER JOIN: Returns all records from both tables
- CROSS JOIN: Returns Cartesian product of both tables
- SELF JOIN: Joins a table to itself
Key points:
- Use appropriate JOIN type for your needs
- Index JOIN columns for better performance
- Use table aliases for readability
- Avoid SELECT * in production queries
- Test complex queries with EXPLAIN ANALYZE