Skip to content

PostgreSQL JOIN

Overview

JOIN clauses combine rows from two or more tables based on related columns. JOINs are fundamental to relational databases and allow you to retrieve data from multiple tables in a single query.

Types of JOINs

INNER JOIN

Returns only the records that have matching values in both tables.

sql
-- Basic INNER JOIN
SELECT 
    users.name,
    orders.order_number,
    orders.amount
FROM users
INNER JOIN orders ON users.id = orders.user_id;

-- Using table aliases
SELECT 
    u.name,
    o.order_number,
    o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- Multiple conditions
SELECT 
    u.name,
    o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed';

-- Join with WHERE
SELECT 
    u.name,
    o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;

LEFT JOIN (LEFT OUTER JOIN)

Returns all records from the left table and matched records from the right table. If there's no match, NULL values are returned for right table columns.

sql
-- Basic LEFT JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
LEFT JOIN orders ON users.id = orders.user_id;

-- Find users without orders
SELECT 
    u.name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- Count orders per user (including users with no orders)
SELECT 
    u.name,
    COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

RIGHT JOIN (RIGHT OUTER JOIN)

Returns all records from the right table and matched records from the left table. If there's no match, NULL values are returned for left table columns.

sql
-- Basic RIGHT JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
RIGHT JOIN orders ON users.id = orders.user_id;

-- Find orders without users (orphaned orders)
SELECT 
    o.order_number
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL;

FULL OUTER JOIN

Returns all records when there is a match in either the left or right table. Records without matches will have NULL values for the non-matching side.

sql
-- Basic FULL OUTER JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
FULL OUTER JOIN orders ON users.id = orders.user_id;

-- Find unmatched records from both tables
SELECT 
    u.name,
    o.order_number
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL OR o.id IS NULL;

CROSS JOIN

Returns the Cartesian product of both tables (every row from the first table combined with every row from the second table).

sql
-- Basic CROSS JOIN
SELECT 
    colors.name AS color,
    sizes.name AS size
FROM colors
CROSS JOIN sizes;

-- Alternative syntax (implicit CROSS JOIN)
SELECT 
    c.name AS color,
    s.name AS size
FROM colors c, sizes s;

-- CROSS JOIN with WHERE (acts like INNER JOIN)
SELECT 
    c.name AS color,
    s.name AS size
FROM colors c
CROSS JOIN sizes s
WHERE c.id = s.color_id;

SELF JOIN

Joins a table to itself. Useful for hierarchical data or comparing rows within the same table.

sql
-- Find employees and their managers
SELECT 
    e1.name AS employee,
    e2.name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;

-- Find users in the same city
SELECT 
    u1.name AS user1,
    u2.name AS user2,
    u1.city
FROM users u1
INNER JOIN users u2 ON u1.city = u2.city AND u1.id < u2.id;

-- Find products in the same price range
SELECT 
    p1.name AS product1,
    p2.name AS product2,
    p1.price
FROM products p1
INNER JOIN products p2 ON ABS(p1.price - p2.price) < 10 AND p1.id < p2.id;

Multiple JOINs

Joining Three or More Tables

sql
-- Join three tables
SELECT 
    u.name AS customer,
    o.order_number,
    p.product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;

-- Mix different JOIN types
SELECT 
    u.name,
    o.order_number,
    p.product_name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
LEFT JOIN products p ON oi.product_id = p.id;

-- Complex multi-table join
SELECT 
    c.name AS customer,
    o.order_date,
    p.product_name,
    cat.category_name,
    oi.quantity,
    oi.price
FROM customers c
INNER JOIN orders o ON c.id = o.customer_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
INNER JOIN categories cat ON p.category_id = cat.id
WHERE o.order_date >= '2024-01-01';

JOIN with Aggregates

sql
-- Total spent per user
SELECT 
    u.name,
    COUNT(o.id) AS order_count,
    COALESCE(SUM(o.amount), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- Products never ordered
SELECT 
    p.product_name,
    p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;

-- Average order value by category
SELECT 
    c.category_name,
    COUNT(DISTINCT o.id) AS order_count,
    AVG(o.amount) AS avg_order_value
FROM categories c
INNER JOIN products p ON c.id = p.category_id
INNER JOIN order_items oi ON p.id = oi.product_id
INNER JOIN orders o ON oi.order_id = o.id
GROUP BY c.id, c.category_name
ORDER BY avg_order_value DESC;

-- Top customers by spending
SELECT 
    u.name,
    u.email,
    COUNT(o.id) AS order_count,
    SUM(o.amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
HAVING SUM(o.amount) > 1000
ORDER BY total_spent DESC
LIMIT 10;

JOIN with Subqueries

sql
-- Join with subquery
SELECT 
    u.name,
    recent_orders.order_count
FROM users u
INNER JOIN (
    SELECT 
        user_id,
        COUNT(*) AS order_count
    FROM orders
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY user_id
) AS recent_orders ON u.id = recent_orders.user_id;

-- Complex example with multiple subqueries
SELECT 
    u.name,
    u.email,
    stats.total_orders,
    stats.total_spent,
    recent.recent_orders
FROM users u
INNER JOIN (
    SELECT 
        user_id,
        COUNT(*) AS total_orders,
        SUM(amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY user_id
) AS stats ON u.id = stats.user_id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) AS recent_orders
    FROM orders
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY user_id
) AS recent ON u.id = recent.user_id
WHERE stats.total_spent > 1000;

JOIN with USING Clause

When join columns have the same name, you can use the USING clause for cleaner syntax.

sql
-- Instead of ON
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.id;

-- Use USING
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o USING (id);

-- Multiple columns
SELECT *
FROM table1 t1
INNER JOIN table2 t2 USING (id, category_id);

NATURAL JOIN

Automatically joins tables based on columns with the same name. Use with caution as it can be unpredictable.

sql
-- NATURAL JOIN (not recommended for production)
SELECT *
FROM users
NATURAL JOIN orders;

-- Equivalent to
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.id 
    AND u.created_at = o.created_at
    -- ... and all other matching column names

Performance Tips

sql
-- 1. Create indexes on JOIN columns
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_order_items_product_id ON order_items(product_id);

-- 2. Use EXPLAIN to analyze queries
EXPLAIN ANALYZE
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 3. Avoid SELECT * in JOINs
-- Good: Select only needed columns
SELECT u.name, u.email, o.order_number, o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- Avoid: Selecting all columns
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 4. Filter early with WHERE
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'  -- Filter before joining when possible
    AND o.created_at >= '2024-01-01';

-- 5. Use appropriate JOIN type
-- Use INNER JOIN when you only need matching records
-- Use LEFT JOIN when you need all records from the left table

Common Patterns

Find Records Without Matches

sql
-- Users without orders
SELECT u.name, u.email
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- Products never sold
SELECT p.product_name, p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;

Many-to-Many Relationships

sql
-- Students and their courses (through enrollments table)
SELECT 
    s.student_name,
    c.course_name,
    e.enrollment_date,
    e.grade
FROM students s
INNER JOIN enrollments e ON s.id = e.student_id
INNER JOIN courses c ON e.course_id = c.id
ORDER BY s.student_name, c.course_name;

Hierarchical Data

sql
-- Organization hierarchy (employees and managers)
SELECT 
    e.name AS employee,
    e.title AS employee_title,
    m.name AS manager,
    m.title AS manager_title,
    d.department_name
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id
INNER JOIN departments d ON e.department_id = d.id
ORDER BY d.department_name, m.name, e.name;

Latest Record per Group

sql
-- Latest order for each user
SELECT 
    u.name,
    o.order_number,
    o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN (
    SELECT user_id, MAX(order_date) AS latest_date
    FROM orders
    GROUP BY user_id
) AS latest ON o.user_id = latest.user_id AND o.order_date = latest.latest_date;

Best Practices

  1. Use explicit JOIN syntax

    sql
    -- Good: Explicit JOIN
    SELECT * FROM users u
    INNER JOIN orders o ON u.id = o.user_id;
    
    -- Avoid: Implicit JOIN
    SELECT * FROM users u, orders o
    WHERE u.id = o.user_id;
  2. Use table aliases

    sql
    -- Good: Clear aliases
    SELECT u.name, o.order_number
    FROM users u
    INNER JOIN orders o ON u.id = o.user_id;
  3. Choose the right JOIN type

    • Use INNER JOIN when you only need matching records
    • Use LEFT JOIN when you need all records from the left table
    • Use FULL OUTER JOIN when you need all records from both tables
    • Avoid CROSS JOIN unless you specifically need a Cartesian product
  4. Index foreign key columns

    sql
    CREATE INDEX idx_orders_user_id ON orders(user_id);
  5. Filter early

    sql
    -- Apply WHERE conditions before or during JOIN when possible
    SELECT u.name, o.order_number
    FROM users u
    INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed'
    WHERE u.status = 'active';

Summary

JOIN types and their uses:

  • INNER JOIN: Returns only matching records from both tables
  • LEFT JOIN: Returns all records from left table + matches from right
  • RIGHT JOIN: Returns all records from right table + matches from left
  • FULL OUTER JOIN: Returns all records from both tables
  • CROSS JOIN: Returns Cartesian product of both tables
  • SELF JOIN: Joins a table to itself

Key points:

  • Use appropriate JOIN type for your needs
  • Index JOIN columns for better performance
  • Use table aliases for readability
  • Avoid SELECT * in production queries
  • Test complex queries with EXPLAIN ANALYZE

Content is for learning and research only.