Skip to content

PostgreSQL JOIN

概述

JOIN子句基于相关列组合两个或多个表中的行。JOIN是关系数据库的基础,允许您在单个查询中从多个表检索数据。

JOIN类型

INNER JOIN(内连接)

只返回两个表中具有匹配值的记录。

sql
-- 基本INNER JOIN
SELECT 
    users.name,
    orders.order_number,
    orders.amount
FROM users
INNER JOIN orders ON users.id = orders.user_id;

-- 使用表别名
SELECT 
    u.name,
    o.order_number,
    o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 多个条件
SELECT 
    u.name,
    o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed';

-- 带WHERE的JOIN
SELECT 
    u.name,
    o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.amount > 100;

LEFT JOIN(左连接)

返回左表中的所有记录以及右表中的匹配记录。如果没有匹配,右表列返回NULL值。

sql
-- 基本LEFT JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
LEFT JOIN orders ON users.id = orders.user_id;

-- 查找没有订单的用户
SELECT 
    u.name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- 统计每个用户的订单数(包括没有订单的用户)
SELECT 
    u.name,
    COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

RIGHT JOIN(右连接)

返回右表中的所有记录以及左表中的匹配记录。如果没有匹配,左表列返回NULL值。

sql
-- 基本RIGHT JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
RIGHT JOIN orders ON users.id = orders.user_id;

-- 查找没有用户的订单(孤立订单)
SELECT 
    o.order_number
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL;

FULL OUTER JOIN(全外连接)

当左表或右表中存在匹配时返回所有记录。没有匹配的记录将在非匹配侧显示NULL值。

sql
-- 基本FULL OUTER JOIN
SELECT 
    users.name,
    orders.order_number
FROM users
FULL OUTER JOIN orders ON users.id = orders.user_id;

-- 查找两个表中不匹配的记录
SELECT 
    u.name,
    o.order_number
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id
WHERE u.id IS NULL OR o.id IS NULL;

CROSS JOIN(交叉连接)

返回两个表的笛卡尔积(第一个表的每一行与第二个表的每一行组合)。

sql
-- 基本CROSS JOIN
SELECT 
    colors.name AS color,
    sizes.name AS size
FROM colors
CROSS JOIN sizes;

-- 替代语法(隐式CROSS JOIN)
SELECT 
    c.name AS color,
    s.name AS size
FROM colors c, sizes s;

-- 带WHERE的CROSS JOIN(类似INNER JOIN)
SELECT 
    c.name AS color,
    s.name AS size
FROM colors c
CROSS JOIN sizes s
WHERE c.id = s.color_id;

SELF JOIN(自连接)

将表连接到自身。对于层次数据或比较同一表中的行很有用。

sql
-- 查找员工及其经理
SELECT 
    e1.name AS employee,
    e2.name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;

-- 查找同一城市的用户
SELECT 
    u1.name AS user1,
    u2.name AS user2,
    u1.city
FROM users u1
INNER JOIN users u2 ON u1.city = u2.city AND u1.id < u2.id;

-- 查找价格相近的产品
SELECT 
    p1.name AS product1,
    p2.name AS product2,
    p1.price
FROM products p1
INNER JOIN products p2 ON ABS(p1.price - p2.price) < 10 AND p1.id < p2.id;

多表连接

连接三个或更多表

sql
-- 连接三个表
SELECT 
    u.name AS customer,
    o.order_number,
    p.product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id;

-- 混合不同的JOIN类型
SELECT 
    u.name,
    o.order_number,
    p.product_name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
LEFT JOIN order_items oi ON o.id = oi.order_id
LEFT JOIN products p ON oi.product_id = p.id;

-- 复杂的多表连接
SELECT 
    c.name AS customer,
    o.order_date,
    p.product_name,
    cat.category_name,
    oi.quantity,
    oi.price
FROM customers c
INNER JOIN orders o ON c.id = o.customer_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
INNER JOIN categories cat ON p.category_id = cat.id
WHERE o.order_date >= '2024-01-01';

带聚合的JOIN

sql
-- 每个用户的总消费
SELECT 
    u.name,
    COUNT(o.id) AS order_count,
    COALESCE(SUM(o.amount), 0) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;

-- 从未被订购的产品
SELECT 
    p.product_name,
    p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;

-- 按类别的平均订单价值
SELECT 
    c.category_name,
    COUNT(DISTINCT o.id) AS order_count,
    AVG(o.amount) AS avg_order_value
FROM categories c
INNER JOIN products p ON c.id = p.category_id
INNER JOIN order_items oi ON p.id = oi.product_id
INNER JOIN orders o ON oi.order_id = o.id
GROUP BY c.id, c.category_name
ORDER BY avg_order_value DESC;

-- 消费最多的客户
SELECT 
    u.name,
    u.email,
    COUNT(o.id) AS order_count,
    SUM(o.amount) AS total_spent
FROM users u
INNER JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name, u.email
HAVING SUM(o.amount) > 1000
ORDER BY total_spent DESC
LIMIT 10;

带子查询的JOIN

sql
-- 与子查询连接
SELECT 
    u.name,
    recent_orders.order_count
FROM users u
INNER JOIN (
    SELECT 
        user_id,
        COUNT(*) AS order_count
    FROM orders
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY user_id
) AS recent_orders ON u.id = recent_orders.user_id;

-- 带多个子查询的复杂示例
SELECT 
    u.name,
    u.email,
    stats.total_orders,
    stats.total_spent,
    recent.recent_orders
FROM users u
INNER JOIN (
    SELECT 
        user_id,
        COUNT(*) AS total_orders,
        SUM(amount) AS total_spent
    FROM orders
    WHERE status = 'completed'
    GROUP BY user_id
) AS stats ON u.id = stats.user_id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) AS recent_orders
    FROM orders
    WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY user_id
) AS recent ON u.id = recent.user_id
WHERE stats.total_spent > 1000;

使用USING子句的JOIN

当连接列具有相同名称时,可以使用USING子句获得更简洁的语法。

sql
-- 使用ON
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.id;

-- 使用USING
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o USING (id);

-- 多列
SELECT *
FROM table1 t1
INNER JOIN table2 t2 USING (id, category_id);

NATURAL JOIN

根据具有相同名称的列自动连接表。谨慎使用,因为它可能不可预测。

sql
-- NATURAL JOIN(不推荐用于生产环境)
SELECT *
FROM users
NATURAL JOIN orders;

-- 等同于
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.id 
    AND u.created_at = o.created_at
    -- ... 以及所有其他匹配的列名

性能提示

sql
-- 1. 在JOIN列上创建索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_order_items_product_id ON order_items(product_id);

-- 2. 使用EXPLAIN分析查询
EXPLAIN ANALYZE
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 3. 避免在JOIN中使用SELECT *
-- 好:只选择需要的列
SELECT u.name, u.email, o.order_number, o.amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 避免:选择所有列
SELECT *
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 4. 使用WHERE提前过滤
SELECT u.name, o.order_number
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'  -- 尽可能在连接前过滤
    AND o.created_at >= '2024-01-01';

-- 5. 使用适当的JOIN类型
-- 只需要匹配记录时使用INNER JOIN
-- 需要左表所有记录时使用LEFT JOIN

常见模式

查找无匹配的记录

sql
-- 没有订单的用户
SELECT u.name, u.email
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- 从未售出的产品
SELECT p.product_name, p.price
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
WHERE oi.id IS NULL;

多对多关系

sql
-- 学生及其课程(通过enrollments表)
SELECT 
    s.student_name,
    c.course_name,
    e.enrollment_date,
    e.grade
FROM students s
INNER JOIN enrollments e ON s.id = e.student_id
INNER JOIN courses c ON e.course_id = c.id
ORDER BY s.student_name, c.course_name;

层次数据

sql
-- 组织层次结构(员工和经理)
SELECT 
    e.name AS employee,
    e.title AS employee_title,
    m.name AS manager,
    m.title AS manager_title,
    d.department_name
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id
INNER JOIN departments d ON e.department_id = d.id
ORDER BY d.department_name, m.name, e.name;

每组的最新记录

sql
-- 每个用户的最新订单
SELECT 
    u.name,
    o.order_number,
    o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN (
    SELECT user_id, MAX(order_date) AS latest_date
    FROM orders
    GROUP BY user_id
) AS latest ON o.user_id = latest.user_id AND o.order_date = latest.latest_date;

最佳实践

  1. 使用显式JOIN语法

    sql
    -- 好:显式JOIN
    SELECT * FROM users u
    INNER JOIN orders o ON u.id = o.user_id;
    
    -- 避免:隐式JOIN
    SELECT * FROM users u, orders o
    WHERE u.id = o.user_id;
  2. 使用表别名

    sql
    -- 好:清晰的别名
    SELECT u.name, o.order_number
    FROM users u
    INNER JOIN orders o ON u.id = o.user_id;
  3. 选择正确的JOIN类型

    • 只需要匹配记录时使用INNER JOIN
    • 需要左表所有记录时使用LEFT JOIN
    • 需要两个表所有记录时使用FULL OUTER JOIN
    • 除非特别需要笛卡尔积,否则避免使用CROSS JOIN
  4. 索引外键列

    sql
    CREATE INDEX idx_orders_user_id ON orders(user_id);
  5. 提前过滤

    sql
    -- 尽可能在JOIN期间或之前应用WHERE条件
    SELECT u.name, o.order_number
    FROM users u
    INNER JOIN orders o ON u.id = o.user_id AND o.status = 'completed'
    WHERE u.status = 'active';

小结

JOIN类型及其用途:

  • INNER JOIN:只返回两个表中的匹配记录
  • LEFT JOIN:返回左表所有记录+右表匹配记录
  • RIGHT JOIN:返回右表所有记录+左表匹配记录
  • FULL OUTER JOIN:返回两个表的所有记录
  • CROSS JOIN:返回两个表的笛卡尔积
  • SELF JOIN:将表连接到自身

关键点:

  • 根据需要使用适当的JOIN类型
  • 为JOIN列创建索引以提高性能
  • 使用表别名提高可读性
  • 避免在生产查询中使用SELECT *
  • 使用EXPLAIN ANALYZE测试复杂查询