PostgreSQL UNION
概述
UNION运算符将两个或多个SELECT语句的结果集组合成单个结果集。默认情况下它会删除重复行。当您需要组合来自具有相似结构的多个表或查询的数据时,UNION非常有用。
基本语法
sql
SELECT column1, column2, ...
FROM table1
UNION
SELECT column1, column2, ...
FROM table2;UNION与UNION ALL
UNION(删除重复)
sql
-- 组合结果并删除重复
SELECT id, name FROM current_users
UNION
SELECT id, name FROM archived_users;UNION ALL(保留重复)
sql
-- 组合结果并保留所有行(更快)
SELECT id, name FROM current_users
UNION ALL
SELECT id, name FROM archived_users;UNION的规则
- 相同数量的列:所有SELECT语句必须具有相同数量的列
- 兼容的数据类型:对应的列必须具有兼容的数据类型
- 列顺序:列按位置匹配,而不是按名称
- 列名:结果使用第一个SELECT的列名
sql
-- 有效的UNION
SELECT id, name, email FROM users
UNION
SELECT id, full_name, contact_email FROM customers;
-- 无效的UNION(列数不同)
SELECT id, name FROM users
UNION
SELECT id, name, email FROM customers; -- 错误!基本UNION示例
组合相似的表
sql
-- 组合活跃和非活跃用户
SELECT id, name, 'active' AS status FROM active_users
UNION
SELECT id, name, 'inactive' AS status FROM inactive_users;
-- 组合当前和历史数据
SELECT order_id, customer_id, order_date FROM current_orders
UNION
SELECT order_id, customer_id, order_date FROM archived_orders
ORDER BY order_date DESC;组合不同的表
sql
-- 组合员工和承包商
SELECT
id,
name,
email,
'employee' AS type
FROM employees
UNION
SELECT
id,
name,
email,
'contractor' AS type
FROM contractors;UNION与WHERE子句
sql
-- 组合前过滤
SELECT id, name, email FROM users WHERE country = 'USA'
UNION
SELECT id, name, email FROM customers WHERE country = 'USA';
-- 每个查询使用不同的过滤器
SELECT id, name, 'premium' AS tier FROM users WHERE subscription = 'premium'
UNION
SELECT id, name, 'basic' AS tier FROM users WHERE subscription = 'basic'
ORDER BY name;UNION与ORDER BY
sql
-- ORDER BY应用于整个结果集
SELECT name, email FROM users
UNION
SELECT name, email FROM customers
ORDER BY name;
-- 按列位置排序
SELECT name, email, created_at FROM users
UNION
SELECT name, email, created_at FROM customers
ORDER BY 3 DESC; -- 按第三列(created_at)排序
-- 使用别名排序
SELECT name, email, created_at AS registration_date FROM users
UNION
SELECT name, email, created_at FROM customers
ORDER BY registration_date DESC;UNION与LIMIT
sql
-- LIMIT应用于整个结果集
SELECT name, email FROM users
UNION
SELECT name, email FROM customers
ORDER BY name
LIMIT 100;
-- 限制单个查询(使用子查询)
(SELECT name, email FROM users ORDER BY created_at DESC LIMIT 10)
UNION
(SELECT name, email FROM customers ORDER BY created_at DESC LIMIT 10);多个UNION操作
sql
-- 组合三个或更多查询
SELECT id, name FROM employees
UNION
SELECT id, name FROM contractors
UNION
SELECT id, name FROM consultants;
-- 混合UNION和UNION ALL
SELECT id, name FROM current_users
UNION ALL
SELECT id, name FROM archived_users
UNION -- 此UNION从组合结果中删除重复
SELECT id, name FROM deleted_users;UNION与聚合
sql
-- 组合聚合结果
SELECT 'Users' AS source, COUNT(*) AS count FROM users
UNION
SELECT 'Customers' AS source, COUNT(*) AS count FROM customers
UNION
SELECT 'Vendors' AS source, COUNT(*) AS count FROM vendors;
-- 月度统计
SELECT
'January' AS month,
SUM(amount) AS total
FROM orders
WHERE EXTRACT(MONTH FROM order_date) = 1
UNION
SELECT
'February' AS month,
SUM(amount) AS total
FROM orders
WHERE EXTRACT(MONTH FROM order_date) = 2
ORDER BY month;UNION与子查询
sql
-- 组合子查询的结果
SELECT user_id, total_spent FROM (
SELECT user_id, SUM(amount) AS total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY user_id
) AS q1
UNION
SELECT customer_id, total_spent FROM (
SELECT customer_id, SUM(amount) AS total_spent
FROM invoices
WHERE invoice_date >= '2024-01-01'
GROUP BY customer_id
) AS q2;UNION与JOIN
sql
-- 组合连接的结果
SELECT
u.name,
o.order_number,
'order' AS type
FROM users u
INNER JOIN orders o ON u.id = o.user_id
UNION
SELECT
c.name,
i.invoice_number,
'invoice' AS type
FROM customers c
INNER JOIN invoices i ON c.id = i.customer_id;UNION与CASE
sql
-- 条件UNION
SELECT
id,
name,
CASE
WHEN status = 'active' THEN '活跃用户'
ELSE '非活跃用户'
END AS status_label
FROM users
UNION
SELECT
id,
name,
'客户' AS status_label
FROM customers;实际示例
组合搜索结果
sql
-- 跨多个表搜索
SELECT
id,
name,
email,
'user' AS source
FROM users
WHERE name ILIKE '%john%' OR email ILIKE '%john%'
UNION
SELECT
id,
company_name AS name,
contact_email AS email,
'company' AS source
FROM companies
WHERE company_name ILIKE '%john%' OR contact_email ILIKE '%john%'
ORDER BY name;创建报告
sql
-- 月度销售报告
SELECT
'Q1' AS quarter,
SUM(amount) AS total_sales
FROM orders
WHERE order_date BETWEEN '2024-01-01' AND '2024-03-31'
UNION
SELECT
'Q2' AS quarter,
SUM(amount) AS total_sales
FROM orders
WHERE order_date BETWEEN '2024-04-01' AND '2024-06-30'
UNION
SELECT
'Q3' AS quarter,
SUM(amount) AS total_sales
FROM orders
WHERE order_date BETWEEN '2024-07-01' AND '2024-09-30'
UNION
SELECT
'Q4' AS quarter,
SUM(amount) AS total_sales
FROM orders
WHERE order_date BETWEEN '2024-10-01' AND '2024-12-31';组合历史和当前数据
sql
-- 所有交易(当前和归档)
SELECT
transaction_id,
user_id,
amount,
transaction_date,
'current' AS data_source
FROM current_transactions
WHERE transaction_date >= '2024-01-01'
UNION ALL
SELECT
transaction_id,
user_id,
amount,
transaction_date,
'archived' AS data_source
FROM archived_transactions
WHERE transaction_date >= '2024-01-01'
ORDER BY transaction_date DESC;创建查找列表
sql
-- 所有联系方式
SELECT DISTINCT email AS contact, 'email' AS type FROM users WHERE email IS NOT NULL
UNION
SELECT DISTINCT phone AS contact, 'phone' AS type FROM users WHERE phone IS NOT NULL
UNION
SELECT DISTINCT mobile AS contact, 'mobile' AS type FROM users WHERE mobile IS NOT NULL
ORDER BY type, contact;性能考虑
UNION与UNION ALL
sql
-- UNION(较慢 - 删除重复)
SELECT id, name FROM table1
UNION
SELECT id, name FROM table2;
-- UNION ALL(较快 - 保留重复)
SELECT id, name FROM table1
UNION ALL
SELECT id, name FROM table2;使用UNION ALL的情况:
- 您知道没有重复
- 您想保留重复
- 性能至关重要
使用UNION的情况:
- 您需要删除重复
- 数据质量要求唯一结果
索引
sql
-- 在WHERE子句中使用的列上创建索引
CREATE INDEX idx_users_country ON users(country);
CREATE INDEX idx_customers_country ON customers(country);
-- 查询将受益于索引
SELECT id, name FROM users WHERE country = 'USA'
UNION
SELECT id, name FROM customers WHERE country = 'USA';使用EXPLAIN
sql
-- 分析UNION查询性能
EXPLAIN ANALYZE
SELECT id, name FROM users
UNION
SELECT id, name FROM customers;常见模式
跨表去重
sql
-- 查找所有表中的唯一电子邮件
SELECT email FROM users
UNION
SELECT email FROM customers
UNION
SELECT email FROM subscribers;组合部分结果
sql
-- 组合不同时间段的结果
SELECT * FROM sales_2023
UNION ALL
SELECT * FROM sales_2024;创建汇总行
sql
-- 向结果添加总计行
SELECT category, SUM(amount) AS total FROM sales GROUP BY category
UNION
SELECT 'TOTAL' AS category, SUM(amount) AS total FROM sales
ORDER BY category;最佳实践
尽可能使用UNION ALL以获得更好的性能
sql-- 如果您知道没有重复,则更快 SELECT id, name FROM table1 UNION ALL SELECT id, name FROM table2;确保列兼容性
sql-- 好:相同的数据类型 SELECT id::INTEGER, name::TEXT FROM table1 UNION SELECT id::INTEGER, name::TEXT FROM table2;使用有意义的列名
sql-- 好:第一个SELECT的清晰列名 SELECT id AS user_id, name AS user_name FROM users UNION SELECT id, name FROM customers;在UNION之前过滤
sql-- 好:在每个SELECT中过滤 SELECT id, name FROM users WHERE active = true UNION SELECT id, name FROM customers WHERE active = true;对复杂查询使用括号
sql(SELECT id, name FROM users ORDER BY created_at DESC LIMIT 10) UNION (SELECT id, name FROM customers ORDER BY created_at DESC LIMIT 10);
要避免的常见错误
sql
-- ❌ 列数不同
SELECT id, name FROM users
UNION
SELECT id, name, email FROM customers; -- 错误!
-- ✅ 列数相同
SELECT id, name, NULL AS email FROM users
UNION
SELECT id, name, email FROM customers;
-- ❌ 单个SELECT中的ORDER BY(没有括号)
SELECT id, name FROM users ORDER BY name -- 错误!
UNION
SELECT id, name FROM customers;
-- ✅ UNION后的ORDER BY
SELECT id, name FROM users
UNION
SELECT id, name FROM customers
ORDER BY name;
-- ✅ 单个SELECT中的ORDER BY(带括号)
(SELECT id, name FROM users ORDER BY name LIMIT 10)
UNION
(SELECT id, name FROM customers ORDER BY name LIMIT 10);小结
UNION运算符要点:
- UNION:组合结果并删除重复
- UNION ALL:组合结果并保留重复(更快)
- 所有SELECT语句必须具有:
- 相同数量的列
- 兼容的数据类型
- 按位置匹配的列
- ORDER BY应用于整个结果集
- 当重复不是问题时使用UNION ALL
- 在UNION之前过滤数据以获得更好的性能