Spanner子查询 #
一、子查询概述 #
1.1 子查询类型 #
text
Spanner子查询类型
├── 标量子查询 (Scalar Subquery)
│ └── 返回单个值
│
├── 数组子查询 (Array Subquery)
│ └── 返回数组
│
├── 表子查询 (Table Subquery)
│ └── 返回多行多列
│
├── 相关子查询 (Correlated Subquery)
│ └── 引用外部查询
│
└── EXISTS/IN子查询
└── 条件判断
1.2 示例表结构 #
sql
CREATE TABLE users (
user_id INT64 NOT NULL,
name STRING(100) NOT NULL,
dept_id INT64,
salary FLOAT64
) PRIMARY KEY (user_id);
CREATE TABLE departments (
dept_id INT64 NOT NULL,
dept_name STRING(100) NOT NULL,
budget FLOAT64
) PRIMARY KEY (dept_id);
CREATE TABLE orders (
order_id INT64 NOT NULL,
user_id INT64 NOT NULL,
amount FLOAT64,
order_date DATE
) PRIMARY KEY (order_id);
二、标量子查询 #
2.1 基本用法 #
sql
-- 标量子查询返回单个值
SELECT
name,
(SELECT dept_name FROM departments WHERE dept_id = u.dept_id) AS dept_name
FROM users u;
-- 在WHERE中使用
SELECT * FROM users
WHERE salary > (SELECT AVG(salary) FROM users);
-- 在表达式中使用
SELECT
name,
salary,
salary - (SELECT AVG(salary) FROM users) AS diff_from_avg
FROM users;
2.2 返回单个值 #
sql
-- 子查询必须只返回一行一列
SELECT name FROM users
WHERE dept_id = (SELECT dept_id FROM departments WHERE dept_name = 'Engineering');
-- 使用聚合函数确保返回单个值
SELECT name FROM users
WHERE salary > (SELECT MAX(budget) FROM departments);
2.3 注意事项 #
sql
-- 如果子查询返回多行会报错
SELECT name FROM users
WHERE dept_id = (SELECT dept_id FROM departments); -- ERROR
-- 使用聚合或LIMIT确保单行
SELECT name FROM users
WHERE dept_id = (SELECT dept_id FROM departments LIMIT 1); -- OK
-- 处理NULL值
SELECT name FROM users
WHERE dept_id = (SELECT dept_id FROM departments WHERE dept_name = 'NonExist');
-- 如果子查询返回NULL,条件为NULL,不匹配任何行
三、表子查询 #
3.1 在FROM中使用 #
sql
-- 子查询作为表
SELECT u.name, o.total
FROM users u,
(SELECT user_id, SUM(amount) AS total
FROM orders
GROUP BY user_id) o
WHERE u.user_id = o.user_id;
-- 使用别名
SELECT *
FROM (
SELECT user_id, COUNT(*) AS order_count
FROM orders
GROUP BY user_id
) AS user_orders
WHERE order_count > 5;
3.2 WITH子句 #
sql
-- 使用WITH子句提高可读性
WITH
user_orders AS (
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
),
active_users AS (
SELECT user_id, name
FROM users
WHERE status = 'active'
)
SELECT a.name, o.total_amount
FROM active_users a
LEFT JOIN user_orders o ON a.user_id = o.user_id;
-- 多个CTE
WITH
dept_avg AS (
SELECT dept_id, AVG(salary) AS avg_salary
FROM users
GROUP BY dept_id
),
high_earners AS (
SELECT u.user_id, u.name, u.salary, d.avg_salary
FROM users u
JOIN dept_avg d ON u.dept_id = d.dept_id
WHERE u.salary > d.avg_salary
)
SELECT * FROM high_earners;
3.3 嵌套子查询 #
sql
-- 嵌套子查询
SELECT name FROM users
WHERE dept_id IN (
SELECT dept_id FROM departments
WHERE budget > (
SELECT AVG(budget) FROM departments
)
);
四、相关子查询 #
4.1 基本概念 #
text
相关子查询特点:
├── 引用外部查询的列
├── 每行执行一次
├── 性能开销较大
└── 可用JOIN优化
4.2 相关子查询示例 #
sql
-- 查找每个部门薪资最高的员工
SELECT u.name, u.salary, u.dept_id
FROM users u
WHERE u.salary = (
SELECT MAX(salary)
FROM users u2
WHERE u2.dept_id = u.dept_id -- 引用外部查询
);
-- 查找订单数超过平均值的用户
SELECT u.name
FROM users u
WHERE (
SELECT COUNT(*)
FROM orders o
WHERE o.user_id = u.user_id
) > (
SELECT AVG(order_count)
FROM (
SELECT COUNT(*) AS order_count
FROM orders
GROUP BY user_id
)
);
4.3 相关子查询优化 #
sql
-- 原始相关子查询(性能差)
SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id AND o.amount > 1000
);
-- 优化为JOIN(性能好)
SELECT DISTINCT u.name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.amount > 1000;
-- 使用WITH子句
WITH high_value_orders AS (
SELECT DISTINCT user_id
FROM orders
WHERE amount > 1000
)
SELECT u.name
FROM users u
INNER JOIN high_value_orders h ON u.user_id = h.user_id;
五、EXISTS子查询 #
5.1 EXISTS用法 #
sql
-- EXISTS: 存在匹配则返回TRUE
SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id
);
-- NOT EXISTS: 不存在匹配则返回TRUE
SELECT u.name
FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id
);
5.2 EXISTS vs IN #
sql
-- EXISTS: 存在性检查
SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id AND o.amount > 1000
);
-- IN: 值匹配
SELECT u.name
FROM users u
WHERE u.user_id IN (
SELECT user_id FROM orders WHERE amount > 1000
);
-- 性能差异:
-- EXISTS: 找到第一个匹配就停止
-- IN: 需要返回所有匹配值
5.3 复杂EXISTS #
sql
-- 多条件EXISTS
SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id
AND o.amount > 1000
AND o.order_date >= DATE '2024-01-01'
);
-- 双重EXISTS
SELECT d.dept_name
FROM departments d
WHERE EXISTS (
SELECT 1 FROM users u
WHERE u.dept_id = d.dept_id
AND EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id AND o.amount > 10000
)
);
六、IN子查询 #
6.1 IN用法 #
sql
-- IN子查询
SELECT name FROM users
WHERE user_id IN (SELECT user_id FROM orders);
-- NOT IN子查询
SELECT name FROM users
WHERE user_id NOT IN (SELECT user_id FROM orders);
-- 多列IN
SELECT * FROM orders
WHERE (user_id, order_date) IN (
SELECT user_id, MAX(order_date)
FROM orders
GROUP BY user_id
);
6.2 IN vs JOIN #
sql
-- IN子查询
SELECT u.name
FROM users u
WHERE u.user_id IN (SELECT user_id FROM orders WHERE amount > 1000);
-- JOIN实现
SELECT DISTINCT u.name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.amount > 1000;
-- 性能建议:
-- 子查询结果集小时用IN
-- 需要JOIN列时用JOIN
6.3 NULL处理 #
sql
-- NOT IN与NULL的问题
SELECT name FROM users
WHERE user_id NOT IN (SELECT user_id FROM orders WHERE user_id IS NOT NULL);
-- 如果子查询有NULL,NOT IN结果为空
-- 使用NOT EXISTS替代
SELECT name FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.user_id);
-- 不受NULL影响
七、数组子查询 #
7.1 返回数组 #
sql
-- 数组子查询
SELECT
u.name,
ARRAY(SELECT o.order_id FROM orders o WHERE o.user_id = u.user_id) AS order_ids
FROM users u;
-- 使用ARRAY_AGG替代
SELECT
u.name,
ARRAY_AGG(o.order_id) AS order_ids
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
GROUP BY u.user_id, u.name;
7.2 UNNEST与子查询 #
sql
-- UNNEST数组子查询
SELECT u.name, order_id
FROM users u,
UNNEST(ARRAY(SELECT o.order_id FROM orders o WHERE o.user_id = u.user_id)) AS order_id;
-- 等价于JOIN
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
八、LATERAL子查询 #
8.1 LATERAL概念 #
sql
-- LATERAL允许子查询引用外部表
SELECT u.name, recent_orders.order_id
FROM users u,
LATERAL (
SELECT order_id
FROM orders
WHERE user_id = u.user_id
ORDER BY order_date DESC
LIMIT 3
) AS recent_orders;
8.2 LATERAL应用 #
sql
-- 获取每个用户最近的订单
SELECT
u.name,
recent.order_id,
recent.amount
FROM users u,
LATERAL (
SELECT order_id, amount
FROM orders
WHERE user_id = u.user_id
ORDER BY order_date DESC
LIMIT 1
) AS recent;
-- 获取每个部门薪资最高的员工
SELECT d.dept_name, top_emp.name, top_emp.salary
FROM departments d,
LATERAL (
SELECT name, salary
FROM users
WHERE dept_id = d.dept_id
ORDER BY salary DESC
LIMIT 1
) AS top_emp;
九、子查询性能 #
9.1 性能考虑 #
text
子查询性能因素:
├── 子查询执行次数
├── 子查询结果集大小
├── 是否使用索引
└── 是否可以优化为JOIN
9.2 优化建议 #
sql
-- 不推荐: 相关子查询
SELECT u.name
FROM users u
WHERE u.salary > (
SELECT AVG(salary) FROM users u2 WHERE u2.dept_id = u.dept_id
);
-- 推荐: 使用JOIN和WITH
WITH dept_avg AS (
SELECT dept_id, AVG(salary) AS avg_salary
FROM users
GROUP BY dept_id
)
SELECT u.name
FROM users u
INNER JOIN dept_avg d ON u.dept_id = d.dept_id
WHERE u.salary > d.avg_salary;
9.3 查看执行计划 #
sql
-- 使用EXPLAIN分析子查询
EXPLAIN SELECT u.name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.user_id
);
十、子查询最佳实践 #
10.1 选择合适的类型 #
text
子查询选择建议:
├── 单值比较: 标量子查询
├── 存在性检查: EXISTS
├── 值列表匹配: IN
├── 需要外部引用: LATERAL
└── 复杂逻辑: WITH子句
10.2 性能建议 #
text
性能优化建议:
├── 避免深层嵌套
├── 使用WITH提高可读性
├── 相关子查询考虑JOIN替代
├── 创建合适的索引
└── 使用EXPLAIN分析
10.3 可读性建议 #
text
可读性建议:
├── 使用WITH子句分解复杂查询
├── 添加有意义的别名
├── 格式化SQL语句
├── 添加注释说明
└── 避免过度嵌套
十一、总结 #
子查询类型对比:
| 类型 | 返回值 | 使用场景 |
|---|---|---|
| 标量子查询 | 单个值 | 比较、计算 |
| 表子查询 | 多行多列 | FROM子句 |
| 相关子查询 | 依赖外部 | 行级条件 |
| EXISTS | 布尔值 | 存在性检查 |
| IN | 值列表 | 值匹配 |
| LATERAL | 多行 | 引用外部表 |
最佳实践:
text
1. 选择合适的子查询类型
└── 根据需求选择
2. 使用WITH子句
└── 提高可读性
3. 优化相关子查询
└── 考虑JOIN替代
4. 处理NULL值
└── 使用NOT EXISTS替代NOT IN
5. 分析执行计划
└── 使用EXPLAIN
下一步,让我们学习高级查询!
最后更新:2026-03-27