Spanner多表连接 #
一、JOIN概述 #
1.1 连接类型 #
text
Spanner支持的JOIN类型
├── INNER JOIN
│ └── 只返回匹配的行
│
├── LEFT [OUTER] JOIN
│ └── 返回左表所有行,右表无匹配则为NULL
│
├── RIGHT [OUTER] JOIN
│ └── 返回右表所有行,左表无匹配则为NULL
│
├── FULL [OUTER] JOIN
│ └── 返回两表所有行,无匹配则为NULL
│
└── CROSS JOIN
└── 笛卡尔积
1.2 示例表结构 #
sql
-- 用户表
CREATE TABLE users (
user_id INT64 NOT NULL,
name STRING(100) NOT NULL,
email STRING(255),
dept_id INT64
) PRIMARY KEY (user_id);
-- 部门表
CREATE TABLE departments (
dept_id INT64 NOT NULL,
dept_name STRING(100) NOT NULL
) PRIMARY KEY (dept_id);
-- 订单表(交错表)
CREATE TABLE orders (
user_id INT64 NOT NULL,
order_id INT64 NOT NULL,
product_name STRING(200),
amount FLOAT64,
order_date DATE
) PRIMARY KEY (user_id, order_id)
INTERLEAVE IN PARENT users ON DELETE CASCADE;
二、INNER JOIN #
2.1 基本语法 #
sql
SELECT columns
FROM table1
INNER JOIN table2 ON table1.column = table2.column;
2.2 内连接示例 #
sql
-- 基本内连接
SELECT u.user_id, u.name, d.dept_name
FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id;
-- 使用WHERE替代ON
SELECT u.user_id, u.name, d.dept_name
FROM users u, departments d
WHERE u.dept_id = d.dept_id;
-- 多表连接
SELECT u.user_id, u.name, d.dept_name, o.order_id
FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id
INNER JOIN orders o ON u.user_id = o.user_id;
2.3 连接条件 #
sql
-- 单条件连接
SELECT * FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id;
-- 多条件连接
SELECT * FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
AND o.amount > 100;
-- 使用表达式连接
SELECT * FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.order_date >= DATE '2024-01-01';
三、LEFT JOIN #
3.1 基本语法 #
sql
SELECT columns
FROM table1
LEFT JOIN table2 ON table1.column = table2.column;
3.2 左连接示例 #
sql
-- 左连接: 返回所有用户,即使没有部门
SELECT u.user_id, u.name, d.dept_name
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id;
-- 查找没有部门的用户
SELECT u.user_id, u.name
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id
WHERE d.dept_id IS NULL;
-- 多表左连接
SELECT u.user_id, u.name, d.dept_name, o.order_id
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id
LEFT JOIN orders o ON u.user_id = o.user_id;
3.3 LEFT JOIN vs INNER JOIN #
sql
-- INNER JOIN: 只返回有匹配的行
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
-- 结果: 只有下过订单的用户
-- LEFT JOIN: 返回所有左表行
SELECT u.name, o.order_id
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id;
-- 结果: 所有用户,没订单的order_id为NULL
四、RIGHT JOIN和FULL JOIN #
4.1 RIGHT JOIN #
sql
-- 右连接: 返回所有部门,即使没有用户
SELECT u.user_id, u.name, d.dept_name
FROM users u
RIGHT JOIN departments d ON u.dept_id = d.dept_id;
-- 等价于LEFT JOIN交换表顺序
SELECT u.user_id, u.name, d.dept_name
FROM departments d
LEFT JOIN users u ON u.dept_id = d.dept_id;
4.2 FULL JOIN #
sql
-- 全连接: 返回所有用户和所有部门
SELECT u.user_id, u.name, d.dept_name
FROM users u
FULL JOIN departments d ON u.dept_id = d.dept_id;
-- 查找没有匹配的行
SELECT u.user_id, u.name, d.dept_name
FROM users u
FULL JOIN departments d ON u.dept_id = d.dept_id
WHERE u.user_id IS NULL OR d.dept_id IS NULL;
五、CROSS JOIN #
5.1 笛卡尔积 #
sql
-- 交叉连接: 每个用户与每个部门组合
SELECT u.name, d.dept_name
FROM users u
CROSS JOIN departments d;
-- 等价于
SELECT u.name, d.dept_name
FROM users u, departments d;
5.2 使用场景 #
sql
-- 生成所有组合
-- 例如: 每个用户每个月的报告模板
SELECT u.user_id, m.month
FROM users u
CROSS JOIN (
SELECT 1 AS month UNION ALL
SELECT 2 UNION ALL
SELECT 3 UNION ALL
SELECT 4 UNION ALL
SELECT 5 UNION ALL
SELECT 6
) m;
六、交错表连接 #
6.1 交错表优势 #
text
交错表连接优势:
├── 数据物理存储在一起
├── 减少网络传输
├── 提高JOIN性能
└── 自动级联删除
6.2 交错表查询 #
sql
-- 交错表JOIN(性能最优)
SELECT u.name, o.order_id, o.product_name, o.amount
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;
-- 使用交错表前缀扫描
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1 AND o.order_id BETWEEN 100 AND 200;
6.3 交错表JOIN优化 #
sql
-- 推荐: 使用主键范围查询
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;
-- 不推荐: 跨用户查询(失去交错优势)
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.amount > 1000;
七、自连接 #
7.1 同表连接 #
sql
-- 员工与经理关系
CREATE TABLE employees (
emp_id INT64 NOT NULL,
name STRING(100) NOT NULL,
manager_id INT64
) PRIMARY KEY (emp_id);
-- 查询员工及其经理
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.emp_id;
-- 查询同一经理下的员工
SELECT e1.name, e2.name, e1.manager_id
FROM employees e1
INNER JOIN employees e2 ON e1.manager_id = e2.manager_id
WHERE e1.emp_id < e2.emp_id;
7.2 层级查询 #
sql
-- 查询所有下属(需要多次JOIN或递归)
SELECT e1.name AS level1, e2.name AS level2, e3.name AS level3
FROM employees e1
LEFT JOIN employees e2 ON e2.manager_id = e1.emp_id
LEFT JOIN employees e3 ON e3.manager_id = e2.emp_id
WHERE e1.manager_id IS NULL; -- 顶级经理
八、JOIN与子查询 #
8.1 连接子查询 #
sql
-- 连接子查询结果
SELECT u.name, o.total_amount
FROM users u
INNER JOIN (
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
) o ON u.user_id = o.user_id;
-- 使用WITH子句提高可读性
WITH user_orders AS (
SELECT user_id, SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
)
SELECT u.name, o.total_amount
FROM users u
INNER JOIN user_orders o ON u.user_id = o.user_id;
8.2 LATERAL连接 #
sql
-- LATERAL允许子查询引用外部表
SELECT u.name, o.order_id
FROM users u,
LATERAL (
SELECT order_id
FROM orders
WHERE user_id = u.user_id
ORDER BY order_date DESC
LIMIT 3
) o;
九、JOIN性能优化 #
9.1 使用索引 #
sql
-- 创建连接列索引
CREATE INDEX idx_users_dept ON users(dept_id);
CREATE INDEX idx_orders_user ON orders(user_id);
-- 使用索引的JOIN
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.dept_id = 1;
9.2 交错表优化 #
sql
-- 将频繁JOIN的表设计为交错表
CREATE TABLE orders (
user_id INT64 NOT NULL,
order_id INT64 NOT NULL,
...
) PRIMARY KEY (user_id, order_id)
INTERLEAVE IN PARENT users ON DELETE CASCADE;
-- 查询时利用交错优势
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1; -- 主键前缀查询
9.3 JOIN顺序 #
sql
-- 小表驱动大表
SELECT *
FROM small_table s
INNER JOIN large_table l ON s.id = l.id;
-- 使用WHERE尽早过滤
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.status = 'active' -- 先过滤用户
AND o.order_date >= DATE '2024-01-01'; -- 再过滤订单
9.4 查询提示 #
sql
-- 强制JOIN顺序
SELECT u.name, o.order_id
FROM users u@{FORCE_JOIN_ORDER=TRUE}
INNER JOIN orders o ON u.user_id = o.user_id;
-- 强制使用索引
SELECT u.name, o.order_id
FROM users u@{FORCE_INDEX=idx_users_status}
INNER JOIN orders o ON u.user_id = o.user_id;
十、JOIN执行计划 #
10.1 查看执行计划 #
sql
EXPLAIN SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;
10.2 执行计划类型 #
text
JOIN执行计划类型:
├── Hash Join
│ └── 构建哈希表进行连接
│
├── Cross Apply
│ └── 嵌套循环连接
│
├── Distributed Cross Apply
│ └── 分布式嵌套循环
│
└── Merge Join
└── 有序数据合并连接
十一、JOIN最佳实践 #
11.1 设计建议 #
text
JOIN设计建议:
├── 频繁JOIN的表使用交错表
├── 连接列创建索引
├── 合理设计主键
└── 控制JOIN表数量
11.2 查询建议 #
text
JOIN查询建议:
├── 使用WHERE尽早过滤
├── 只选择需要的列
├── 避免SELECT *
├── 使用EXPLAIN分析
└── 合理使用查询提示
11.3 性能建议 #
text
JOIN性能建议:
├── 小表驱动大表
├── 使用交错表优化
├── 创建合适的索引
├── 避免笛卡尔积
└── 分解复杂查询
十二、总结 #
JOIN类型对比:
| 类型 | 说明 | 使用场景 |
|---|---|---|
| INNER JOIN | 只返回匹配行 | 需要两表都有数据 |
| LEFT JOIN | 返回左表所有行 | 左表为主 |
| RIGHT JOIN | 返回右表所有行 | 右表为主 |
| FULL JOIN | 返回两表所有行 | 需要所有数据 |
| CROSS JOIN | 笛卡尔积 | 生成组合 |
最佳实践:
text
1. 使用交错表
└── 频繁JOIN的父子表
2. 创建索引
└── 连接列上创建索引
3. 优化查询
└── 使用WHERE过滤
4. 分析执行计划
└── 使用EXPLAIN
5. 控制JOIN复杂度
└── 避免过多表JOIN
下一步,让我们学习子查询!
最后更新:2026-03-27