Spanner多表连接 #

一、JOIN概述 #

1.1 连接类型 #

text
Spanner支持的JOIN类型
├── INNER JOIN
│   └── 只返回匹配的行
│
├── LEFT [OUTER] JOIN
│   └── 返回左表所有行,右表无匹配则为NULL
│
├── RIGHT [OUTER] JOIN
│   └── 返回右表所有行,左表无匹配则为NULL
│
├── FULL [OUTER] JOIN
│   └── 返回两表所有行,无匹配则为NULL
│
└── CROSS JOIN
    └── 笛卡尔积

1.2 示例表结构 #

sql
-- 用户表
CREATE TABLE users (
    user_id INT64 NOT NULL,
    name STRING(100) NOT NULL,
    email STRING(255),
    dept_id INT64
) PRIMARY KEY (user_id);

-- 部门表
CREATE TABLE departments (
    dept_id INT64 NOT NULL,
    dept_name STRING(100) NOT NULL
) PRIMARY KEY (dept_id);

-- 订单表(交错表)
CREATE TABLE orders (
    user_id INT64 NOT NULL,
    order_id INT64 NOT NULL,
    product_name STRING(200),
    amount FLOAT64,
    order_date DATE
) PRIMARY KEY (user_id, order_id)
  INTERLEAVE IN PARENT users ON DELETE CASCADE;

二、INNER JOIN #

2.1 基本语法 #

sql
SELECT columns
FROM table1
INNER JOIN table2 ON table1.column = table2.column;

2.2 内连接示例 #

sql
-- 基本内连接
SELECT u.user_id, u.name, d.dept_name
FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id;

-- 使用WHERE替代ON
SELECT u.user_id, u.name, d.dept_name
FROM users u, departments d
WHERE u.dept_id = d.dept_id;

-- 多表连接
SELECT u.user_id, u.name, d.dept_name, o.order_id
FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id
INNER JOIN orders o ON u.user_id = o.user_id;

2.3 连接条件 #

sql
-- 单条件连接
SELECT * FROM users u
INNER JOIN departments d ON u.dept_id = d.dept_id;

-- 多条件连接
SELECT * FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
    AND o.amount > 100;

-- 使用表达式连接
SELECT * FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.order_date >= DATE '2024-01-01';

三、LEFT JOIN #

3.1 基本语法 #

sql
SELECT columns
FROM table1
LEFT JOIN table2 ON table1.column = table2.column;

3.2 左连接示例 #

sql
-- 左连接: 返回所有用户,即使没有部门
SELECT u.user_id, u.name, d.dept_name
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id;

-- 查找没有部门的用户
SELECT u.user_id, u.name
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id
WHERE d.dept_id IS NULL;

-- 多表左连接
SELECT u.user_id, u.name, d.dept_name, o.order_id
FROM users u
LEFT JOIN departments d ON u.dept_id = d.dept_id
LEFT JOIN orders o ON u.user_id = o.user_id;

3.3 LEFT JOIN vs INNER JOIN #

sql
-- INNER JOIN: 只返回有匹配的行
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
-- 结果: 只有下过订单的用户

-- LEFT JOIN: 返回所有左表行
SELECT u.name, o.order_id
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id;
-- 结果: 所有用户,没订单的order_id为NULL

四、RIGHT JOIN和FULL JOIN #

4.1 RIGHT JOIN #

sql
-- 右连接: 返回所有部门,即使没有用户
SELECT u.user_id, u.name, d.dept_name
FROM users u
RIGHT JOIN departments d ON u.dept_id = d.dept_id;

-- 等价于LEFT JOIN交换表顺序
SELECT u.user_id, u.name, d.dept_name
FROM departments d
LEFT JOIN users u ON u.dept_id = d.dept_id;

4.2 FULL JOIN #

sql
-- 全连接: 返回所有用户和所有部门
SELECT u.user_id, u.name, d.dept_name
FROM users u
FULL JOIN departments d ON u.dept_id = d.dept_id;

-- 查找没有匹配的行
SELECT u.user_id, u.name, d.dept_name
FROM users u
FULL JOIN departments d ON u.dept_id = d.dept_id
WHERE u.user_id IS NULL OR d.dept_id IS NULL;

五、CROSS JOIN #

5.1 笛卡尔积 #

sql
-- 交叉连接: 每个用户与每个部门组合
SELECT u.name, d.dept_name
FROM users u
CROSS JOIN departments d;

-- 等价于
SELECT u.name, d.dept_name
FROM users u, departments d;

5.2 使用场景 #

sql
-- 生成所有组合
-- 例如: 每个用户每个月的报告模板
SELECT u.user_id, m.month
FROM users u
CROSS JOIN (
    SELECT 1 AS month UNION ALL
    SELECT 2 UNION ALL
    SELECT 3 UNION ALL
    SELECT 4 UNION ALL
    SELECT 5 UNION ALL
    SELECT 6
) m;

六、交错表连接 #

6.1 交错表优势 #

text
交错表连接优势:
├── 数据物理存储在一起
├── 减少网络传输
├── 提高JOIN性能
└── 自动级联删除

6.2 交错表查询 #

sql
-- 交错表JOIN(性能最优)
SELECT u.name, o.order_id, o.product_name, o.amount
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;

-- 使用交错表前缀扫描
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1 AND o.order_id BETWEEN 100 AND 200;

6.3 交错表JOIN优化 #

sql
-- 推荐: 使用主键范围查询
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;

-- 不推荐: 跨用户查询(失去交错优势)
SELECT u.name, o.order_id, o.product_name
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.amount > 1000;

七、自连接 #

7.1 同表连接 #

sql
-- 员工与经理关系
CREATE TABLE employees (
    emp_id INT64 NOT NULL,
    name STRING(100) NOT NULL,
    manager_id INT64
) PRIMARY KEY (emp_id);

-- 查询员工及其经理
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.emp_id;

-- 查询同一经理下的员工
SELECT e1.name, e2.name, e1.manager_id
FROM employees e1
INNER JOIN employees e2 ON e1.manager_id = e2.manager_id
WHERE e1.emp_id < e2.emp_id;

7.2 层级查询 #

sql
-- 查询所有下属(需要多次JOIN或递归)
SELECT e1.name AS level1, e2.name AS level2, e3.name AS level3
FROM employees e1
LEFT JOIN employees e2 ON e2.manager_id = e1.emp_id
LEFT JOIN employees e3 ON e3.manager_id = e2.emp_id
WHERE e1.manager_id IS NULL;  -- 顶级经理

八、JOIN与子查询 #

8.1 连接子查询 #

sql
-- 连接子查询结果
SELECT u.name, o.total_amount
FROM users u
INNER JOIN (
    SELECT user_id, SUM(amount) AS total_amount
    FROM orders
    GROUP BY user_id
) o ON u.user_id = o.user_id;

-- 使用WITH子句提高可读性
WITH user_orders AS (
    SELECT user_id, SUM(amount) AS total_amount
    FROM orders
    GROUP BY user_id
)
SELECT u.name, o.total_amount
FROM users u
INNER JOIN user_orders o ON u.user_id = o.user_id;

8.2 LATERAL连接 #

sql
-- LATERAL允许子查询引用外部表
SELECT u.name, o.order_id
FROM users u,
LATERAL (
    SELECT order_id 
    FROM orders 
    WHERE user_id = u.user_id 
    ORDER BY order_date DESC 
    LIMIT 3
) o;

九、JOIN性能优化 #

9.1 使用索引 #

sql
-- 创建连接列索引
CREATE INDEX idx_users_dept ON users(dept_id);
CREATE INDEX idx_orders_user ON orders(user_id);

-- 使用索引的JOIN
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.dept_id = 1;

9.2 交错表优化 #

sql
-- 将频繁JOIN的表设计为交错表
CREATE TABLE orders (
    user_id INT64 NOT NULL,
    order_id INT64 NOT NULL,
    ...
) PRIMARY KEY (user_id, order_id)
  INTERLEAVE IN PARENT users ON DELETE CASCADE;

-- 查询时利用交错优势
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;  -- 主键前缀查询

9.3 JOIN顺序 #

sql
-- 小表驱动大表
SELECT *
FROM small_table s
INNER JOIN large_table l ON s.id = l.id;

-- 使用WHERE尽早过滤
SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.status = 'active'  -- 先过滤用户
  AND o.order_date >= DATE '2024-01-01';  -- 再过滤订单

9.4 查询提示 #

sql
-- 强制JOIN顺序
SELECT u.name, o.order_id
FROM users u@{FORCE_JOIN_ORDER=TRUE}
INNER JOIN orders o ON u.user_id = o.user_id;

-- 强制使用索引
SELECT u.name, o.order_id
FROM users u@{FORCE_INDEX=idx_users_status}
INNER JOIN orders o ON u.user_id = o.user_id;

十、JOIN执行计划 #

10.1 查看执行计划 #

sql
EXPLAIN SELECT u.name, o.order_id
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE u.user_id = 1;

10.2 执行计划类型 #

text
JOIN执行计划类型:
├── Hash Join
│   └── 构建哈希表进行连接
│
├── Cross Apply
│   └── 嵌套循环连接
│
├── Distributed Cross Apply
│   └── 分布式嵌套循环
│
└── Merge Join
    └── 有序数据合并连接

十一、JOIN最佳实践 #

11.1 设计建议 #

text
JOIN设计建议:
├── 频繁JOIN的表使用交错表
├── 连接列创建索引
├── 合理设计主键
└── 控制JOIN表数量

11.2 查询建议 #

text
JOIN查询建议:
├── 使用WHERE尽早过滤
├── 只选择需要的列
├── 避免SELECT *
├── 使用EXPLAIN分析
└── 合理使用查询提示

11.3 性能建议 #

text
JOIN性能建议:
├── 小表驱动大表
├── 使用交错表优化
├── 创建合适的索引
├── 避免笛卡尔积
└── 分解复杂查询

十二、总结 #

JOIN类型对比:

类型 说明 使用场景
INNER JOIN 只返回匹配行 需要两表都有数据
LEFT JOIN 返回左表所有行 左表为主
RIGHT JOIN 返回右表所有行 右表为主
FULL JOIN 返回两表所有行 需要所有数据
CROSS JOIN 笛卡尔积 生成组合

最佳实践:

text
1. 使用交错表
   └── 频繁JOIN的父子表

2. 创建索引
   └── 连接列上创建索引

3. 优化查询
   └── 使用WHERE过滤

4. 分析执行计划
   └── 使用EXPLAIN

5. 控制JOIN复杂度
   └── 避免过多表JOIN

下一步,让我们学习子查询!

最后更新:2026-03-27