多表连接查询 #
一、JOIN基础 #
1.1 连接类型概览 #
text
JOIN 类型
├── INNER JOIN (内连接)
│ └── 只返回匹配的行
│
├── OUTER JOIN (外连接)
│ ├── LEFT JOIN (左外连接)
│ ├── RIGHT JOIN (右外连接)
│ └── FULL JOIN (全外连接)
│
├── CROSS JOIN (交叉连接)
│ └── 笛卡尔积
│
└── NATURAL JOIN (自然连接)
└── 自动匹配同名列
1.2 示例表 #
sql
-- 创建示例表
CREATE TABLE departments (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
location VARCHAR(100)
);
CREATE TABLE employees (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
department_id BIGINT,
salary DECIMAL(10, 2),
manager_id BIGINT
);
CREATE TABLE projects (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL,
department_id BIGINT
);
INSERT INTO departments (name, location) VALUES
('Engineering', 'Building A'),
('Sales', 'Building B'),
('Marketing', 'Building C'),
('HR', 'Building D');
INSERT INTO employees (name, department_id, salary, manager_id) VALUES
('Alice', 1, 90000, NULL),
('Bob', 1, 80000, 1),
('Carol', 1, 85000, 1),
('David', 2, 75000, NULL),
('Eve', 2, 70000, 4),
('Frank', NULL, 60000, NULL);
INSERT INTO projects (name, department_id) VALUES
('Project Alpha', 1),
('Project Beta', 1),
('Project Gamma', 2),
('Project Delta', NULL);
二、INNER JOIN #
2.1 基本内连接 #
sql
-- 内连接: 只返回两表都匹配的行
SELECT
e.name AS employee_name,
d.name AS department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.id;
-- 等价写法
SELECT
e.name AS employee_name,
d.name AS department_name
FROM employees e, departments d
WHERE e.department_id = d.id;
2.2 多表内连接 #
sql
-- 三表连接
SELECT
e.name AS employee,
d.name AS department,
p.name AS project
FROM employees e
INNER JOIN departments d ON e.department_id = d.id
INNER JOIN projects p ON d.id = p.department_id;
-- 多条件连接
SELECT
e.name,
d.name AS department
FROM employees e
INNER JOIN departments d ON e.department_id = d.id AND d.location = 'Building A';
2.3 自连接 #
sql
-- 自连接: 员工和经理关系
SELECT
e.name AS employee,
m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
-- 只查看有经理的员工
SELECT
e.name AS employee,
m.name AS manager
FROM employees e
INNER JOIN employees m ON e.manager_id = m.id;
三、LEFT JOIN #
3.1 左外连接 #
sql
-- 左连接: 返回左表所有行,右表无匹配则为NULL
SELECT
e.name AS employee,
d.name AS department
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id;
-- 结果包含 Frank (department_id 为 NULL)
3.2 只查询左表独有数据 #
sql
-- 查询没有部门的员工
SELECT e.name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id
WHERE d.id IS NULL;
-- 等价于 NOT IN
SELECT name FROM employees
WHERE department_id NOT IN (SELECT id FROM departments)
OR department_id IS NULL;
3.3 多表左连接 #
sql
-- 多表左连接
SELECT
e.name AS employee,
d.name AS department,
p.name AS project
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id
LEFT JOIN projects p ON d.id = p.department_id;
四、RIGHT JOIN #
4.1 右外连接 #
sql
-- 右连接: 返回右表所有行,左表无匹配则为NULL
SELECT
e.name AS employee,
d.name AS department
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.id;
-- 结果包含 HR 部门 (没有员工)
4.2 转换为LEFT JOIN #
sql
-- 右连接可以转换为左连接
SELECT
e.name AS employee,
d.name AS department
FROM departments d
LEFT JOIN employees e ON e.department_id = d.id;
五、CROSS JOIN #
5.1 交叉连接 #
sql
-- 交叉连接: 笛卡尔积
SELECT
e.name AS employee,
d.name AS department
FROM employees e
CROSS JOIN departments d;
-- 等价于
SELECT
e.name AS employee,
d.name AS department
FROM employees e, departments d;
-- 如果 employees 有 6 行,departments 有 4 行
-- 结果有 6 * 4 = 24 行
5.2 使用场景 #
sql
-- 生成所有组合
-- 例如: 所有员工和所有月份的组合
CREATE TABLE months (
month INT,
month_name VARCHAR(20)
);
INSERT INTO months VALUES
(1, 'January'), (2, 'February'), (3, 'March');
SELECT
e.name,
m.month_name
FROM employees e
CROSS JOIN months m
ORDER BY e.name, m.month;
六、JOIN优化 #
6.1 Join方式 #
text
TiDB Join 实现方式
┌─────────────────────────────────────────────────────────────┐
│ │
│ Hash Join: │
│ ├── 构建小表的 Hash 表 │
│ ├── 探测大表 │
│ ├── 适合: 大表 Join 大表 │
│ └── 内存消耗较大 │
│ │
│ Merge Join: │
│ ├── 两表按 Join Key 排序 │
│ ├── 顺序扫描匹配 │
│ ├── 适合: 已排序数据 │
│ └── 内存消耗小 │
│ │
│ Index Join: │
│ ├── 遍历外表 │
│ ├── 通过索引查找内表 │
│ ├── 适合: 外表小、内表有索引 │
│ └── 随机 IO 较多 │
│ │
└─────────────────────────────────────────────────────────────┘
6.2 使用Hints #
sql
-- 强制使用 Hash Join
SELECT /*+ HASH_JOIN(e, d) */
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
-- 强制使用 Merge Join
SELECT /*+ MERGE_JOIN(e, d) */
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
-- 强制使用 Index Join
SELECT /*+ INL_JOIN(d) */
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
-- 指定 Join 顺序
SELECT /*+ LEADING(d, e) */
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
6.3 Join优化建议 #
text
Join 优化建议
┌─────────────────────────────────────────────────────────────┐
│ │
│ 1. 小表驱动大表 │
│ ├── 让小表作为驱动表 │
│ └── 减少循环次数 │
│ │
│ 2. 在连接列上创建索引 │
│ ├── Join 条件列建索引 │
│ └── 提高 Index Join 效率 │
│ │
│ 3. 只选择需要的列 │
│ ├── 避免 SELECT * │
│ └── 减少数据传输 │
│ │
│ 4. 过滤条件尽早应用 │
│ ├── WHERE 条件下推 │
│ └── 减少参与 Join 的数据量 │
│ │
│ 5. 使用合适的 Join 类型 │
│ ├── 明确需求选择 Join 类型 │
│ └── 避免 OUTER JOIN 滥用 │
│ │
└─────────────────────────────────────────────────────────────┘
七、执行计划分析 #
7.1 查看Join执行计划 #
sql
-- 查看执行计划
EXPLAIN SELECT
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
-- 详细执行计划
EXPLAIN ANALYZE SELECT
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
7.2 分析Join性能 #
sql
-- 查看执行时间和资源消耗
EXPLAIN ANALYZE SELECT
e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
-- 关注指标:
-- - execution info: 执行时间
-- - operator info: Join 类型
-- - actRows: 实际行数
-- - memory: 内存使用
八、常见问题 #
8.1 笛卡尔积 #
sql
-- 问题: 忘记写 Join 条件
SELECT e.name, d.name
FROM employees e, departments d;
-- 产生笛卡尔积,数据量爆炸!
-- 解决: 始终写 Join 条件
SELECT e.name, d.name
FROM employees e
JOIN departments d ON e.department_id = d.id;
8.2 NULL值处理 #
sql
-- 问题: Join 条件有 NULL
SELECT e.name, d.name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id
WHERE d.name = 'Engineering';
-- 会过滤掉 department_id 为 NULL 的行
-- 解决: 使用 IS NULL 检查
SELECT e.name, d.name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.id
WHERE d.name = 'Engineering' OR d.id IS NULL;
8.3 重复数据 #
sql
-- 问题: 一对多关系导致重复
SELECT e.name, p.name
FROM employees e
JOIN projects p ON e.department_id = p.department_id;
-- 一个部门多个项目,员工会重复
-- 解决: 使用 DISTINCT 或 GROUP BY
SELECT DISTINCT e.name
FROM employees e
JOIN projects p ON e.department_id = p.department_id;
九、总结 #
JOIN 语句要点:
| 类型 | 说明 |
|---|---|
| INNER JOIN | 只返回匹配行 |
| LEFT JOIN | 左表全部+右表匹配 |
| RIGHT JOIN | 右表全部+左表匹配 |
| CROSS JOIN | 笛卡尔积 |
| 自连接 | 同一表的不同行关联 |
下一步,让我们学习子查询!
最后更新:2026-03-27