PostgreSQL 高级查询 #
高级查询概述 #
PostgreSQL 提供了强大的高级查询功能,可以处理复杂的数据分析需求。
text
┌─────────────────────────────────────────────────────────────┐
│ 高级查询技术 │
├─────────────────────────────────────────────────────────────┤
│ │
│ 子查询 (Subquery) │
│ ├── 标量子查询 │
│ ├── 列子查询 │
│ ├── 行子查询 │
│ └── 表子查询 │
│ │
│ 公用表表达式 (CTE) │
│ ├── 普通 CTE │
│ └── 递归 CTE │
│ │
│ 窗口函数 (Window Functions) │
│ ├── 排名函数 │
│ ├── 聚合窗口函数 │
│ └── 分析函数 │
│ │
└─────────────────────────────────────────────────────────────┘
子查询 #
标量子查询 #
sql
-- 创建示例表
CREATE TABLE departments (
id SERIAL PRIMARY KEY,
name VARCHAR(100)
);
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department_id INTEGER REFERENCES departments(id),
salary DECIMAL(10, 2),
hire_date DATE
);
INSERT INTO departments (name) VALUES ('Engineering'), ('Sales'), ('Marketing');
INSERT INTO employees (name, department_id, salary, hire_date) VALUES
('Alice', 1, 80000.00, '2020-01-15'),
('Bob', 2, 60000.00, '2021-03-20'),
('Charlie', 1, 90000.00, '2019-06-10'),
('Diana', 3, 70000.00, '2022-02-28'),
('Eve', 2, 65000.00, '2021-08-05'),
('Frank', 1, 85000.00, '2020-11-01');
-- 标量子查询返回单个值
SELECT
name,
salary,
(SELECT AVG(salary) FROM employees) AS avg_salary,
salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;
-- 在 WHERE 中使用
SELECT * FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
列子查询 #
sql
-- 返回单列多行
SELECT * FROM employees
WHERE department_id IN (SELECT id FROM departments WHERE name IN ('Engineering', 'Sales'));
-- 使用 ANY
SELECT * FROM employees
WHERE salary > ANY (
SELECT salary FROM employees WHERE department_id = 2
);
-- 使用 ALL
SELECT * FROM employees
WHERE salary > ALL (
SELECT salary FROM employees WHERE department_id = 2
);
-- EXISTS 检查是否存在
SELECT d.name
FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.id AND e.salary > 80000
);
-- NOT EXISTS
SELECT d.name
FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e WHERE e.department_id = d.id
);
行子查询 #
sql
-- 返回单行多列
SELECT * FROM employees
WHERE (department_id, salary) = (
SELECT department_id, MAX(salary)
FROM employees
GROUP BY department_id
LIMIT 1
);
-- 比较行
SELECT * FROM employees
WHERE (department_id, salary) > (1, 80000);
表子查询(派生表) #
sql
-- 在 FROM 中使用子查询
SELECT dept_stats.department_id, dept_stats.avg_salary
FROM (
SELECT
department_id,
AVG(salary) AS avg_salary,
COUNT(*) AS emp_count
FROM employees
GROUP BY department_id
) AS dept_stats
WHERE dept_stats.emp_count > 1;
-- 使用 LATERAL(PostgreSQL 特有)
-- LATERAL 允许子查询引用外部查询的列
SELECT
e.name,
e.salary,
top_salaries.name AS higher_paid,
top_salaries.salary AS higher_salary
FROM employees e
CROSS JOIN LATERAL (
SELECT name, salary
FROM employees e2
WHERE e2.salary > e.salary
ORDER BY salary
LIMIT 2
) AS top_salaries;
公用表表达式(CTE) #
基本 CTE #
sql
-- 使用 CTE 简化复杂查询
WITH dept_stats AS (
SELECT
department_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary
FROM employees
GROUP BY department_id
)
SELECT
d.name AS department,
ds.emp_count,
ROUND(ds.avg_salary, 2) AS avg_salary,
ds.max_salary
FROM dept_stats ds
JOIN departments d ON d.id = ds.department_id
ORDER BY ds.avg_salary DESC;
-- 多个 CTE
WITH
high_earners AS (
SELECT * FROM employees WHERE salary > 75000
),
dept_counts AS (
SELECT department_id, COUNT(*) AS count
FROM employees
GROUP BY department_id
)
SELECT
he.name,
he.salary,
dc.count AS dept_size
FROM high_earners he
JOIN dept_counts dc ON he.department_id = dc.department_id;
CTE 数据修改 #
sql
-- 使用 CTE 进行数据修改
WITH deleted_employees AS (
DELETE FROM employees
WHERE salary < 60000
RETURNING *
)
INSERT INTO employee_archive
SELECT * FROM deleted_employees;
-- 使用 CTE 更新数据
WITH salary_updates AS (
SELECT
id,
salary * 1.1 AS new_salary
FROM employees
WHERE department_id = 1
)
UPDATE employees e
SET salary = su.new_salary
FROM salary_updates su
WHERE e.id = su.id;
递归 CTE #
sql
-- 创建组织结构表
CREATE TABLE org_chart (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
manager_id INTEGER REFERENCES org_chart(id)
);
INSERT INTO org_chart (name, manager_id) VALUES
('CEO', NULL), -- id: 1
('VP Engineering', 1), -- id: 2
('VP Sales', 1), -- id: 3
('Engineer 1', 2), -- id: 4
('Engineer 2', 2), -- id: 5
('Sales Rep 1', 3), -- id: 6
('Sales Rep 2', 3); -- id: 7
-- 递归查询组织结构
WITH RECURSIVE org_tree AS (
-- 基础查询:顶级管理者
SELECT
id,
name,
manager_id,
1 AS level,
name::TEXT AS path
FROM org_chart
WHERE manager_id IS NULL
UNION ALL
-- 递归查询:下属员工
SELECT
e.id,
e.name,
e.manager_id,
ot.level + 1,
ot.path || ' -> ' || e.name
FROM org_chart e
JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT
REPEAT(' ', level - 1) || name AS org_structure,
level,
path
FROM org_tree
ORDER BY path;
-- 输出:
-- org_structure | level | path
-- -----------------+-------+---------------------------
-- CEO | 1 | CEO
-- VP Engineer | 2 | CEO -> VP Engineering
-- Engineer 1 | 3 | CEO -> VP Engineering -> Engineer 1
-- Engineer 2 | 3 | CEO -> VP Engineering -> Engineer 2
-- VP Sales | 2 | CEO -> VP Sales
-- Sales Rep 1| 3 | CEO -> VP Sales -> Sales Rep 1
-- Sales Rep 2| 3 | CEO -> VP Sales -> Sales Rep 2
递归 CTE 应用场景 #
sql
-- 生成数字序列
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;
-- 生成日期序列
WITH RECURSIVE date_series AS (
SELECT DATE '2026-01-01' AS date
UNION ALL
SELECT date + INTERVAL '1 day'
FROM date_series
WHERE date < DATE '2026-01-10'
)
SELECT * FROM date_series;
-- 计算阶乘
WITH RECURSIVE factorial AS (
SELECT 1 AS n, 1 AS result
UNION ALL
SELECT n + 1, result * (n + 1)
FROM factorial
WHERE n < 10
)
SELECT n, result FROM factorial;
窗口函数 #
窗口函数概述 #
sql
-- 窗口函数语法
function_name([arguments]) OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression [ASC|DESC] [NULLS {FIRST|LAST}]]
[frame_clause]
)
排名函数 #
sql
-- ROW_NUMBER:连续编号
SELECT
name,
department_id,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num
FROM employees;
-- RANK:相同值相同排名,跳过后续排名
SELECT
name,
department_id,
salary,
RANK() OVER (ORDER BY salary DESC) AS rank
FROM employees;
-- DENSE_RANK:相同值相同排名,不跳过后续排名
SELECT
name,
department_id,
salary,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;
-- 对比
-- name | salary | row_num | rank | dense_rank
-- ---------+---------+---------+------+------------
-- Charlie | 90000 | 1 | 1 | 1
-- Frank | 85000 | 2 | 2 | 2
-- Alice | 80000 | 3 | 3 | 3
-- Diana | 70000 | 4 | 4 | 4
-- Eve | 65000 | 5 | 5 | 5
-- Bob | 60000 | 6 | 6 | 6
-- NTILE:分成 N 组
SELECT
name,
salary,
NTILE(3) OVER (ORDER BY salary DESC) AS salary_group
FROM employees;
分区排名 #
sql
-- 按部门分区排名
SELECT
name,
department_id,
salary,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_rank,
RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_rank_with_ties
FROM employees;
-- 每个部门薪资最高的员工
WITH ranked AS (
SELECT
name,
department_id,
salary,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC) AS rn
FROM employees
)
SELECT name, department_id, salary
FROM ranked
WHERE rn = 1;
聚合窗口函数 #
sql
-- 累计聚合
SELECT
name,
salary,
SUM(salary) OVER (ORDER BY salary) AS running_total,
AVG(salary) OVER (ORDER BY salary) AS running_avg,
COUNT(*) OVER (ORDER BY salary) AS running_count
FROM employees;
-- 分区聚合
SELECT
name,
department_id,
salary,
SUM(salary) OVER (PARTITION BY department_id) AS dept_total,
AVG(salary) OVER (PARTITION BY department_id) AS dept_avg,
salary - AVG(salary) OVER (PARTITION BY department_id) AS diff_from_dept_avg
FROM employees;
-- 全局聚合
SELECT
name,
salary,
AVG(salary) OVER () AS global_avg,
salary - AVG(salary) OVER () AS diff_from_global_avg
FROM employees;
帧子句(Frame Clause) #
sql
-- 帧子句定义窗口范围
-- ROWS BETWEEN start AND end
-- start/end: UNBOUNDED PRECEDING, n PRECEDING, CURRENT ROW, n FOLLOWING, UNBOUNDED FOLLOWING
-- 累计和(从开始到当前行)
SELECT
name,
salary,
SUM(salary) OVER (
ORDER BY id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM employees;
-- 移动平均(当前行和前后各一行)
SELECT
name,
salary,
AVG(salary) OVER (
ORDER BY id
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS moving_avg
FROM employees;
-- 前 N 行累计
SELECT
name,
salary,
SUM(salary) OVER (
ORDER BY id
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS last_3_sum
FROM employees;
-- ROWS vs RANGE
-- ROWS:基于物理行
-- RANGE:基于逻辑值(相同值视为同一组)
分析函数 #
sql
-- LAG:获取前 N 行的值
SELECT
name,
salary,
LAG(salary) OVER (ORDER BY salary) AS prev_salary,
salary - LAG(salary) OVER (ORDER BY salary) AS salary_diff
FROM employees;
-- LEAD:获取后 N 行的值
SELECT
name,
salary,
LEAD(salary) OVER (ORDER BY salary) AS next_salary
FROM employees;
-- 指定偏移量和默认值
SELECT
name,
salary,
LAG(salary, 2, 0) OVER (ORDER BY salary) AS prev_2_salary
FROM employees;
-- FIRST_VALUE / LAST_VALUE
SELECT
name,
department_id,
salary,
FIRST_VALUE(salary) OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_max_salary,
LAST_VALUE(salary) OVER (
PARTITION BY department_id
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS dept_min_salary
FROM employees;
-- NTH_VALUE:获取第 N 个值
SELECT
name,
salary,
NTH_VALUE(salary, 2) OVER (ORDER BY salary) AS second_salary
FROM employees;
分组比较 #
sql
-- 每个员工与部门平均比较
SELECT
name,
department_id,
salary,
AVG(salary) OVER (PARTITION BY department_id) AS dept_avg,
CASE
WHEN salary > AVG(salary) OVER (PARTITION BY department_id) THEN 'Above Average'
WHEN salary < AVG(salary) OVER (PARTITION BY department_id) THEN 'Below Average'
ELSE 'Average'
END AS salary_status
FROM employees;
CASE 表达式 #
简单 CASE #
sql
SELECT
name,
department_id,
CASE department_id
WHEN 1 THEN 'Engineering'
WHEN 2 THEN 'Sales'
WHEN 3 THEN 'Marketing'
ELSE 'Other'
END AS department_name
FROM employees;
搜索 CASE #
sql
SELECT
name,
salary,
CASE
WHEN salary >= 80000 THEN 'High'
WHEN salary >= 60000 THEN 'Medium'
ELSE 'Low'
END AS salary_level,
CASE
WHEN salary >= 80000 THEN salary * 0.1
WHEN salary >= 60000 THEN salary * 0.08
ELSE salary * 0.05
END AS bonus
FROM employees;
CASE 在聚合中 #
sql
-- 条件计数
SELECT
department_id,
COUNT(*) AS total,
SUM(CASE WHEN salary >= 80000 THEN 1 ELSE 0 END) AS high_earners,
SUM(CASE WHEN salary < 60000 THEN 1 ELSE 0 END) AS low_earners,
AVG(CASE WHEN salary >= 60000 THEN salary END) AS avg_above_60k
FROM employees
GROUP BY department_id;
-- 行转列
SELECT
department_id,
MAX(CASE WHEN name = 'Alice' THEN salary END) AS alice_salary,
MAX(CASE WHEN name = 'Bob' THEN salary END) AS bob_salary,
MAX(CASE WHEN name = 'Charlie' THEN salary END) AS charlie_salary
FROM employees
GROUP BY department_id;
学习路径 #
text
进阶阶段
├── 高级查询(本文)
├── 多表查询
├── 索引优化
└── 事务处理
下一步 #
掌握了高级查询后,接下来学习 多表查询,了解各种 JOIN 连接操作!
最后更新:2026-03-29