Oracle分析函数 #
一、分析函数概述 #
1.1 什么是分析函数 #
分析函数(Analytic Functions)是Oracle提供的用于复杂数据分析的函数。与聚合函数不同,分析函数不会减少结果集的行数,而是为每行计算一个值。
text
分析函数特点
├── 不减少行数
├── 可以访问其他行的数据
├── 支持分区和排序
└── 支持窗口定义
1.2 基本语法 #
sql
-- 分析函数基本语法
function_name(arguments) OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression [ASC|DESC] [NULLS FIRST|LAST]]
[windowing_clause]
)
1.3 示例表结构 #
sql
-- 创建示例表
CREATE TABLE sales (
sale_id NUMBER,
employee_id NUMBER,
sale_date DATE,
amount NUMBER,
department_id NUMBER
);
-- 插入测试数据
INSERT INTO sales VALUES (1, 1, TO_DATE('2024-01-01', 'YYYY-MM-DD'), 1000, 10);
INSERT INTO sales VALUES (2, 1, TO_DATE('2024-01-02', 'YYYY-MM-DD'), 1500, 10);
INSERT INTO sales VALUES (3, 2, TO_DATE('2024-01-01', 'YYYY-MM-DD'), 2000, 10);
INSERT INTO sales VALUES (4, 2, TO_DATE('2024-01-02', 'YYYY-MM-DD'), 2500, 10);
INSERT INTO sales VALUES (5, 3, TO_DATE('2024-01-01', 'YYYY-MM-DD'), 3000, 20);
INSERT INTO sales VALUES (6, 3, TO_DATE('2024-01-02', 'YYYY-MM-DD'), 3500, 20);
INSERT INTO sales VALUES (7, 4, TO_DATE('2024-01-01', 'YYYY-MM-DD'), 4000, 20);
INSERT INTO sales VALUES (8, 4, TO_DATE('2024-01-02', 'YYYY-MM-DD'), 4500, 20);
二、排名函数 #
2.1 ROW_NUMBER #
sql
-- ROW_NUMBER:为每行分配唯一序号
SELECT
employee_id,
sale_date,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;
-- 结果
EMPLOYEE_ID SALE_DATE AMOUNT ROW_NUM
----------- ----------- ------- -------
4 2024-01-02 4500 1
4 2024-01-01 4000 2
3 2024-01-02 3500 3
3 2024-01-01 3000 4
2 2024-01-02 2500 5
2 2024-01-01 2000 6
1 2024-01-02 1500 7
1 2024-01-01 1000 8
-- 分区排名
SELECT
employee_id,
department_id,
amount,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY amount DESC) AS dept_rank
FROM sales;
-- 获取每个部门前2名
SELECT * FROM (
SELECT
employee_id,
department_id,
amount,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY amount DESC) AS dept_rank
FROM sales
)
WHERE dept_rank <= 2;
2.2 RANK和DENSE_RANK #
sql
-- RANK:排名,相同值相同排名,有跳跃
-- DENSE_RANK:排名,相同值相同排名,无跳跃
SELECT
employee_id,
amount,
RANK() OVER (ORDER BY amount DESC) AS rank_num,
DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank_num,
ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;
-- 假设有重复金额
-- AMOUNT RANK DENSE_RANK ROW_NUMBER
-- 4500 1 1 1
-- 4000 2 2 2
-- 3500 3 3 3
-- 3500 3 3 4
-- 3000 5 4 5 -- RANK跳过4
-- 获取薪资排名前3的员工(包含并列)
SELECT * FROM (
SELECT
employee_id,
first_name,
salary,
DENSE_RANK() OVER (ORDER BY salary DESC) AS salary_rank
FROM employees
)
WHERE salary_rank <= 3;
2.3 NTILE #
sql
-- NTILE(n):将数据分成n组
SELECT
employee_id,
amount,
NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;
-- 结果:将8条记录分成4组,每组2条
-- AMOUNT QUARTILE
-- 4500 1
-- 4000 1
-- 3500 2
-- 3000 2
-- 2500 3
-- 2000 3
-- 1500 4
-- 1000 4
2.4 PERCENT_RANK和CUME_DIST #
sql
-- PERCENT_RANK:百分比排名(0到1)
-- CUME_DIST:累积分布(小于等于当前值的行数/总行数)
SELECT
employee_id,
amount,
PERCENT_RANK() OVER (ORDER BY amount) AS pct_rank,
CUME_DIST() OVER (ORDER BY amount) AS cume_dist
FROM sales;
-- 结果
-- AMOUNT PCT_RANK CUME_DIST
-- 1000 0.00 0.125
-- 1500 0.14 0.250
-- 2000 0.28 0.375
-- 2500 0.42 0.500
-- 3000 0.57 0.625
-- 3500 0.71 0.750
-- 4000 0.85 0.875
-- 4500 1.00 1.000
三、聚合分析函数 #
3.1 基本聚合 #
sql
-- SUM、AVG、COUNT、MAX、MIN作为分析函数
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER () AS total_amount,
AVG(amount) OVER () AS avg_amount,
COUNT(*) OVER () AS total_count,
MAX(amount) OVER () AS max_amount,
MIN(amount) OVER () AS min_amount
FROM sales;
-- 分区聚合
SELECT
employee_id,
department_id,
amount,
SUM(amount) OVER (PARTITION BY department_id) AS dept_total,
AVG(amount) OVER (PARTITION BY department_id) AS dept_avg
FROM sales;
3.2 累积聚合 #
sql
-- 累积求和
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM sales;
-- 简写形式
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;
-- 分区累积求和
SELECT
employee_id,
department_id,
sale_date,
amount,
SUM(amount) OVER (
PARTITION BY department_id
ORDER BY sale_date
) AS dept_running_total
FROM sales;
3.3 移动聚合 #
sql
-- 移动平均(3天)
SELECT
employee_id,
sale_date,
amount,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_3
FROM sales;
-- 移动求和(前后各1行)
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS moving_sum_3
FROM sales;
-- 窗口子句说明
-- UNBOUNDED PRECEDING:分区的第一行
-- UNBOUNDED FOLLOWING:分区的最后一行
-- n PRECEDING:当前行之前的n行
-- n FOLLOWING:当前行之后的n行
-- CURRENT ROW:当前行
四、窗口函数 #
4.1 ROWS窗口 #
sql
-- ROWS:基于行的窗口
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
) AS moving_avg
FROM sales;
4.2 RANGE窗口 #
sql
-- RANGE:基于值的窗口
SELECT
employee_id,
sale_date,
amount,
SUM(amount) OVER (
ORDER BY amount
RANGE BETWEEN 500 PRECEDING AND 500 FOLLOWING
) AS range_sum
FROM sales;
-- RANGE与ROWS区别
-- ROWS:基于物理行位置
-- RANGE:基于逻辑值范围
4.3 窗口默认值 #
sql
-- 默认窗口
-- 有ORDER BY时:RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- 无ORDER BY时:RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
-- 等价写法
SELECT
employee_id,
amount,
SUM(amount) OVER (ORDER BY amount) AS sum1,
SUM(amount) OVER (
ORDER BY amount
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS sum2
FROM sales;
五、偏移函数 #
5.1 LAG和LEAD #
sql
-- LAG:访问前n行的值
-- LEAD:访问后n行的值
SELECT
employee_id,
sale_date,
amount,
LAG(amount) OVER (ORDER BY sale_date) AS prev_amount,
LEAD(amount) OVER (ORDER BY sale_date) AS next_amount,
LAG(amount, 2, 0) OVER (ORDER BY sale_date) AS prev_2_amount
FROM sales;
-- 计算环比增长
SELECT
employee_id,
sale_date,
amount,
LAG(amount) OVER (ORDER BY sale_date) AS prev_amount,
amount - LAG(amount) OVER (ORDER BY sale_date) AS diff,
ROUND((amount - LAG(amount) OVER (ORDER BY sale_date)) /
LAG(amount) OVER (ORDER BY sale_date) * 100, 2) AS growth_pct
FROM sales;
-- 分区偏移
SELECT
employee_id,
department_id,
sale_date,
amount,
LAG(amount) OVER (PARTITION BY department_id ORDER BY sale_date) AS prev_dept_amount
FROM sales;
5.2 FIRST_VALUE和LAST_VALUE #
sql
-- FIRST_VALUE:窗口第一行的值
-- LAST_VALUE:窗口最后一行的值
SELECT
employee_id,
sale_date,
amount,
FIRST_VALUE(amount) OVER (ORDER BY sale_date) AS first_amount,
LAST_VALUE(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount
FROM sales;
-- 注意:LAST_VALUE默认窗口是到当前行
-- 需要指定完整窗口才能获取真正的最后一个值
-- 分区首尾值
SELECT
employee_id,
department_id,
sale_date,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY department_id
ORDER BY sale_date
) AS dept_first_amount,
LAST_VALUE(amount) OVER (
PARTITION BY department_id
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS dept_last_amount
FROM sales;
5.3 NTH_VALUE #
sql
-- NTH_VALUE:获取窗口中第n行的值
SELECT
employee_id,
sale_date,
amount,
NTH_VALUE(amount, 2) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS second_amount
FROM sales;
-- 从最后开始计数
SELECT
employee_id,
sale_date,
amount,
NTH_VALUE(amount, 2) FROM LAST OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS second_last_amount
FROM sales;
六、比率函数 #
6.1 RATIO_TO_REPORT #
sql
-- RATIO_TO_REPORT:计算比率
SELECT
employee_id,
amount,
RATIO_TO_REPORT(amount) OVER () AS ratio,
ROUND(RATIO_TO_REPORT(amount) OVER () * 100, 2) AS pct
FROM sales;
-- 分区比率
SELECT
employee_id,
department_id,
amount,
RATIO_TO_REPORT(amount) OVER (PARTITION BY department_id) AS dept_ratio
FROM sales;
6.2 PERCENTILE_CONT和PERCENTILE_DISC #
sql
-- PERCENTILE_CONT:连续百分位数
-- PERCENTILE_DISC:离散百分位数
SELECT
department_id,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount) AS median,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY amount) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY amount) AS q3
FROM sales
GROUP BY department_id;
-- 作为分析函数使用
SELECT DISTINCT
department_id,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount)
OVER (PARTITION BY department_id) AS median
FROM sales;
七、LISTAGG分析函数 #
7.1 基本用法 #
sql
-- LISTAGG:字符串聚合
SELECT
department_id,
LISTAGG(employee_id, ',') WITHIN GROUP (ORDER BY employee_id) AS employees
FROM sales
GROUP BY department_id;
-- 作为分析函数
SELECT
employee_id,
department_id,
amount,
LISTAGG(employee_id, ',') WITHIN GROUP (ORDER BY employee_id)
OVER (PARTITION BY department_id) AS dept_employees
FROM sales;
7.2 处理超长字符串 #
sql
-- 12c R2+:处理超长字符串
SELECT
department_id,
LISTAGG(employee_id, ',' ON OVERFLOW TRUNCATE '...' WITH COUNT)
WITHIN GROUP (ORDER BY employee_id) AS employees
FROM sales
GROUP BY department_id;
八、复杂应用示例 #
8.1 分页查询 #
sql
-- 使用ROW_NUMBER分页
SELECT * FROM (
SELECT
e.*,
ROW_NUMBER() OVER (ORDER BY employee_id) AS rn
FROM employees e
)
WHERE rn BETWEEN 6 AND 10;
-- 使用FETCH(12c+)
SELECT * FROM employees
ORDER BY employee_id
OFFSET 5 ROWS FETCH NEXT 5 ROWS ONLY;
8.2 去重 #
sql
-- 使用ROW_NUMBER去重
SELECT * FROM (
SELECT
t.*,
ROW_NUMBER() OVER (PARTITION BY key_column ORDER BY update_date DESC) AS rn
FROM table_name t
)
WHERE rn = 1;
8.3 同比环比分析 #
sql
-- 同比环比分析
WITH monthly_sales AS (
SELECT
TO_CHAR(sale_date, 'YYYY-MM') AS month,
SUM(amount) AS total_amount
FROM sales
GROUP BY TO_CHAR(sale_date, 'YYYY-MM')
)
SELECT
month,
total_amount,
LAG(total_amount, 1) OVER (ORDER BY month) AS prev_month,
LAG(total_amount, 12) OVER (ORDER BY month) AS prev_year_month,
ROUND((total_amount - LAG(total_amount, 1) OVER (ORDER BY month)) /
LAG(total_amount, 1) OVER (ORDER BY month) * 100, 2) AS mom_growth,
ROUND((total_amount - LAG(total_amount, 12) OVER (ORDER BY month)) /
LAG(total_amount, 12) OVER (ORDER BY month) * 100, 2) AS yoy_growth
FROM monthly_sales;
8.4 连续问题 #
sql
-- 查找连续登录的用户
WITH login_data AS (
SELECT
user_id,
login_date,
login_date - ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS grp
FROM user_logins
)
SELECT
user_id,
MIN(login_date) AS start_date,
MAX(login_date) AS end_date,
COUNT(*) AS consecutive_days
FROM login_data
GROUP BY user_id, grp
HAVING COUNT(*) >= 3
ORDER BY user_id, start_date;
九、性能优化 #
9.1 执行计划 #
sql
-- 查看分析函数执行计划
EXPLAIN PLAN FOR
SELECT
employee_id,
amount,
SUM(amount) OVER (PARTITION BY department_id ORDER BY sale_date) AS running_total
FROM sales;
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
9.2 优化建议 #
sql
-- 1. 减少分区数量
-- 分区越多,内存消耗越大
-- 2. 使用适当的窗口大小
-- 避免不必要的UNBOUNDED FOLLOWING
-- 3. 合并相似的分析函数
-- 多个相同窗口的分析函数可以合并计算
SELECT
employee_id,
amount,
SUM(amount) OVER (PARTITION BY department_id) AS dept_sum,
AVG(amount) OVER (PARTITION BY department_id) AS dept_avg,
COUNT(*) OVER (PARTITION BY department_id) AS dept_count
FROM sales;
-- 4. 使用索引
-- 在分区和排序列上创建索引
CREATE INDEX idx_sales_dept_date ON sales(department_id, sale_date);
十、总结 #
分析函数分类:
| 类别 | 函数 | 用途 |
|---|---|---|
| 排名 | ROW_NUMBER, RANK, DENSE_RANK | 排名计算 |
| 聚合 | SUM, AVG, COUNT, MAX, MIN | 聚合计算 |
| 偏移 | LAG, LEAD, FIRST_VALUE, LAST_VALUE | 访问其他行 |
| 比率 | RATIO_TO_REPORT, PERCENTILE | 比率计算 |
| 分布 | NTILE, PERCENT_RANK, CUME_DIST | 分布计算 |
最佳实践:
- 使用ROW_NUMBER进行分页
- 使用LAG/LEAD计算环比
- 注意窗口定义对结果的影响
- 合理使用分区减少计算量
- 在分区和排序列上创建索引
下一步,让我们学习PL/SQL编程!
最后更新:2026-03-27