MariaDB窗口函数 #
一、窗口函数概述 #
1.1 什么是窗口函数 #
text
窗口函数概念
├── 对一组行(窗口)进行计算
├── 为每行返回一个值
├── 不会减少结果行数
└── 类似于聚合函数但保留原行
窗口函数语法
├── 函数名() OVER (...)
│ ├── PARTITION BY: 分区
│ ├── ORDER BY: 排序
│ └── frame_clause: 窗口范围
1.2 基本语法 #
sql
function_name([arguments]) OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression]
[frame_clause]
)
1.3 示例表 #
sql
CREATE TABLE sales (
id INT PRIMARY KEY,
salesperson VARCHAR(50),
region VARCHAR(50),
amount DECIMAL(10,2),
sale_date DATE
);
INSERT INTO sales VALUES
(1, 'John', 'North', 1000.00, '2024-01-15'),
(2, 'John', 'North', 1500.00, '2024-01-16'),
(3, 'Jane', 'South', 2000.00, '2024-01-15'),
(4, 'Jane', 'South', 1800.00, '2024-01-16'),
(5, 'Bob', 'North', 1200.00, '2024-01-15'),
(6, 'Bob', 'North', 900.00, '2024-01-16'),
(7, 'Alice', 'South', 2500.00, '2024-01-15'),
(8, 'Alice', 'South', 2200.00, '2024-01-16');
二、排名函数 #
2.1 ROW_NUMBER #
sql
-- 为每行分配唯一序号
SELECT
salesperson,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;
+-------------+---------+---------+
| salesperson | amount | row_num |
+-------------+---------+---------+
| Alice | 2500.00 | 1 |
| Alice | 2200.00 | 2 |
| Jane | 2000.00 | 3 |
| Jane | 1800.00 | 4 |
| John | 1500.00 | 5 |
| Bob | 1200.00 | 6 |
| John | 1000.00 | 7 |
| Bob | 900.00 | 8 |
+-------------+---------+---------+
2.2 RANK #
sql
-- 排名,相同值相同排名,跳过后续名次
SELECT
salesperson,
amount,
RANK() OVER (ORDER BY amount DESC) AS rank
FROM sales;
-- 如果有并列:
-- 1, 2, 2, 4, 5...(跳过3)
2.3 DENSE_RANK #
sql
-- 排名,相同值相同排名,不跳过后续名次
SELECT
salesperson,
amount,
DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank
FROM sales;
-- 如果有并列:
-- 1, 2, 2, 3, 4...(不跳过)
2.4 分区排名 #
sql
-- 按区域排名
SELECT
salesperson,
region,
amount,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) AS region_rank
FROM sales;
+-------------+--------+---------+-------------+
| salesperson | region | amount | region_rank |
+-------------+--------+---------+-------------+
| Alice | South | 2500.00 | 1 |
| Alice | South | 2200.00 | 2 |
| Jane | South | 2000.00 | 3 |
| Jane | South | 1800.00 | 4 |
| John | North | 1500.00 | 1 |
| Bob | North | 1200.00 | 2 |
| John | North | 1000.00 | 3 |
| Bob | North | 900.00 | 4 |
+-------------+--------+---------+-------------+
2.5 NTILE #
sql
-- 将数据分成N组
SELECT
salesperson,
amount,
NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;
-- 将数据分成4组,每组2条
三、聚合窗口函数 #
3.1 SUM #
sql
-- 累计求和
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;
-- 分区累计求和
SELECT
salesperson,
sale_date,
amount,
SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS running_total
FROM sales;
3.2 AVG #
sql
-- 移动平均
SELECT
sale_date,
amount,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg
FROM sales;
3.3 COUNT #
sql
-- 累计计数
SELECT
sale_date,
amount,
COUNT(*) OVER (ORDER BY sale_date) AS running_count
FROM sales;
-- 分区计数
SELECT
salesperson,
sale_date,
COUNT(*) OVER (PARTITION BY salesperson) AS sales_count
FROM sales;
3.4 MAX/MIN #
sql
-- 累计最大值
SELECT
sale_date,
amount,
MAX(amount) OVER (ORDER BY sale_date) AS running_max
FROM sales;
-- 分区最大值
SELECT
salesperson,
amount,
MAX(amount) OVER (PARTITION BY salesperson) AS max_amount
FROM sales;
四、偏移函数 #
4.1 LAG #
sql
-- 获取前一行的值
SELECT
sale_date,
amount,
LAG(amount) OVER (ORDER BY sale_date) AS prev_amount,
amount - LAG(amount) OVER (ORDER BY sale_date) AS diff
FROM sales;
-- 指定偏移量和默认值
SELECT
sale_date,
amount,
LAG(amount, 2, 0) OVER (ORDER BY sale_date) AS prev_2_amount
FROM sales;
4.2 LEAD #
sql
-- 获取后一行的值
SELECT
sale_date,
amount,
LEAD(amount) OVER (ORDER BY sale_date) AS next_amount
FROM sales;
-- 指定偏移量
SELECT
sale_date,
amount,
LEAD(amount, 2) OVER (ORDER BY sale_date) AS next_2_amount
FROM sales;
4.3 分区偏移 #
sql
-- 按销售员分区
SELECT
salesperson,
sale_date,
amount,
LAG(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS prev_amount
FROM sales;
五、首尾函数 #
5.1 FIRST_VALUE #
sql
-- 获取分区第一个值
SELECT
salesperson,
sale_date,
amount,
FIRST_VALUE(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS first_amount
FROM sales;
5.2 LAST_VALUE #
sql
-- 获取分区最后一个值
SELECT
salesperson,
sale_date,
amount,
LAST_VALUE(amount) OVER (
PARTITION BY salesperson
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount
FROM sales;
5.3 NTH_VALUE #
sql
-- 获取第N个值
SELECT
salesperson,
sale_date,
amount,
NTH_VALUE(amount, 2) OVER (PARTITION BY salesperson ORDER BY sale_date) AS second_amount
FROM sales;
六、窗口范围 #
6.1 ROWS范围 #
sql
-- ROWS BETWEEN start AND end
-- UNBOUNDED PRECEDING: 窗口开始
-- UNBOUNDED FOLLOWING: 窗口结束
-- CURRENT ROW: 当前行
-- N PRECEDING: 前N行
-- N FOLLOWING: 后N行
-- 从开始到当前行
SUM(amount) OVER (ORDER BY sale_date ROWS UNBOUNDED PRECEDING)
-- 前2行到当前行
AVG(amount) OVER (ORDER BY sale_date ROWS 2 PRECEDING)
-- 前2行到后2行
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
-- 当前行到结束
SUM(amount) OVER (ORDER BY sale_date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
6.2 RANGE范围 #
sql
-- RANGE基于值的范围
-- 适合日期等连续值
-- 当天和前一天
SUM(amount) OVER (ORDER BY sale_date RANGE BETWEEN INTERVAL 1 DAY PRECEDING AND CURRENT ROW)
七、命名窗口 #
7.1 定义命名窗口 #
sql
-- 使用WINDOW子句定义窗口
SELECT
salesperson,
amount,
ROW_NUMBER() OVER w AS row_num,
SUM(amount) OVER w AS total,
AVG(amount) OVER w AS avg_amount
FROM sales
WINDOW w AS (PARTITION BY salesperson ORDER BY sale_date);
7.2 多个窗口 #
sql
-- 定义多个窗口
SELECT
salesperson,
region,
amount,
ROW_NUMBER() OVER w1 AS region_rank,
ROW_NUMBER() OVER w2 AS global_rank
FROM sales
WINDOW
w1 AS (PARTITION BY region ORDER BY amount DESC),
w2 AS (ORDER BY amount DESC);
八、实用示例 #
8.1 每组前N条 #
sql
-- 获取每个销售员的前2笔最高销售额
WITH ranked AS (
SELECT
salesperson,
amount,
ROW_NUMBER() OVER (PARTITION BY salesperson ORDER BY amount DESC) AS rn
FROM sales
)
SELECT salesperson, amount
FROM ranked
WHERE rn <= 2;
8.2 同比环比 #
sql
-- 月度销售环比
SELECT
DATE_FORMAT(sale_date, '%Y-%m') AS month,
SUM(amount) AS total,
LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m')) AS prev_month,
(SUM(amount) - LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m'))) / LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m')) * 100 AS growth_rate
FROM sales
GROUP BY DATE_FORMAT(sale_date, '%Y-%m');
8.3 百分比计算 #
sql
-- 计算每个销售占总销售额的百分比
SELECT
salesperson,
amount,
amount / SUM(amount) OVER () * 100 AS percentage
FROM sales;
-- 计算累计百分比
SELECT
salesperson,
amount,
SUM(amount) OVER (ORDER BY amount DESC) / SUM(amount) OVER () * 100 AS cumulative_pct
FROM sales;
8.4 去重保留最新 #
sql
-- 保留每个销售员最新的记录
WITH ranked AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY salesperson ORDER BY sale_date DESC) AS rn
FROM sales
)
SELECT * FROM ranked WHERE rn = 1;
九、性能优化 #
9.1 索引优化 #
sql
-- 为窗口函数创建索引
CREATE INDEX idx_sales_date ON sales(sale_date);
CREATE INDEX idx_sales_person_date ON sales(salesperson, sale_date);
9.2 避免重复计算 #
sql
-- 使用命名窗口避免重复计算
SELECT
salesperson,
amount,
ROW_NUMBER() OVER w AS rn,
RANK() OVER w AS rnk,
DENSE_RANK() OVER w AS drnk
FROM sales
WINDOW w AS (ORDER BY amount DESC);
十、总结 #
窗口函数分类:
| 类别 | 函数 |
|---|---|
| 排名 | ROW_NUMBER, RANK, DENSE_RANK, NTILE |
| 聚合 | SUM, AVG, COUNT, MAX, MIN |
| 偏移 | LAG, LEAD |
| 首尾 | FIRST_VALUE, LAST_VALUE, NTH_VALUE |
最佳实践:
- 使用PARTITION BY分区
- 使用ORDER BY排序
- 使用命名窗口简化代码
- 创建适当的索引
- 注意窗口范围设置
下一步,让我们学习CTE递归查询!
最后更新:2026-03-27