MariaDB窗口函数 #

一、窗口函数概述 #

1.1 什么是窗口函数 #

text
窗口函数概念
├── 对一组行(窗口)进行计算
├── 为每行返回一个值
├── 不会减少结果行数
└── 类似于聚合函数但保留原行

窗口函数语法
├── 函数名() OVER (...)
│   ├── PARTITION BY: 分区
│   ├── ORDER BY: 排序
│   └── frame_clause: 窗口范围

1.2 基本语法 #

sql
function_name([arguments]) OVER (
    [PARTITION BY partition_expression]
    [ORDER BY sort_expression]
    [frame_clause]
)

1.3 示例表 #

sql
CREATE TABLE sales (
    id INT PRIMARY KEY,
    salesperson VARCHAR(50),
    region VARCHAR(50),
    amount DECIMAL(10,2),
    sale_date DATE
);

INSERT INTO sales VALUES
    (1, 'John', 'North', 1000.00, '2024-01-15'),
    (2, 'John', 'North', 1500.00, '2024-01-16'),
    (3, 'Jane', 'South', 2000.00, '2024-01-15'),
    (4, 'Jane', 'South', 1800.00, '2024-01-16'),
    (5, 'Bob', 'North', 1200.00, '2024-01-15'),
    (6, 'Bob', 'North', 900.00, '2024-01-16'),
    (7, 'Alice', 'South', 2500.00, '2024-01-15'),
    (8, 'Alice', 'South', 2200.00, '2024-01-16');

二、排名函数 #

2.1 ROW_NUMBER #

sql
-- 为每行分配唯一序号
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;

+-------------+---------+---------+
| salesperson | amount  | row_num |
+-------------+---------+---------+
| Alice       | 2500.00 |       1 |
| Alice       | 2200.00 |       2 |
| Jane        | 2000.00 |       3 |
| Jane        | 1800.00 |       4 |
| John        | 1500.00 |       5 |
| Bob         | 1200.00 |       6 |
| John        | 1000.00 |       7 |
| Bob         |  900.00 |       8 |
+-------------+---------+---------+

2.2 RANK #

sql
-- 排名,相同值相同排名,跳过后续名次
SELECT 
    salesperson,
    amount,
    RANK() OVER (ORDER BY amount DESC) AS rank
FROM sales;

-- 如果有并列:
-- 1, 2, 2, 4, 5...(跳过3)

2.3 DENSE_RANK #

sql
-- 排名,相同值相同排名,不跳过后续名次
SELECT 
    salesperson,
    amount,
    DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank
FROM sales;

-- 如果有并列:
-- 1, 2, 2, 3, 4...(不跳过)

2.4 分区排名 #

sql
-- 按区域排名
SELECT 
    salesperson,
    region,
    amount,
    ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) AS region_rank
FROM sales;

+-------------+--------+---------+-------------+
| salesperson | region | amount  | region_rank |
+-------------+--------+---------+-------------+
| Alice       | South  | 2500.00 |           1 |
| Alice       | South  | 2200.00 |           2 |
| Jane        | South  | 2000.00 |           3 |
| Jane        | South  | 1800.00 |           4 |
| John        | North  | 1500.00 |           1 |
| Bob         | North  | 1200.00 |           2 |
| John        | North  | 1000.00 |           3 |
| Bob         | North  |  900.00 |           4 |
+-------------+--------+---------+-------------+

2.5 NTILE #

sql
-- 将数据分成N组
SELECT 
    salesperson,
    amount,
    NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;

-- 将数据分成4组,每组2条

三、聚合窗口函数 #

3.1 SUM #

sql
-- 累计求和
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;

-- 分区累计求和
SELECT 
    salesperson,
    sale_date,
    amount,
    SUM(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS running_total
FROM sales;

3.2 AVG #

sql
-- 移动平均
SELECT 
    sale_date,
    amount,
    AVG(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS moving_avg
FROM sales;

3.3 COUNT #

sql
-- 累计计数
SELECT 
    sale_date,
    amount,
    COUNT(*) OVER (ORDER BY sale_date) AS running_count
FROM sales;

-- 分区计数
SELECT 
    salesperson,
    sale_date,
    COUNT(*) OVER (PARTITION BY salesperson) AS sales_count
FROM sales;

3.4 MAX/MIN #

sql
-- 累计最大值
SELECT 
    sale_date,
    amount,
    MAX(amount) OVER (ORDER BY sale_date) AS running_max
FROM sales;

-- 分区最大值
SELECT 
    salesperson,
    amount,
    MAX(amount) OVER (PARTITION BY salesperson) AS max_amount
FROM sales;

四、偏移函数 #

4.1 LAG #

sql
-- 获取前一行的值
SELECT 
    sale_date,
    amount,
    LAG(amount) OVER (ORDER BY sale_date) AS prev_amount,
    amount - LAG(amount) OVER (ORDER BY sale_date) AS diff
FROM sales;

-- 指定偏移量和默认值
SELECT 
    sale_date,
    amount,
    LAG(amount, 2, 0) OVER (ORDER BY sale_date) AS prev_2_amount
FROM sales;

4.2 LEAD #

sql
-- 获取后一行的值
SELECT 
    sale_date,
    amount,
    LEAD(amount) OVER (ORDER BY sale_date) AS next_amount
FROM sales;

-- 指定偏移量
SELECT 
    sale_date,
    amount,
    LEAD(amount, 2) OVER (ORDER BY sale_date) AS next_2_amount
FROM sales;

4.3 分区偏移 #

sql
-- 按销售员分区
SELECT 
    salesperson,
    sale_date,
    amount,
    LAG(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS prev_amount
FROM sales;

五、首尾函数 #

5.1 FIRST_VALUE #

sql
-- 获取分区第一个值
SELECT 
    salesperson,
    sale_date,
    amount,
    FIRST_VALUE(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS first_amount
FROM sales;

5.2 LAST_VALUE #

sql
-- 获取分区最后一个值
SELECT 
    salesperson,
    sale_date,
    amount,
    LAST_VALUE(amount) OVER (
        PARTITION BY salesperson 
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_amount
FROM sales;

5.3 NTH_VALUE #

sql
-- 获取第N个值
SELECT 
    salesperson,
    sale_date,
    amount,
    NTH_VALUE(amount, 2) OVER (PARTITION BY salesperson ORDER BY sale_date) AS second_amount
FROM sales;

六、窗口范围 #

6.1 ROWS范围 #

sql
-- ROWS BETWEEN start AND end
-- UNBOUNDED PRECEDING: 窗口开始
-- UNBOUNDED FOLLOWING: 窗口结束
-- CURRENT ROW: 当前行
-- N PRECEDING: 前N行
-- N FOLLOWING: 后N行

-- 从开始到当前行
SUM(amount) OVER (ORDER BY sale_date ROWS UNBOUNDED PRECEDING)

-- 前2行到当前行
AVG(amount) OVER (ORDER BY sale_date ROWS 2 PRECEDING)

-- 前2行到后2行
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)

-- 当前行到结束
SUM(amount) OVER (ORDER BY sale_date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)

6.2 RANGE范围 #

sql
-- RANGE基于值的范围
-- 适合日期等连续值

-- 当天和前一天
SUM(amount) OVER (ORDER BY sale_date RANGE BETWEEN INTERVAL 1 DAY PRECEDING AND CURRENT ROW)

七、命名窗口 #

7.1 定义命名窗口 #

sql
-- 使用WINDOW子句定义窗口
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER w AS row_num,
    SUM(amount) OVER w AS total,
    AVG(amount) OVER w AS avg_amount
FROM sales
WINDOW w AS (PARTITION BY salesperson ORDER BY sale_date);

7.2 多个窗口 #

sql
-- 定义多个窗口
SELECT 
    salesperson,
    region,
    amount,
    ROW_NUMBER() OVER w1 AS region_rank,
    ROW_NUMBER() OVER w2 AS global_rank
FROM sales
WINDOW 
    w1 AS (PARTITION BY region ORDER BY amount DESC),
    w2 AS (ORDER BY amount DESC);

八、实用示例 #

8.1 每组前N条 #

sql
-- 获取每个销售员的前2笔最高销售额
WITH ranked AS (
    SELECT 
        salesperson,
        amount,
        ROW_NUMBER() OVER (PARTITION BY salesperson ORDER BY amount DESC) AS rn
    FROM sales
)
SELECT salesperson, amount
FROM ranked
WHERE rn <= 2;

8.2 同比环比 #

sql
-- 月度销售环比
SELECT 
    DATE_FORMAT(sale_date, '%Y-%m') AS month,
    SUM(amount) AS total,
    LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m')) AS prev_month,
    (SUM(amount) - LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m'))) / LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(sale_date, '%Y-%m')) * 100 AS growth_rate
FROM sales
GROUP BY DATE_FORMAT(sale_date, '%Y-%m');

8.3 百分比计算 #

sql
-- 计算每个销售占总销售额的百分比
SELECT 
    salesperson,
    amount,
    amount / SUM(amount) OVER () * 100 AS percentage
FROM sales;

-- 计算累计百分比
SELECT 
    salesperson,
    amount,
    SUM(amount) OVER (ORDER BY amount DESC) / SUM(amount) OVER () * 100 AS cumulative_pct
FROM sales;

8.4 去重保留最新 #

sql
-- 保留每个销售员最新的记录
WITH ranked AS (
    SELECT *,
        ROW_NUMBER() OVER (PARTITION BY salesperson ORDER BY sale_date DESC) AS rn
    FROM sales
)
SELECT * FROM ranked WHERE rn = 1;

九、性能优化 #

9.1 索引优化 #

sql
-- 为窗口函数创建索引
CREATE INDEX idx_sales_date ON sales(sale_date);
CREATE INDEX idx_sales_person_date ON sales(salesperson, sale_date);

9.2 避免重复计算 #

sql
-- 使用命名窗口避免重复计算
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER w AS rn,
    RANK() OVER w AS rnk,
    DENSE_RANK() OVER w AS drnk
FROM sales
WINDOW w AS (ORDER BY amount DESC);

十、总结 #

窗口函数分类:

类别 函数
排名 ROW_NUMBER, RANK, DENSE_RANK, NTILE
聚合 SUM, AVG, COUNT, MAX, MIN
偏移 LAG, LEAD
首尾 FIRST_VALUE, LAST_VALUE, NTH_VALUE

最佳实践:

  1. 使用PARTITION BY分区
  2. 使用ORDER BY排序
  3. 使用命名窗口简化代码
  4. 创建适当的索引
  5. 注意窗口范围设置

下一步,让我们学习CTE递归查询!

最后更新:2026-03-27