高级查询 #

一、CTE (Common Table Expression) #

1.1 基本CTE #

sql
-- 基本CTE
WITH user_orders AS (
    SELECT 
        user_id,
        COUNT(*) AS order_count,
        SUM(amount) AS total_amount
    FROM orders
    GROUP BY user_id
)
SELECT 
    u.name,
    COALESCE(uo.order_count, 0) AS orders,
    COALESCE(uo.total_amount, 0) AS total
FROM users u
LEFT JOIN user_orders uo ON u.id = uo.user_id;

-- 多个CTE
WITH 
active_users AS (
    SELECT * FROM users WHERE status = 'active'
),
user_stats AS (
    SELECT 
        user_id,
        COUNT(*) AS order_count
    FROM orders
    GROUP BY user_id
)
SELECT 
    au.name,
    COALESCE(us.order_count, 0) AS orders
FROM active_users au
LEFT JOIN user_stats us ON au.id = us.user_id;

1.2 递归CTE #

sql
-- 递归CTE: 组织层级
WITH RECURSIVE employee_hierarchy AS (
    -- 基础查询: CEO
    SELECT 
        id,
        name,
        manager_id,
        1 AS level,
        ARRAY[name] AS path
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- 递归查询: 下属
    SELECT 
        e.id,
        e.name,
        e.manager_id,
        eh.level + 1,
        eh.path || e.name
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT 
    level,
    REPEAT('  ', level - 1) || name AS hierarchy,
    path
FROM employee_hierarchy
ORDER BY path;

-- 递归CTE: 日期序列
WITH RECURSIVE date_series AS (
    SELECT CURRENT_DATE - INTERVAL '6 days' AS date
    UNION ALL
    SELECT date + INTERVAL '1 day'
    FROM date_series
    WHERE date < CURRENT_DATE
)
SELECT date FROM date_series;

二、窗口函数 #

2.1 基本语法 #

sql
-- 窗口函数语法
function_name() OVER (
    PARTITION BY partition_expression
    ORDER BY order_expression
    frame_clause
)

-- 示例表
CREATE TABLE sales (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    salesperson VARCHAR(100),
    region VARCHAR(50),
    amount DECIMAL(10, 2),
    sale_date DATE
);

INSERT INTO sales (salesperson, region, amount, sale_date) VALUES
    ('Alice', 'North', 1000, '2024-01-01'),
    ('Alice', 'North', 1500, '2024-01-02'),
    ('Bob', 'South', 2000, '2024-01-01'),
    ('Bob', 'South', 1200, '2024-01-02'),
    ('Carol', 'North', 1800, '2024-01-01'),
    ('Carol', 'North', 2200, '2024-01-02');

2.2 排名函数 #

sql
-- ROW_NUMBER: 连续编号
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;

-- RANK: 排名 (相同值相同排名,跳过)
SELECT 
    salesperson,
    amount,
    RANK() OVER (ORDER BY amount DESC) AS rank
FROM sales;

-- DENSE_RANK: 排名 (相同值相同排名,不跳过)
SELECT 
    salesperson,
    amount,
    DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank
FROM sales;

-- NTILE: 分组
SELECT 
    salesperson,
    amount,
    NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;

-- 分区排名
SELECT 
    region,
    salesperson,
    amount,
    RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS region_rank
FROM sales;

2.3 聚合窗口函数 #

sql
-- 累计聚合
SELECT 
    sale_date,
    salesperson,
    amount,
    SUM(amount) OVER (ORDER BY sale_date) AS running_total,
    AVG(amount) OVER (ORDER BY sale_date) AS running_avg,
    COUNT(*) OVER (ORDER BY sale_date) AS running_count
FROM sales;

-- 分区累计
SELECT 
    region,
    sale_date,
    amount,
    SUM(amount) OVER (
        PARTITION BY region 
        ORDER BY sale_date
    ) AS region_running_total
FROM sales;

-- 移动窗口
SELECT 
    sale_date,
    amount,
    AVG(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS moving_avg_3
FROM sales;

-- 全局聚合
SELECT 
    salesperson,
    amount,
    SUM(amount) OVER () AS total_amount,
    amount / SUM(amount) OVER () * 100 AS pct
FROM sales;

2.4 值访问函数 #

sql
-- LAG: 获取前N行的值
SELECT 
    sale_date,
    amount,
    LAG(amount) OVER (ORDER BY sale_date) AS prev_amount,
    amount - LAG(amount) OVER (ORDER BY sale_date) AS diff
FROM sales;

-- LEAD: 获取后N行的值
SELECT 
    sale_date,
    amount,
    LEAD(amount) OVER (ORDER BY sale_date) AS next_amount
FROM sales;

-- FIRST_VALUE / LAST_VALUE
SELECT 
    salesperson,
    sale_date,
    amount,
    FIRST_VALUE(amount) OVER (PARTITION BY salesperson ORDER BY sale_date) AS first_sale,
    LAST_VALUE(amount) OVER (
        PARTITION BY salesperson 
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_sale
FROM sales;

-- NTH_VALUE
SELECT 
    sale_date,
    amount,
    NTH_VALUE(amount, 2) OVER (ORDER BY sale_date) AS second_value
FROM sales;

三、CASE表达式 #

3.1 简单CASE #

sql
-- 简单CASE表达式
SELECT 
    name,
    status,
    CASE status
        WHEN 'active' THEN 'Active User'
        WHEN 'inactive' THEN 'Inactive User'
        WHEN 'pending' THEN 'Pending User'
        ELSE 'Unknown'
    END AS status_desc
FROM users;

3.2 搜索CASE #

sql
-- 搜索CASE表达式
SELECT 
    name,
    age,
    CASE 
        WHEN age < 20 THEN 'Teenager'
        WHEN age >= 20 AND age < 30 THEN 'Twenties'
        WHEN age >= 30 AND age < 40 THEN 'Thirties'
        WHEN age >= 40 THEN 'Forty Plus'
        ELSE 'Unknown'
    END AS age_group,
    CASE 
        WHEN age < 25 THEN 'Junior'
        WHEN age >= 25 AND age < 35 THEN 'Mid-level'
        ELSE 'Senior'
    END AS career_stage
FROM users;

3.3 CASE在聚合中 #

sql
-- 条件计数
SELECT 
    COUNT(*) AS total,
    COUNT(CASE WHEN status = 'active' THEN 1 END) AS active_count,
    COUNT(CASE WHEN status = 'inactive' THEN 1 END) AS inactive_count,
    SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) AS active_sum
FROM users;

-- 条件聚合
SELECT 
    region,
    SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_sales,
    SUM(CASE WHEN amount <= 1000 THEN amount ELSE 0 END) AS low_value_sales
FROM sales
GROUP BY region;

-- 行转列
SELECT 
    salesperson,
    SUM(CASE WHEN region = 'North' THEN amount ELSE 0 END) AS north_sales,
    SUM(CASE WHEN region = 'South' THEN amount ELSE 0 END) AS south_sales,
    SUM(CASE WHEN region = 'East' THEN amount ELSE 0 END) AS east_sales,
    SUM(CASE WHEN region = 'West' THEN amount ELSE 0 END) AS west_sales
FROM sales
GROUP BY salesperson;

四、高级聚合 #

4.1 GROUPING SETS #

sql
-- GROUPING SETS: 多维聚合
SELECT 
    region,
    salesperson,
    SUM(amount) AS total
FROM sales
GROUP BY GROUPING SETS (
    (region, salesperson),
    (region),
    (salesperson),
    ()
);

-- 等价于 UNION ALL
SELECT region, salesperson, SUM(amount) FROM sales GROUP BY region, salesperson
UNION ALL
SELECT region, NULL, SUM(amount) FROM sales GROUP BY region
UNION ALL
SELECT NULL, salesperson, SUM(amount) FROM sales GROUP BY salesperson
UNION ALL
SELECT NULL, NULL, SUM(amount) FROM sales;

4.2 ROLLUP #

sql
-- ROLLUP: 层次聚合
SELECT 
    region,
    salesperson,
    SUM(amount) AS total
FROM sales
GROUP BY ROLLUP (region, salesperson);

-- 生成:
-- (region, salesperson)
-- (region)
-- ()

-- 使用 GROUPING 识别汇总行
SELECT 
    region,
    salesperson,
    SUM(amount) AS total,
    GROUPING(region) AS is_region_total,
    GROUPING(salesperson) AS is_salesperson_total
FROM sales
GROUP BY ROLLUP (region, salesperson);

4.3 CUBE #

sql
-- CUBE: 所有组合聚合
SELECT 
    region,
    salesperson,
    SUM(amount) AS total
FROM sales
GROUP BY CUBE (region, salesperson);

-- 生成:
-- (region, salesperson)
-- (region)
-- (salesperson)
-- ()

4.4 FILTER子句 #

sql
-- FILTER: 条件聚合
SELECT 
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE status = 'active') AS active_count,
    COUNT(*) FILTER (WHERE status = 'inactive') AS inactive_count,
    AVG(age) FILTER (WHERE status = 'active') AS avg_active_age
FROM users;

-- 等价于 CASE
SELECT 
    COUNT(*) AS total,
    COUNT(CASE WHEN status = 'active' THEN 1 END) AS active_count
FROM users;

五、集合操作 #

5.1 UNION #

sql
-- UNION: 合并去重
SELECT name FROM users
UNION
SELECT name FROM customers;

-- UNION ALL: 合并不去重
SELECT name FROM users
UNION ALL
SELECT name FROM customers;

5.2 INTERSECT #

sql
-- INTERSECT: 交集
SELECT name FROM users
INTERSECT
SELECT name FROM customers;

-- INTERSECT ALL: 交集不去重
SELECT name FROM users
INTERSECT ALL
SELECT name FROM customers;

5.3 EXCEPT #

sql
-- EXCEPT: 差集
SELECT name FROM users
EXCEPT
SELECT name FROM customers;

-- EXCEPT ALL: 差集不去重
SELECT name FROM users
EXCEPT ALL
SELECT name FROM customers;

六、高级过滤 #

6.1 FETCH和OFFSET #

sql
-- FETCH (PostgreSQL风格)
SELECT * FROM users
ORDER BY name
FETCH FIRST 10 ROWS ONLY;

-- OFFSET FETCH
SELECT * FROM users
ORDER BY name
OFFSET 10 ROWS
FETCH NEXT 10 ROWS ONLY;

6.2 LIMIT ALL #

sql
-- LIMIT ALL: 返回所有行
SELECT * FROM users LIMIT ALL;

-- 用于动态SQL
SELECT * FROM users
LIMIT CASE WHEN :limit = 0 THEN ALL ELSE :limit END;

七、JSON查询 #

7.1 JSON路径查询 #

sql
-- JSONB 路径查询
SELECT 
    data->>'name' AS name,
    data->'address'->>'city' AS city,
    data#>'{address,city}' AS city_alt
FROM users;

-- JSONB 包含查询
SELECT * FROM users
WHERE data @> '{"status": "active"}';

-- JSONB 存在查询
SELECT * FROM users
WHERE data ? 'email';

-- JSONB 数组查询
SELECT * FROM users
WHERE data->'tags' ? 'vip';

7.2 JSON聚合 #

sql
-- 聚合为JSON
SELECT 
    region,
    jsonb_agg(
        jsonb_build_object(
            'name', salesperson,
            'amount', amount
        )
    ) AS sales_data
FROM sales
GROUP BY region;

-- JSON对象聚合
SELECT 
    salesperson,
    jsonb_object_agg(
        region,
        SUM(amount)
    ) AS region_totals
FROM sales
GROUP BY salesperson;

八、查询优化 #

8.1 查看执行计划 #

sql
-- 基本执行计划
EXPLAIN SELECT * FROM users WHERE status = 'active';

-- 详细执行计划
EXPLAIN ANALYZE SELECT * FROM users WHERE status = 'active';

-- 执行计划格式
EXPLAIN (FORMAT JSON) SELECT * FROM users;
EXPLAIN (FORMAT DOT) SELECT * FROM users;

8.2 优化建议 #

text
高级查询优化建议
┌─────────────────────────────────────────────────────────────┐
│                                                             │
│   1. 使用 CTE 提高可读性                                    │
│   ├── 分解复杂查询                                         │
│   └── 便于维护                                             │
│                                                             │
│   2. 窗口函数代替自连接                                     │
│   ├── 性能更好                                             │
│   └── 代码更简洁                                           │
│                                                             │
│   3. 避免相关子查询                                         │
│   ├── 使用 JOIN 或窗口函数                                 │
│   └── 减少执行次数                                         │
│                                                             │
│   4. 合理使用索引                                           │
│   ├── WHERE/JOIN/ORDER BY 列                               │
│   └── 覆盖索引                                             │
│                                                             │
│   5. 限制结果集                                             │
│   ├── 使用 LIMIT                                           │
│   └── 减少数据传输                                         │
│                                                             │
└─────────────────────────────────────────────────────────────┘

九、总结 #

高级查询要点:

技术 说明
CTE 公用表表达式
递归CTE 层级查询
窗口函数 排名、聚合、值访问
CASE 条件表达式
GROUPING SETS 多维聚合
集合操作 UNION/INTERSECT/EXCEPT

下一步,让我们学习分布式特性!

最后更新:2026-03-27