聚合与分组 #

一、聚合函数概述 #

1.1 聚合函数列表 #

函数 说明
COUNT() 计数
SUM() 求和
AVG() 平均值
MIN() 最小值
MAX() 最大值
FIRST() 第一个值
LAST() 最后一个值
MEDIAN() 中位数
VARIANCE() 方差
STDDEV() 标准差

1.2 聚合特点 #

text
聚合特点:
├── 对一组值进行计算
├── 返回单个聚合值
├── 忽略NULL值
├── 可与GROUP BY配合
└── 支持DISTINCT去重

二、COUNT计数 #

2.1 基本计数 #

sql
SELECT COUNT(*) AS total FROM Person
SELECT COUNT(*) FROM Person WHERE status = 'active'

2.2 计数指定字段 #

sql
SELECT COUNT(email) AS withEmail FROM Person
SELECT COUNT(phone) AS withPhone FROM Person

2.3 DISTINCT计数 #

sql
SELECT COUNT(DISTINCT city) AS cityCount FROM Person
SELECT COUNT(DISTINCT department) AS deptCount FROM Employee

2.4 条件计数 #

sql
SELECT 
    COUNT(*) AS total,
    COUNT(CASE WHEN age > 30 THEN 1 END) AS over30,
    COUNT(CASE WHEN status = 'active' THEN 1 END) AS activeCount
FROM Person

三、SUM求和 #

3.1 基本求和 #

sql
SELECT SUM(salary) AS totalSalary FROM Employee
SELECT SUM(amount) AS totalAmount FROM Order

3.2 条件求和 #

sql
SELECT SUM(CASE WHEN status = 'paid' THEN amount ELSE 0 END) AS paidAmount
FROM Order

3.3 表达式求和 #

sql
SELECT SUM(price * quantity) AS totalRevenue FROM OrderItem
SELECT SUM(salary * 12) AS annualPayroll FROM Employee

四、AVG平均值 #

4.1 基本平均值 #

sql
SELECT AVG(age) AS avgAge FROM Person
SELECT AVG(salary) AS avgSalary FROM Employee

4.2 条件平均值 #

sql
SELECT AVG(salary) AS avgSalary FROM Employee WHERE department = 'IT'
SELECT AVG(rating) AS avgRating FROM Product WHERE category = 'Electronics'

4.3 分组平均值 #

sql
SELECT department, AVG(salary) AS avgSalary 
FROM Employee 
GROUP BY department

五、MIN和MAX #

5.1 基本用法 #

sql
SELECT MIN(age) AS minAge, MAX(age) AS maxAge FROM Person
SELECT MIN(salary) AS minSalary, MAX(salary) AS maxSalary FROM Employee

5.2 日期范围 #

sql
SELECT MIN(createdAt) AS earliest, MAX(createdAt) AS latest FROM Order
SELECT MIN(orderDate) AS firstOrder, MAX(orderDate) AS lastOrder FROM Order

5.3 字符串范围 #

sql
SELECT MIN(name) AS firstAlpha, MAX(name) AS lastAlpha FROM Person

六、其他聚合函数 #

6.1 FIRST和LAST #

sql
SELECT FIRST(name) AS firstName, LAST(name) AS lastName FROM Person
SELECT FIRST(amount) AS firstOrder, LAST(amount) AS lastOrder FROM Order

6.2 MEDIAN中位数 #

sql
SELECT MEDIAN(salary) AS medianSalary FROM Employee
SELECT MEDIAN(age) AS medianAge FROM Person

6.3 VARIANCE方差 #

sql
SELECT VARIANCE(salary) AS salaryVariance FROM Employee

6.4 STDDEV标准差 #

sql
SELECT STDDEV(salary) AS salaryStdDev FROM Employee

七、GROUP BY分组 #

7.1 基本分组 #

sql
SELECT city, COUNT(*) AS count FROM Person GROUP BY city
SELECT status, COUNT(*) AS count FROM Person GROUP BY status

7.2 多字段分组 #

sql
SELECT city, status, COUNT(*) AS count 
FROM Person 
GROUP BY city, status

7.3 分组聚合 #

sql
SELECT 
    city,
    COUNT(*) AS count,
    AVG(age) AS avgAge,
    MIN(age) AS minAge,
    MAX(age) AS maxAge
FROM Person
GROUP BY city

7.4 表达式分组 #

sql
SELECT 
    CASE 
        WHEN age < 30 THEN 'Young'
        WHEN age < 50 THEN 'Middle'
        ELSE 'Senior'
    END AS ageGroup,
    COUNT(*) AS count
FROM Person
GROUP BY ageGroup

7.5 日期分组 #

sql
SELECT 
    YEAR(createdAt) AS year,
    MONTH(createdAt) AS month,
    COUNT(*) AS count
FROM Order
GROUP BY year, month
ORDER BY year, month

八、HAVING过滤 #

8.1 基本HAVING #

sql
SELECT city, COUNT(*) AS count 
FROM Person 
GROUP BY city 
HAVING COUNT(*) > 10

8.2 多条件HAVING #

sql
SELECT 
    city,
    COUNT(*) AS count,
    AVG(age) AS avgAge
FROM Person
GROUP BY city
HAVING COUNT(*) > 5 AND AVG(age) > 30

8.3 HAVING vs WHERE #

sql
SELECT city, COUNT(*) AS count
FROM Person
WHERE status = 'active'
GROUP BY city
HAVING COUNT(*) > 10

区别说明:

子句 作用时机 说明
WHERE 分组前 过滤原始记录
HAVING 分组后 过滤分组结果

九、组合聚合 #

9.1 多聚合函数 #

sql
SELECT 
    department,
    COUNT(*) AS employeeCount,
    SUM(salary) AS totalSalary,
    AVG(salary) AS avgSalary,
    MIN(salary) AS minSalary,
    MAX(salary) AS maxSalary
FROM Employee
GROUP BY department

9.2 嵌套聚合 #

sql
SELECT 
    AVG(deptAvg) AS overallAvg
FROM (
    SELECT department, AVG(salary) AS deptAvg
    FROM Employee
    GROUP BY department
)

9.3 聚合与排序 #

sql
SELECT 
    city,
    COUNT(*) AS count,
    AVG(age) AS avgAge
FROM Person
GROUP BY city
ORDER BY count DESC, avgAge DESC

十、集合聚合 #

10.1 集合大小统计 #

sql
SELECT 
    name,
    SIZE(hobbies) AS hobbyCount,
    SIZE(friends) AS friendCount
FROM Person

10.2 集合内容聚合 #

sql
SELECT 
    hobby,
    COUNT(*) AS personCount
FROM Person
UNWIND hobbies AS hobby
GROUP BY hobby
ORDER BY personCount DESC

10.3 嵌入文档聚合 #

sql
SELECT 
    address.city AS city,
    COUNT(*) AS count
FROM Person
GROUP BY address.city

十一、链接聚合 #

11.1 关联计数 #

sql
SELECT 
    name,
    SIZE(in('KNOWS')) AS followerCount,
    SIZE(out('KNOWS')) AS followingCount
FROM Person

11.2 关联统计 #

sql
SELECT 
    c.name AS companyName,
    COUNT(in('WORKS_AT')) AS employeeCount,
    AVG(in('WORKS_AT').salary) AS avgSalary
FROM Company c

11.3 多层关联聚合 #

sql
SELECT 
    p.name AS personName,
    COUNT(out('KNOWS')) AS directFriends,
    COUNT(out('KNOWS').out('KNOWS')) AS friendsOfFriends
FROM Person p
GROUP BY p

十二、ROLLUP和CUBE #

12.1 ROLLUP分组 #

sql
SELECT 
    city,
    department,
    COUNT(*) AS count
FROM Employee
GROUP BY ROLLUP(city, department)

12.2 CUBE分组 #

sql
SELECT 
    city,
    department,
    COUNT(*) AS count
FROM Employee
GROUP BY CUBE(city, department)

十三、实际应用示例 #

13.1 销售统计 #

sql
SELECT 
    product.name AS productName,
    COUNT(*) AS orderCount,
    SUM(quantity) AS totalQuantity,
    SUM(price * quantity) AS totalRevenue,
    AVG(price) AS avgPrice
FROM OrderItem
GROUP BY product
ORDER BY totalRevenue DESC
LIMIT 10

13.2 用户活跃度分析 #

sql
SELECT 
    CASE 
        WHEN loginCount = 0 THEN 'Inactive'
        WHEN loginCount < 5 THEN 'Low'
        WHEN loginCount < 20 THEN 'Medium'
        ELSE 'High'
    END AS activityLevel,
    COUNT(*) AS userCount
FROM (
    SELECT 
        u.name,
        COUNT(l) AS loginCount
    FROM User u
    LEFT JOIN Login l ON l.userId = u.@rid
    WHERE l.loginTime > sysdate() - 30
    GROUP BY u
)
GROUP BY activityLevel

13.3 部门薪资分析 #

sql
SELECT 
    department,
    COUNT(*) AS employeeCount,
    AVG(salary) AS avgSalary,
    MIN(salary) AS minSalary,
    MAX(salary) AS maxSalary,
    STDDEV(salary) AS salaryStdDev
FROM Employee
GROUP BY department
HAVING COUNT(*) > 5
ORDER BY avgSalary DESC

13.4 时间趋势分析 #

sql
SELECT 
    YEAR(createdAt) AS year,
    MONTH(createdAt) AS month,
    COUNT(*) AS orderCount,
    SUM(amount) AS totalAmount,
    AVG(amount) AS avgAmount
FROM Order
WHERE createdAt >= DATE('2024-01-01')
GROUP BY year, month
ORDER BY year, month

13.5 客户价值分析 #

sql
SELECT 
    customer.name AS customerName,
    customer.level AS customerLevel,
    COUNT(*) AS orderCount,
    SUM(totalAmount) AS totalSpent,
    AVG(totalAmount) AS avgOrderValue,
    MAX(createdAt) AS lastOrderDate
FROM Order
GROUP BY customer
ORDER BY totalSpent DESC
LIMIT 100

十四、性能优化 #

14.1 使用索引 #

sql
CREATE INDEX Person.city NOTUNIQUE
SELECT city, COUNT(*) FROM Person GROUP BY city

14.2 限制分组数量 #

sql
SELECT city, COUNT(*) AS count
FROM Person
GROUP BY city
HAVING count > 10

14.3 预过滤 #

sql
SELECT department, AVG(salary)
FROM Employee
WHERE status = 'active'
GROUP BY department

十五、总结 #

聚合与分组要点:

操作 语法 说明
计数 COUNT() 统计数量
求和 SUM() 计算总和
平均值 AVG() 计算平均
分组 GROUP BY 分组统计
过滤 HAVING 分组后过滤

下一步,让我们学习多类查询!

最后更新:2026-03-27