聚合与分组 #
一、聚合函数概述 #
1.1 聚合函数列表 #
| 函数 | 说明 |
|---|---|
| COUNT() | 计数 |
| SUM() | 求和 |
| AVG() | 平均值 |
| MIN() | 最小值 |
| MAX() | 最大值 |
| FIRST() | 第一个值 |
| LAST() | 最后一个值 |
| MEDIAN() | 中位数 |
| VARIANCE() | 方差 |
| STDDEV() | 标准差 |
1.2 聚合特点 #
text
聚合特点:
├── 对一组值进行计算
├── 返回单个聚合值
├── 忽略NULL值
├── 可与GROUP BY配合
└── 支持DISTINCT去重
二、COUNT计数 #
2.1 基本计数 #
sql
SELECT COUNT(*) AS total FROM Person
SELECT COUNT(*) FROM Person WHERE status = 'active'
2.2 计数指定字段 #
sql
SELECT COUNT(email) AS withEmail FROM Person
SELECT COUNT(phone) AS withPhone FROM Person
2.3 DISTINCT计数 #
sql
SELECT COUNT(DISTINCT city) AS cityCount FROM Person
SELECT COUNT(DISTINCT department) AS deptCount FROM Employee
2.4 条件计数 #
sql
SELECT
COUNT(*) AS total,
COUNT(CASE WHEN age > 30 THEN 1 END) AS over30,
COUNT(CASE WHEN status = 'active' THEN 1 END) AS activeCount
FROM Person
三、SUM求和 #
3.1 基本求和 #
sql
SELECT SUM(salary) AS totalSalary FROM Employee
SELECT SUM(amount) AS totalAmount FROM Order
3.2 条件求和 #
sql
SELECT SUM(CASE WHEN status = 'paid' THEN amount ELSE 0 END) AS paidAmount
FROM Order
3.3 表达式求和 #
sql
SELECT SUM(price * quantity) AS totalRevenue FROM OrderItem
SELECT SUM(salary * 12) AS annualPayroll FROM Employee
四、AVG平均值 #
4.1 基本平均值 #
sql
SELECT AVG(age) AS avgAge FROM Person
SELECT AVG(salary) AS avgSalary FROM Employee
4.2 条件平均值 #
sql
SELECT AVG(salary) AS avgSalary FROM Employee WHERE department = 'IT'
SELECT AVG(rating) AS avgRating FROM Product WHERE category = 'Electronics'
4.3 分组平均值 #
sql
SELECT department, AVG(salary) AS avgSalary
FROM Employee
GROUP BY department
五、MIN和MAX #
5.1 基本用法 #
sql
SELECT MIN(age) AS minAge, MAX(age) AS maxAge FROM Person
SELECT MIN(salary) AS minSalary, MAX(salary) AS maxSalary FROM Employee
5.2 日期范围 #
sql
SELECT MIN(createdAt) AS earliest, MAX(createdAt) AS latest FROM Order
SELECT MIN(orderDate) AS firstOrder, MAX(orderDate) AS lastOrder FROM Order
5.3 字符串范围 #
sql
SELECT MIN(name) AS firstAlpha, MAX(name) AS lastAlpha FROM Person
六、其他聚合函数 #
6.1 FIRST和LAST #
sql
SELECT FIRST(name) AS firstName, LAST(name) AS lastName FROM Person
SELECT FIRST(amount) AS firstOrder, LAST(amount) AS lastOrder FROM Order
6.2 MEDIAN中位数 #
sql
SELECT MEDIAN(salary) AS medianSalary FROM Employee
SELECT MEDIAN(age) AS medianAge FROM Person
6.3 VARIANCE方差 #
sql
SELECT VARIANCE(salary) AS salaryVariance FROM Employee
6.4 STDDEV标准差 #
sql
SELECT STDDEV(salary) AS salaryStdDev FROM Employee
七、GROUP BY分组 #
7.1 基本分组 #
sql
SELECT city, COUNT(*) AS count FROM Person GROUP BY city
SELECT status, COUNT(*) AS count FROM Person GROUP BY status
7.2 多字段分组 #
sql
SELECT city, status, COUNT(*) AS count
FROM Person
GROUP BY city, status
7.3 分组聚合 #
sql
SELECT
city,
COUNT(*) AS count,
AVG(age) AS avgAge,
MIN(age) AS minAge,
MAX(age) AS maxAge
FROM Person
GROUP BY city
7.4 表达式分组 #
sql
SELECT
CASE
WHEN age < 30 THEN 'Young'
WHEN age < 50 THEN 'Middle'
ELSE 'Senior'
END AS ageGroup,
COUNT(*) AS count
FROM Person
GROUP BY ageGroup
7.5 日期分组 #
sql
SELECT
YEAR(createdAt) AS year,
MONTH(createdAt) AS month,
COUNT(*) AS count
FROM Order
GROUP BY year, month
ORDER BY year, month
八、HAVING过滤 #
8.1 基本HAVING #
sql
SELECT city, COUNT(*) AS count
FROM Person
GROUP BY city
HAVING COUNT(*) > 10
8.2 多条件HAVING #
sql
SELECT
city,
COUNT(*) AS count,
AVG(age) AS avgAge
FROM Person
GROUP BY city
HAVING COUNT(*) > 5 AND AVG(age) > 30
8.3 HAVING vs WHERE #
sql
SELECT city, COUNT(*) AS count
FROM Person
WHERE status = 'active'
GROUP BY city
HAVING COUNT(*) > 10
区别说明:
| 子句 | 作用时机 | 说明 |
|---|---|---|
| WHERE | 分组前 | 过滤原始记录 |
| HAVING | 分组后 | 过滤分组结果 |
九、组合聚合 #
9.1 多聚合函数 #
sql
SELECT
department,
COUNT(*) AS employeeCount,
SUM(salary) AS totalSalary,
AVG(salary) AS avgSalary,
MIN(salary) AS minSalary,
MAX(salary) AS maxSalary
FROM Employee
GROUP BY department
9.2 嵌套聚合 #
sql
SELECT
AVG(deptAvg) AS overallAvg
FROM (
SELECT department, AVG(salary) AS deptAvg
FROM Employee
GROUP BY department
)
9.3 聚合与排序 #
sql
SELECT
city,
COUNT(*) AS count,
AVG(age) AS avgAge
FROM Person
GROUP BY city
ORDER BY count DESC, avgAge DESC
十、集合聚合 #
10.1 集合大小统计 #
sql
SELECT
name,
SIZE(hobbies) AS hobbyCount,
SIZE(friends) AS friendCount
FROM Person
10.2 集合内容聚合 #
sql
SELECT
hobby,
COUNT(*) AS personCount
FROM Person
UNWIND hobbies AS hobby
GROUP BY hobby
ORDER BY personCount DESC
10.3 嵌入文档聚合 #
sql
SELECT
address.city AS city,
COUNT(*) AS count
FROM Person
GROUP BY address.city
十一、链接聚合 #
11.1 关联计数 #
sql
SELECT
name,
SIZE(in('KNOWS')) AS followerCount,
SIZE(out('KNOWS')) AS followingCount
FROM Person
11.2 关联统计 #
sql
SELECT
c.name AS companyName,
COUNT(in('WORKS_AT')) AS employeeCount,
AVG(in('WORKS_AT').salary) AS avgSalary
FROM Company c
11.3 多层关联聚合 #
sql
SELECT
p.name AS personName,
COUNT(out('KNOWS')) AS directFriends,
COUNT(out('KNOWS').out('KNOWS')) AS friendsOfFriends
FROM Person p
GROUP BY p
十二、ROLLUP和CUBE #
12.1 ROLLUP分组 #
sql
SELECT
city,
department,
COUNT(*) AS count
FROM Employee
GROUP BY ROLLUP(city, department)
12.2 CUBE分组 #
sql
SELECT
city,
department,
COUNT(*) AS count
FROM Employee
GROUP BY CUBE(city, department)
十三、实际应用示例 #
13.1 销售统计 #
sql
SELECT
product.name AS productName,
COUNT(*) AS orderCount,
SUM(quantity) AS totalQuantity,
SUM(price * quantity) AS totalRevenue,
AVG(price) AS avgPrice
FROM OrderItem
GROUP BY product
ORDER BY totalRevenue DESC
LIMIT 10
13.2 用户活跃度分析 #
sql
SELECT
CASE
WHEN loginCount = 0 THEN 'Inactive'
WHEN loginCount < 5 THEN 'Low'
WHEN loginCount < 20 THEN 'Medium'
ELSE 'High'
END AS activityLevel,
COUNT(*) AS userCount
FROM (
SELECT
u.name,
COUNT(l) AS loginCount
FROM User u
LEFT JOIN Login l ON l.userId = u.@rid
WHERE l.loginTime > sysdate() - 30
GROUP BY u
)
GROUP BY activityLevel
13.3 部门薪资分析 #
sql
SELECT
department,
COUNT(*) AS employeeCount,
AVG(salary) AS avgSalary,
MIN(salary) AS minSalary,
MAX(salary) AS maxSalary,
STDDEV(salary) AS salaryStdDev
FROM Employee
GROUP BY department
HAVING COUNT(*) > 5
ORDER BY avgSalary DESC
13.4 时间趋势分析 #
sql
SELECT
YEAR(createdAt) AS year,
MONTH(createdAt) AS month,
COUNT(*) AS orderCount,
SUM(amount) AS totalAmount,
AVG(amount) AS avgAmount
FROM Order
WHERE createdAt >= DATE('2024-01-01')
GROUP BY year, month
ORDER BY year, month
13.5 客户价值分析 #
sql
SELECT
customer.name AS customerName,
customer.level AS customerLevel,
COUNT(*) AS orderCount,
SUM(totalAmount) AS totalSpent,
AVG(totalAmount) AS avgOrderValue,
MAX(createdAt) AS lastOrderDate
FROM Order
GROUP BY customer
ORDER BY totalSpent DESC
LIMIT 100
十四、性能优化 #
14.1 使用索引 #
sql
CREATE INDEX Person.city NOTUNIQUE
SELECT city, COUNT(*) FROM Person GROUP BY city
14.2 限制分组数量 #
sql
SELECT city, COUNT(*) AS count
FROM Person
GROUP BY city
HAVING count > 10
14.3 预过滤 #
sql
SELECT department, AVG(salary)
FROM Employee
WHERE status = 'active'
GROUP BY department
十五、总结 #
聚合与分组要点:
| 操作 | 语法 | 说明 |
|---|---|---|
| 计数 | COUNT() | 统计数量 |
| 求和 | SUM() | 计算总和 |
| 平均值 | AVG() | 计算平均 |
| 分组 | GROUP BY | 分组统计 |
| 过滤 | HAVING | 分组后过滤 |
下一步,让我们学习多类查询!
最后更新:2026-03-27