聚合与分组 #
一、聚合概述 #
聚合操作用于对数据进行统计和分组分析,是数据分析的核心功能。
1.1 聚合操作 #
| 操作 | 说明 |
|---|---|
| COLLECT | 分组聚合 |
| AGGREGATE | 聚合计算 |
| WITH COUNT INTO | 计数 |
1.2 聚合函数 #
| 函数 | 说明 |
|---|---|
| COUNT() | 计数 |
| SUM() | 求和 |
| AVG() | 平均值 |
| MIN() | 最小值 |
| MAX() | 最大值 |
| STDDEV() | 标准差 |
| VARIANCE() | 方差 |
| UNIQUE() | 去重 |
| SORTED_UNIQUE() | 排序去重 |
二、基本聚合 #
2.1 计数 #
aql
FOR user IN users
COLLECT WITH COUNT INTO count
RETURN count
2.2 分组计数 #
aql
FOR user IN users
COLLECT city = user.city WITH COUNT INTO count
RETURN {
city: city,
count: count
}
2.3 多字段分组 #
aql
FOR user IN users
COLLECT city = user.city, status = user.status WITH COUNT INTO count
RETURN {
city: city,
status: status,
count: count
}
2.4 分组排序 #
aql
FOR user IN users
COLLECT city = user.city WITH COUNT INTO count
SORT count DESC
RETURN {
city: city,
count: count
}
三、AGGREGATE聚合 #
3.1 基本聚合 #
aql
FOR user IN users
COLLECT AGGREGATE
avgAge = AVG(user.age),
maxAge = MAX(user.age),
minAge = MIN(user.age),
count = COUNT()
RETURN {
avgAge: avgAge,
maxAge: maxAge,
minAge: minAge,
count: count
}
3.2 分组聚合 #
aql
FOR user IN users
COLLECT city = user.city AGGREGATE
avgAge = AVG(user.age),
maxAge = MAX(user.age),
minAge = MIN(user.age),
count = COUNT()
RETURN {
city: city,
avgAge: avgAge,
maxAge: maxAge,
minAge: minAge,
count: count
}
3.3 多聚合函数 #
aql
FOR order IN orders
COLLECT AGGREGATE
totalAmount = SUM(order.amount),
avgAmount = AVG(order.amount),
maxAmount = MAX(order.amount),
minAmount = MIN(order.amount),
orderCount = COUNT(),
totalVariance = VARIANCE(order.amount),
totalStdDev = STDDEV(order.amount)
RETURN {
totalAmount: totalAmount,
avgAmount: avgAmount,
maxAmount: maxAmount,
minAmount: minAmount,
orderCount: orderCount,
variance: totalVariance,
stdDev: totalStdDev
}
3.4 条件聚合 #
aql
FOR order IN orders
COLLECT AGGREGATE
totalRevenue = SUM(order.amount),
avgOrderValue = AVG(order.amount),
completedOrders = SUM(order.status == "completed" ? 1 : 0),
cancelledOrders = SUM(order.status == "cancelled" ? 1 : 0)
RETURN {
totalRevenue: totalRevenue,
avgOrderValue: avgOrderValue,
completedOrders: completedOrders,
cancelledOrders: cancelledOrders
}
四、分组进阶 #
4.1 保留分组数据 #
aql
FOR user IN users
COLLECT city = user.city INTO group
RETURN {
city: city,
users: group[*].user.name,
count: LENGTH(group)
}
4.2 分组后过滤 #
aql
FOR user IN users
COLLECT city = user.city WITH COUNT INTO count
FILTER count > 10
RETURN {
city: city,
count: count
}
4.3 分组聚合后过滤 #
aql
FOR user IN users
COLLECT city = user.city AGGREGATE
avgAge = AVG(user.age),
count = COUNT()
FILTER avgAge > 30 AND count > 5
RETURN {
city: city,
avgAge: avgAge,
count: count
}
4.4 嵌套分组 #
aql
FOR user IN users
COLLECT city = user.city INTO cityGroup
LET ageGroups = (
FOR item IN cityGroup
COLLECT ageGroup = item.user.age >= 50 ? "50+" :
item.user.age >= 30 ? "30-49" :
item.user.age >= 18 ? "18-29" : "未成年"
WITH COUNT INTO count
RETURN { ageGroup, count }
)
RETURN {
city: city,
total: LENGTH(cityGroup),
ageGroups: ageGroups
}
五、日期分组 #
5.1 按日期分组 #
aql
FOR order IN orders
COLLECT date = DATE_FORMAT(order.createdAt, "%Y-%m-%d")
AGGREGATE
total = SUM(order.amount),
count = COUNT()
SORT date DESC
RETURN {
date: date,
total: total,
count: count
}
5.2 按月份分组 #
aql
FOR order IN orders
COLLECT year = DATE_YEAR(order.createdAt),
month = DATE_MONTH(order.createdAt)
AGGREGATE
total = SUM(order.amount),
count = COUNT()
SORT year DESC, month DESC
RETURN {
year: year,
month: month,
total: total,
count: count
}
5.3 按周分组 #
aql
FOR order IN orders
COLLECT year = DATE_YEAR(order.createdAt),
week = DATE_WEEK(order.createdAt)
AGGREGATE
total = SUM(order.amount),
count = COUNT()
SORT year DESC, week DESC
RETURN {
year: year,
week: week,
total: total,
count: count
}
5.4 按小时分组 #
aql
FOR order IN orders
COLLECT hour = DATE_HOUR(order.createdAt)
AGGREGATE
total = SUM(order.amount),
count = COUNT()
SORT hour ASC
RETURN {
hour: hour,
total: total,
count: count
}
六、高级聚合 #
6.1 去重计数 #
aql
FOR order IN orders
COLLECT AGGREGATE
uniqueUsers = COUNT_DISTINCT(order.userId),
totalOrders = COUNT()
RETURN {
uniqueUsers: uniqueUsers,
totalOrders: totalOrders
}
6.2 分组去重 #
aql
FOR order IN orders
COLLECT city = order.city AGGREGATE
uniqueUsers = COUNT_DISTINCT(order.userId),
totalOrders = COUNT()
RETURN {
city: city,
uniqueUsers: uniqueUsers,
totalOrders: totalOrders
}
6.3 唯一值列表 #
aql
FOR user IN users
COLLECT city = user.city AGGREGATE
uniqueAges = UNIQUE(user.age),
sortedAges = SORTED_UNIQUE(user.age)
RETURN {
city: city,
uniqueAges: uniqueAges,
sortedAges: sortedAges
}
6.4 数组聚合 #
aql
FOR user IN users
COLLECT city = user.city AGGREGATE
allHobbies = UNION_DISTINCT(user.hobbies)
RETURN {
city: city,
allHobbies: allHobbies
}
七、分组统计 #
7.1 百分比计算 #
aql
LET total = (
FOR user IN users
COLLECT WITH COUNT INTO count
RETURN count
)[0]
FOR user IN users
COLLECT city = user.city WITH COUNT INTO count
RETURN {
city: city,
count: count,
percentage: ROUND(count / total * 100, 2)
}
7.2 累计占比 #
aql
LET total = (
FOR order IN orders
AGGREGATE total = SUM(order.amount)
RETURN total
)[0]
FOR order IN orders
COLLECT date = DATE_FORMAT(order.createdAt, "%Y-%m-%d")
AGGREGATE dailyTotal = SUM(order.amount)
SORT date ASC
RETURN {
date: date,
dailyTotal: dailyTotal,
percentage: ROUND(dailyTotal / total * 100, 2)
}
7.3 排名统计 #
aql
FOR user IN users
COLLECT city = user.city AGGREGATE
count = COUNT(),
avgScore = AVG(user.score)
SORT count DESC
LET rank = ROW_NUMBER()
RETURN {
rank: rank,
city: city,
count: count,
avgScore: avgScore
}
八、实战示例 #
8.1 用户统计报表 #
aql
FOR user IN users
COLLECT status = user.status AGGREGATE
count = COUNT(),
avgAge = AVG(user.age),
avgScore = AVG(user.score)
RETURN {
status: status,
count: count,
avgAge: ROUND(avgAge, 1),
avgScore: ROUND(avgScore, 2)
}
8.2 销售统计 #
aql
FOR order IN orders
COLLECT
year = DATE_YEAR(order.createdAt),
month = DATE_MONTH(order.createdAt),
status = order.status
AGGREGATE
totalAmount = SUM(order.amount),
orderCount = COUNT(),
avgAmount = AVG(order.amount)
SORT year DESC, month DESC
RETURN {
year: year,
month: month,
status: status,
totalAmount: totalAmount,
orderCount: orderCount,
avgAmount: ROUND(avgAmount, 2)
}
8.3 商品分析 #
aql
FOR order IN orders
COLLECT productId = order.productId AGGREGATE
totalSales = SUM(order.quantity),
totalRevenue = SUM(order.amount),
orderCount = COUNT(),
avgOrderValue = AVG(order.amount)
LET product = DOCUMENT("products", productId)
RETURN {
productId: productId,
productName: product.name,
category: product.category,
totalSales: totalSales,
totalRevenue: totalRevenue,
orderCount: orderCount,
avgOrderValue: ROUND(avgOrderValue, 2)
}
8.4 用户行为分析 #
aql
FOR action IN userActions
COLLECT
userId = action.userId,
actionType = action.type
AGGREGATE
count = COUNT(),
firstAction = MIN(action.createdAt),
lastAction = MAX(action.createdAt)
LET user = DOCUMENT("users", userId)
RETURN {
userId: userId,
userName: user.name,
actionType: actionType,
count: count,
firstAction: firstAction,
lastAction: lastAction,
daysActive: DATE_DIFF(firstAction, lastAction, "days")
}
8.5 地域分析 #
aql
FOR user IN users
COLLECT province = user.province AGGREGATE
userCount = COUNT(),
avgAge = AVG(user.age),
totalScore = SUM(user.score),
uniqueCities = COUNT_DISTINCT(user.city)
SORT userCount DESC
RETURN {
province: province,
userCount: userCount,
avgAge: ROUND(avgAge, 1),
totalScore: totalScore,
uniqueCities: uniqueCities
}
九、性能优化 #
9.1 使用索引 #
javascript
db.orders.ensureSkipList(["createdAt"]);
db.users.ensureHashIndex(["city"]);
9.2 限制聚合范围 #
aql
FOR order IN orders
FILTER order.createdAt > DATE_SUBTRACT(DATE_NOW(), 30, "days")
COLLECT date = DATE_FORMAT(order.createdAt, "%Y-%m-%d")
AGGREGATE total = SUM(order.amount)
RETURN { date, total }
9.3 分批聚合 #
aql
FOR order IN orders
FILTER order.status == "completed"
LIMIT 10000
COLLECT AGGREGATE total = SUM(order.amount)
RETURN total
十、总结 #
聚合与分组要点:
- COLLECT:分组操作
- AGGREGATE:聚合计算
- WITH COUNT INTO:计数
- 聚合函数:SUM、AVG、MIN、MAX等
- 日期分组:按日期维度统计
下一步,让我们学习多集合查询!
最后更新:2026-03-27