MongoDB 查询十分缓慢
小弟不才,mongodb 优化时间紧任务重,因此向各位大佬提问。
目前 mongodb 中有 130w 条数据,现在要统计当月 appName 的数量。然而 130w 条数据就需要 5s 以上的查询。
相关代码
查询的脚本如下:
db.UserBehaviorOnApp.aggregate([
// 匹配日期
{
$match: {
"accessTime" : {
"$gte" : ISODate("2022-12-06T06:04:53Z"),
"$lt" : ISODate("2023-01-06T06:04:53Z")
}
}
},
// 根据 appName 进行分组,并且统计 appName 出现的次数。 $sum: 1 表示累计+1
{ $group: { _id: "$appName", count: { $sum: 1 } } },
// 根据 count 进行倒序排列
{ $sort: {count:-1} },
// 取前五条记录
{ $limit : 5 }
])
使用 explain("executionStats") 分析结果如下:
{
"explainVersion" : "1",
"stages" : [
{
"$cursor" : {
"queryPlanner" : {
"namespace" : "user_behavior_log.UserBehaviorOnApp",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"accessTime" : {
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
},
{
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00")
}
}
]
},
"queryHash" : "BDCC37AF",
"planCacheKey" : "F85B6289",
"maxIndexedOrSolutionsReached" : false,
"maxIndexedAndSolutionsReached" : false,
"maxScansToExplodeReached" : false,
"winningPlan" : {
"stage" : "PROJECTION_SIMPLE",
"transformBy" : {
"appName" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"accessTime" : {
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
},
{
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00")
}
}
]
},
"direction" : "forward"
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1333199,
"executionTimeMillis" : 5769,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1333343,
"executionStages" : {
"stage" : "PROJECTION_SIMPLE",
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 344,
"works" : 1333345,
"advanced" : 1333199,
"needTime" : 145,
"needYield" : 0,
"saveState" : 1379,
"restoreState" : 1379,
"isEOF" : 1,
"transformBy" : {
"appName" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"accessTime" : {
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
},
{
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00")
}
}
]
},
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 250,
"works" : 1333345,
"advanced" : 1333199,
"needTime" : 145,
"needYield" : 0,
"saveState" : 1379,
"restoreState" : 1379,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 1333343
}
}
}
},
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 2222
},
{
"$group" : {
"_id" : "$appName",
"count" : {
"$sum" : {
"$const" : 1
}
}
},
"maxAccumulatorMemoryUsageBytes" : {
"count" : 74767392
},
"totalOutputDataSizeBytes" : 237801844,
"usedDisk" : false,
"nReturned" : 1038436,
"executionTimeMillisEstimate" : 5632
},
{
"$sort" : {
"sortKey" : {
"count" : -1
},
"limit" : 5
},
"totalDataSizeSortedBytesEstimate" : 12005,
"usedDisk" : false,
"nReturned" : 5,
"executionTimeMillisEstimate" : 5768
}
],
"serverInfo" : {
"host" : "02a8a2b6c8dc",
"port" : 27017,
"version" : "5.0.5",
"gitVersion" : "d65fd89df3fc039b5c55933c0f71d647a54510ae"
},
"serverParameters" : {
"internalQueryFacetBufferSizeBytes" : 104857600,
"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
"internalQueryProhibitBlockingMergeOnMongoS" : 0,
"internalQueryMaxAddToSetBytes" : 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
},
"command" : {
"aggregate" : "UserBehaviorOnApp",
"pipeline" : [
{
"$match" : {
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00"),
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
}
},
{
"$group" : {
"_id" : "$appName",
"count" : {
"$sum" : 1
}
}
},
{
"$sort" : {
"count" : -1
}
},
{
"$limit" : 5
}
],
"cursor" : {
},
"$db" : "user_behavior_log"
},
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1672991848, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : 0
}
},
"operationTime" : Timestamp(1672991848, 1)
}
我的思路
我尝试着给 accessTime 添加索引
db.UserBehaviorOnApp.createIndex({"accessTime": 1})
但是查询时间反而更久了,虽然用上了索引。
结果
{
"explainVersion" : "1",
"stages" : [
{
"$cursor" : {
"queryPlanner" : {
"namespace" : "user_behavior_log.UserBehaviorOnApp",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"accessTime" : {
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
},
{
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00")
}
}
]
},
"queryHash" : "BDCC37AF",
"planCacheKey" : "D418D338",
"maxIndexedOrSolutionsReached" : false,
"maxIndexedAndSolutionsReached" : false,
"maxScansToExplodeReached" : false,
"winningPlan" : {
"stage" : "PROJECTION_SIMPLE",
"transformBy" : {
"appName" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"accessTime" : 1
},
"indexName" : "accessTime_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"accessTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"accessTime" : [ "[new Date(1670306693000), new Date(1672985093000))" ]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1333199,
"executionTimeMillis" : 10107,
"totalKeysExamined" : 1333199,
"totalDocsExamined" : 1333199,
"executionStages" : {
"stage" : "PROJECTION_SIMPLE",
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 2092,
"works" : 1333200,
"advanced" : 1333199,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1379,
"restoreState" : 1379,
"isEOF" : 1,
"transformBy" : {
"appName" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 1986,
"works" : 1333200,
"advanced" : 1333199,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1379,
"restoreState" : 1379,
"isEOF" : 1,
"docsExamined" : 1333199,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 324,
"works" : 1333200,
"advanced" : 1333199,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1379,
"restoreState" : 1379,
"isEOF" : 1,
"keyPattern" : {
"accessTime" : 1
},
"indexName" : "accessTime_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"accessTime" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"accessTime" : [ "[new Date(1670306693000), new Date(1672985093000))" ]
},
"keysExamined" : 1333199,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"indexDef" : {
"indexName" : "accessTime_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"accessTime" : [ ]
},
"keyPattern" : {
"accessTime" : 1
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"direction" : "forward"
}
}
}
}
}
},
"nReturned" : 1333199,
"executionTimeMillisEstimate" : 6492
},
{
"$group" : {
"_id" : "$appName",
"count" : {
"$sum" : {
"$const" : 1
}
}
},
"maxAccumulatorMemoryUsageBytes" : {
"count" : 74767392
},
"totalOutputDataSizeBytes" : 237801844,
"usedDisk" : false,
"nReturned" : 1038436,
"executionTimeMillisEstimate" : 9928
},
{
"$sort" : {
"sortKey" : {
"count" : -1
},
"limit" : 5
},
"totalDataSizeSortedBytesEstimate" : 11760,
"usedDisk" : false,
"nReturned" : 5,
"executionTimeMillisEstimate" : 10101
}
],
"serverInfo" : {
"host" : "02a8a2b6c8dc",
"port" : 27017,
"version" : "5.0.5",
"gitVersion" : "d65fd89df3fc039b5c55933c0f71d647a54510ae"
},
"serverParameters" : {
"internalQueryFacetBufferSizeBytes" : 104857600,
"internalQueryFacetMaxOutputDocSizeBytes" : 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes" : 104857600,
"internalDocumentSourceGroupMaxMemoryBytes" : 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes" : 104857600,
"internalQueryProhibitBlockingMergeOnMongoS" : 0,
"internalQueryMaxAddToSetBytes" : 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes" : 104857600
},
"command" : {
"aggregate" : "UserBehaviorOnApp",
"pipeline" : [
{
"$match" : {
"accessTime" : {
"$gte" : ISODate("2022-12-06T14:04:53.000+08:00"),
"$lt" : ISODate("2023-01-06T14:04:53.000+08:00")
}
}
},
{
"$group" : {
"_id" : "$appName",
"count" : {
"$sum" : 1
}
}
},
{
"$sort" : {
"count" : -1
}
},
{
"$limit" : 5
}
],
"cursor" : {
},
"$db" : "user_behavior_log"
},
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1672991948, 3),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : 0
}
},
"operationTime" : Timestamp(1672991948, 3)
}
请问到底如何才能优化这条查询的性能?
文档结构如下:
aggregate
是聚合,内部操作都是管道pipline
,管道的含义很简单,就是筛选或者改变流向下一个阶段的数据。match
完成,还有 133万 数据,也就是流向下一个阶段就有这么多数据。group
就会读取这133万的数据进行计算,也就是读取每一个文档然后获取appName
字段进行分组计算,这一步本身就超级慢。而且appName
还是字符串类型,也会更慢。sort
又会进行一次133万数据排序。这种场景本身mongo就非常不适合处理。因为他就是简单的阶段流程,不像有些sql有各种算法。
这种场景,一般来说:
这种实时分析还是得
clickhouse
,其他都不行。