From 52540fecb20bac8b4f80e738e3c3df7265d65c35 Mon Sep 17 00:00:00 2001
From: congqixia
Date: Mon, 4 Mar 2024 19:54:59 +0800
Subject: [PATCH] enhance: Add back load memory factor when estimating memory resource (#30994)

Segment load memory usage is underestimated because the load memory
factor was removed. This PR adds the factor back to protect the
querynode from OOM in some extreme memory cases.

Signed-off-by: Congqi Xia
---
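For intuition, a minimal standalone sketch of what the load memory factor does
follows. It is illustrative only: the helper name estimateSegmentMemory, the
sizes, and the factor value of 2.0 are assumptions made for this example, not
values taken from this patch or from the Milvus configuration defaults; the
real estimation lives in checkSegmentSize below.

    package main

    import "fmt"

    // estimateSegmentMemory mirrors the idea behind LoadMemoryUsageFactor: the
    // raw binlog/statslog byte count is multiplied by a factor so the reserved
    // memory includes headroom for expansion while the segment is loaded.
    // (Hypothetical helper for illustration; not code from this patch.)
    func estimateSegmentMemory(binlogBytes, statslogBytes uint64, loadFactor float64) uint64 {
    	return uint64(float64(binlogBytes+statslogBytes) * loadFactor)
    }

    func main() {
    	// Assumed example values: 1 GiB binlog + 64 MiB statslog, factor 2.0.
    	est := estimateSegmentMemory(1<<30, 64<<20, 2.0)
    	fmt.Printf("estimated load memory: %d MiB\n", est>>20) // 2176 MiB instead of 1088 MiB
    }

With the factor applied the loader over-reserves rather than under-reserves,
which is the point of the change: better to delay or reject a load than to
push the querynode into OOM.
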
 .../querynodev2/segments/segment_loader.go | 37 ++++++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go
index 657e8deeac..b89b722a16 100644
--- a/internal/querynodev2/segments/segment_loader.go
+++ b/internal/querynodev2/segments/segment_loader.go
@@ -1352,14 +1352,15 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
 
 	metrics.QueryNodeDiskUsedSize.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Set(toMB(uint64(localDiskUsage)))
 	diskUsage := uint64(localDiskUsage) + loader.committedResource.DiskSize
+	memoryUsageFactor := paramtable.Get().QueryNodeCfg.LoadMemoryUsageFactor.GetAsFloat()
 
 	maxSegmentSize := uint64(0)
 	predictMemUsage := memUsage
 	predictDiskUsage := diskUsage
 	mmapFieldCount := 0
 	for _, loadInfo := range segmentLoadInfos {
+		var segmentMemorySize, segmentDiskSize uint64
 		collection := loader.manager.Collection.Get(loadInfo.GetCollectionID())
-		oldUsedMem := predictMemUsage
 		vecFieldID2IndexInfo := make(map[int64]*querypb.FieldIndexInfo)
 		for _, fieldIndexInfo := range loadInfo.IndexInfos {
 			if fieldIndexInfo.EnableIndex {
@@ -1383,20 +1384,21 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
 				return 0, 0, err
 			}
 			if mmapEnabled {
-				predictDiskUsage += neededMemSize + neededDiskSize
+				segmentDiskSize += neededMemSize + neededDiskSize
 			} else {
-				predictMemUsage += neededMemSize
-				predictDiskUsage += neededDiskSize
+				segmentMemorySize += neededMemSize
+				segmentDiskSize += neededDiskSize
 			}
 		} else {
+			binlogSize := uint64(getBinlogDataSize(fieldBinlog))
 			if mmapEnabled {
-				predictDiskUsage += uint64(getBinlogDataSize(fieldBinlog))
+				segmentDiskSize += binlogSize
 			} else {
-				predictMemUsage += uint64(getBinlogDataSize(fieldBinlog))
+				segmentMemorySize += binlogSize
 				enableBinlogIndex := paramtable.Get().QueryNodeCfg.EnableTempSegmentIndex.GetAsBool()
 				if enableBinlogIndex {
 					buildBinlogIndexRate := paramtable.Get().QueryNodeCfg.InterimIndexMemExpandRate.GetAsFloat()
-					predictMemUsage += uint64(float32(getBinlogDataSize(fieldBinlog)) * float32(buildBinlogIndexRate))
+					segmentMemorySize += uint64(float64(binlogSize) * buildBinlogIndexRate)
 				}
 			}
 		}
@@ -1408,17 +1410,30 @@ func (loader *segmentLoader) checkSegmentSize(ctx context.Context, segmentLoadIn
 
 		// get size of stats data
 		for _, fieldBinlog := range loadInfo.Statslogs {
-			predictMemUsage += uint64(getBinlogDataSize(fieldBinlog))
+			segmentMemorySize += uint64(getBinlogDataSize(fieldBinlog))
 		}
 
+		// binlog & statslog use general load factor
+		segmentMemorySize = uint64(float64(segmentMemorySize) * memoryUsageFactor)
+
 		// get size of delete data
 		for _, fieldBinlog := range loadInfo.Deltalogs {
-			predictMemUsage += uint64(float64(getBinlogDataSize(fieldBinlog)) * paramtable.Get().QueryNodeCfg.DeltaDataExpansionRate.GetAsFloat())
+			segmentMemorySize += uint64(float64(getBinlogDataSize(fieldBinlog)) * paramtable.Get().QueryNodeCfg.DeltaDataExpansionRate.GetAsFloat())
 		}
 
-		if predictMemUsage-oldUsedMem > maxSegmentSize {
-			maxSegmentSize = predictMemUsage - oldUsedMem
+		if segmentMemorySize > maxSegmentSize {
+			maxSegmentSize = segmentMemorySize
 		}
+
+		predictMemUsage += segmentMemorySize
+		predictDiskUsage += segmentDiskSize
+
+		log.Debug("segment resource for loading",
+			zap.Int64("segmentID", loadInfo.GetSegmentID()),
+			zap.Float64("memoryUsage(MB)", toMB(segmentMemorySize)),
+			zap.Float64("diskUsage(MB)", toMB(segmentDiskSize)),
+			zap.Float64("memoryLoadFactor", memoryUsageFactor),
+		)
 	}
 
 	log.Info("predict memory and disk usage while loading (in MiB)",