enhance: add multiply factor when loading index (#38722)

issue: #38715
pr: #38716

Signed-off-by: chyezh <chyezh@outlook.com>
pull/38678/head
Zhen Ye 2024-12-25 10:50:58 +08:00 committed by GitHub
parent bca21bde30
commit 52de43dbeb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 1 deletions

View File

@ -603,6 +603,8 @@ dataCoord:
maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train
maxClusterSize: 5g # maximum cluster size in Kmeans train
syncSegmentsInterval: 300 # The time interval for regularly syncing segments
index:
memSizeEstimateMultiplier: 2 # When the memory size is not set up by the index procedure, multiplier to estimate the memory size of index data
enableGarbageCollection: true # Switch that controls whether garbage collection is enabled to clear the discarded data in MinIO or S3 service.
gc:
interval: 3600 # The interval at which data coord performs garbage collection, unit: second.

View File

@ -961,7 +961,8 @@ func GetCLoadInfoWithFunc(ctx context.Context,
IndexFiles: indexInfo.GetIndexFilePaths(),
IndexEngineVersion: indexInfo.GetCurrentIndexVersion(),
IndexStoreVersion: indexInfo.GetIndexStoreVersion(),
IndexFileSize: indexInfo.GetIndexSize(),
// TODO: As a quick fix, the multiplier is applied here; this logic should move to datacoord once the mem size is added for each index.
IndexFileSize: int64(paramtable.Get().DataCoordCfg.IndexMemSizeEstimateMultiplier.GetAsFloat() * float64(indexInfo.GetIndexSize())),
}
// 2.

View File

@ -3324,6 +3324,9 @@ type dataCoordConfig struct {
ChannelCheckpointMaxLag ParamItem `refreshable:"true"`
SyncSegmentsInterval ParamItem `refreshable:"false"`
// Index related configuration
IndexMemSizeEstimateMultiplier ParamItem `refreshable:"true"`
// Clustering Compaction
ClusteringCompactionEnable ParamItem `refreshable:"true"`
ClusteringCompactionAutoEnable ParamItem `refreshable:"true"`
@ -3807,6 +3810,15 @@ During compaction, the size of segment # of rows is able to exceed segment max #
}
p.LevelZeroCompactionTriggerDeltalogMaxNum.Init(base.mgr)
p.IndexMemSizeEstimateMultiplier = ParamItem{
Key: "dataCoord.index.memSizeEstimateMultiplier",
Version: "2.4.19",
DefaultValue: "2",
Doc: "When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data",
Export: true,
}
p.IndexMemSizeEstimateMultiplier.Init(base.mgr)
p.ClusteringCompactionEnable = ParamItem{
Key: "dataCoord.compaction.clustering.enable",
Version: "2.4.7",