enhance: Use an individual buffer size parameter for imports (#31833)

Use an individual buffer size parameter for imports and set the buffer
size to 16MB.

issue: https://github.com/milvus-io/milvus/issues/28521
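
For orientation, the scheduler hunks below convert the MB-denominated parameter to bytes at each task start. A minimal standalone sketch of that conversion (the helper name is hypothetical; the arithmetic matches the diff):

package main

import "fmt"

// importReadBufferBytes is a hypothetical helper mirroring the conversion
// the scheduler performs: the new datanode.import.readBufferSizeInMB
// parameter is MB-denominated and multiplied out to bytes at the call site.
func importReadBufferBytes(readBufferSizeInMB int) int {
    return readBufferSizeInMB * 1024 * 1024
}

func main() {
    // With the default of 16 MB, imports read 16777216-byte blocks.
    fmt.Println(importReadBufferBytes(16))
}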

---------

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
yihao.dai 2024-04-08 21:07:18 +08:00 committed by GitHub
parent 9901958288
commit 49d109de18
4 changed files with 15 additions and 2 deletions

configs/milvus.yaml

@@ -506,6 +506,7 @@ dataNode:
   import:
     maxConcurrentTaskNum: 16 # The maximum number of import/pre-import tasks allowed to run concurrently on a datanode.
     maxImportFileSizeInGB: 16 # The maximum file size (in GB) for an import file, where an import file refers to either a Row-Based file or a set of Column-Based files.
+    readBufferSizeInMB: 16 # The data block size (in MB) read from chunk manager by the datanode during import.

 # Configures the system log output.
 log:
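
As a sanity check on these defaults, and assuming each concurrent import task allocates one read buffer, the worst-case read-buffer footprint on a datanode is bounded by the product of the two knobs above; a hypothetical back-of-the-envelope sketch (not Milvus code):

package main

import "fmt"

// worstCaseImportBufferMB is a hypothetical helper, assuming one read
// buffer per concurrent import/pre-import task.
func worstCaseImportBufferMB(maxConcurrentTaskNum, readBufferSizeInMB int) int {
    return maxConcurrentTaskNum * readBufferSizeInMB
}

func main() {
    // With the defaults above: 16 tasks * 16 MB = 256 MB.
    fmt.Println(worstCaseImportBufferMB(16, 16), "MB")
}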

internal/datanode/importv2/scheduler.go

@@ -150,7 +150,7 @@ func (s *scheduler) handleErr(task Task, err error, msg string) {
 }

 func (s *scheduler) PreImport(task Task) []*conc.Future[any] {
-    bufferSize := paramtable.Get().DataNodeCfg.FlushInsertBufferSize.GetAsInt()
+    bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024
     log.Info("start to preimport", WrapLogFields(task,
         zap.Int("bufferSize", bufferSize),
         zap.Any("schema", task.GetSchema()))...)
@@ -241,7 +241,7 @@ func (s *scheduler) readFileStat(reader importutilv2.Reader, task Task, fileIdx
 }

 func (s *scheduler) Import(task Task) []*conc.Future[any] {
-    bufferSize := paramtable.Get().DataNodeCfg.FlushInsertBufferSize.GetAsInt()
+    bufferSize := paramtable.Get().DataNodeCfg.ReadBufferSizeInMB.GetAsInt() * 1024 * 1024
     log.Info("start to import", WrapLogFields(task,
         zap.Int("bufferSize", bufferSize),
         zap.Any("schema", task.GetSchema()))...)

pkg/util/paramtable/component_param.go

@@ -3209,6 +3209,7 @@ type dataNodeConfig struct {
     // import
     MaxConcurrentImportTaskNum ParamItem `refreshable:"true"`
     MaxImportFileSizeInGB      ParamItem `refreshable:"true"`
+    ReadBufferSizeInMB         ParamItem `refreshable:"true"`

     // Compaction
     L0BatchMemoryRatio ParamItem `refreshable:"true"`
@@ -3477,6 +3478,16 @@ func (p *dataNodeConfig) init(base *BaseTable) {
     }
     p.MaxImportFileSizeInGB.Init(base.mgr)

+    p.ReadBufferSizeInMB = ParamItem{
+        Key:          "datanode.import.readBufferSizeInMB",
+        Version:      "2.4.0",
+        Doc:          "The data block size (in MB) read from chunk manager by the datanode during import.",
+        DefaultValue: "16",
+        PanicIfEmpty: false,
+        Export:       true,
+    }
+    p.ReadBufferSizeInMB.Init(base.mgr)
+
     p.L0BatchMemoryRatio = ParamItem{
         Key:     "datanode.compaction.levelZeroBatchMemoryRatio",
         Version: "2.4.0",

pkg/util/paramtable/component_param_test.go

@@ -441,6 +441,7 @@ func TestComponentParam(t *testing.T) {
         t.Logf("maxConcurrentImportTaskNum: %d", maxConcurrentImportTaskNum)
         assert.Equal(t, 16, maxConcurrentImportTaskNum)
         assert.Equal(t, int64(16), Params.MaxImportFileSizeInGB.GetAsInt64())
+        assert.Equal(t, 16, Params.ReadBufferSizeInMB.GetAsInt())
         params.Save("datanode.gracefulStopTimeout", "100")
         assert.Equal(t, 100*time.Second, Params.GracefulStopTimeout.GetAsDuration(time.Second))
     })
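
A possible follow-up test, sketched under the assumption that the Init/Get/Save pattern of the existing TestComponentParam applies; since the item is declared refreshable:"true", a runtime save should be observable:

package paramtable

import (
    "testing"

    "github.com/stretchr/testify/assert"
)

// TestImportReadBufferSizeRefresh is a hypothetical extra test, not part
// of this commit: it checks that a runtime override of the new parameter
// is picked up, since ReadBufferSizeInMB is refreshable.
func TestImportReadBufferSizeRefresh(t *testing.T) {
    Init()
    params := Get()
    Params := &params.DataNodeCfg

    // Default from the ParamItem definition (and configs/milvus.yaml).
    assert.Equal(t, 16, Params.ReadBufferSizeInMB.GetAsInt())

    // A runtime save should refresh the value for subsequent readers.
    params.Save("datanode.import.readBufferSizeInMB", "64")
    assert.Equal(t, 64, Params.ReadBufferSizeInMB.GetAsInt())
}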