mirror of https://github.com/milvus-io/milvus.git
enhance: use GPU pool for gpu tasks (#29678)
- this much improve the performance for GPU index Signed-off-by: yah01 <yang.cen@zilliz.com>pull/29138/merge
parent
4f8c540c77
commit
9e0163e12f
|
@ -29,13 +29,17 @@ import (
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
|
"github.com/samber/lo"
|
||||||
"go.uber.org/atomic"
|
"go.uber.org/atomic"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/segcorepb"
|
"github.com/milvus-io/milvus/internal/proto/segcorepb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
|
||||||
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -123,6 +127,7 @@ type Collection struct {
|
||||||
loadType querypb.LoadType
|
loadType querypb.LoadType
|
||||||
metricType atomic.String
|
metricType atomic.String
|
||||||
schema atomic.Pointer[schemapb.CollectionSchema]
|
schema atomic.Pointer[schemapb.CollectionSchema]
|
||||||
|
isGpuIndex bool
|
||||||
|
|
||||||
refCount *atomic.Uint32
|
refCount *atomic.Uint32
|
||||||
}
|
}
|
||||||
|
@ -137,6 +142,11 @@ func (c *Collection) Schema() *schemapb.CollectionSchema {
|
||||||
return c.schema.Load()
|
return c.schema.Load()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsGpuIndex returns a boolean value indicating whether the collection is using a GPU index.
|
||||||
|
func (c *Collection) IsGpuIndex() bool {
|
||||||
|
return c.isGpuIndex
|
||||||
|
}
|
||||||
|
|
||||||
// getPartitionIDs return partitionIDs of collection
|
// getPartitionIDs return partitionIDs of collection
|
||||||
func (c *Collection) GetPartitions() []int64 {
|
func (c *Collection) GetPartitions() []int64 {
|
||||||
return c.partitions.Collect()
|
return c.partitions.Collect()
|
||||||
|
@ -205,6 +215,7 @@ func NewCollection(collectionID int64, schema *schemapb.CollectionSchema, indexM
|
||||||
|
|
||||||
collection := C.NewCollection(unsafe.Pointer(&schemaBlob[0]), (C.int64_t)(len(schemaBlob)))
|
collection := C.NewCollection(unsafe.Pointer(&schemaBlob[0]), (C.int64_t)(len(schemaBlob)))
|
||||||
|
|
||||||
|
isGpuIndex := false
|
||||||
if indexMeta != nil && len(indexMeta.GetIndexMetas()) > 0 && indexMeta.GetMaxIndexRowCount() > 0 {
|
if indexMeta != nil && len(indexMeta.GetIndexMetas()) > 0 && indexMeta.GetMaxIndexRowCount() > 0 {
|
||||||
indexMetaBlob, err := proto.Marshal(indexMeta)
|
indexMetaBlob, err := proto.Marshal(indexMeta)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -212,6 +223,15 @@ func NewCollection(collectionID int64, schema *schemapb.CollectionSchema, indexM
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
C.SetIndexMeta(collection, unsafe.Pointer(&indexMetaBlob[0]), (C.int64_t)(len(indexMetaBlob)))
|
C.SetIndexMeta(collection, unsafe.Pointer(&indexMetaBlob[0]), (C.int64_t)(len(indexMetaBlob)))
|
||||||
|
|
||||||
|
for _, indexMeta := range indexMeta.GetIndexMetas() {
|
||||||
|
isGpuIndex = lo.ContainsBy(indexMeta.GetIndexParams(), func(param *commonpb.KeyValuePair) bool {
|
||||||
|
return param.Key == common.IndexTypeKey && indexparamcheck.IsGpuIndex(param.Value)
|
||||||
|
})
|
||||||
|
if isGpuIndex {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
coll := &Collection{
|
coll := &Collection{
|
||||||
|
@ -220,6 +240,7 @@ func NewCollection(collectionID int64, schema *schemapb.CollectionSchema, indexM
|
||||||
partitions: typeutil.NewConcurrentSet[int64](),
|
partitions: typeutil.NewConcurrentSet[int64](),
|
||||||
loadType: loadType,
|
loadType: loadType,
|
||||||
refCount: atomic.NewUint32(0),
|
refCount: atomic.NewUint32(0),
|
||||||
|
isGpuIndex: isGpuIndex,
|
||||||
}
|
}
|
||||||
coll.schema.Store(schema)
|
coll.schema.Store(schema)
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ func newScheduler(policy schedulePolicy) Scheduler {
|
||||||
receiveChan: make(chan addTaskReq, maxReceiveChanSize),
|
receiveChan: make(chan addTaskReq, maxReceiveChanSize),
|
||||||
execChan: make(chan Task),
|
execChan: make(chan Task),
|
||||||
pool: conc.NewPool[any](maxReadConcurrency, conc.WithPreAlloc(true)),
|
pool: conc.NewPool[any](maxReadConcurrency, conc.WithPreAlloc(true)),
|
||||||
|
gpuPool: conc.NewPool[any](paramtable.Get().QueryNodeCfg.MaxGpuReadConcurrency.GetAsInt(), conc.WithPreAlloc(true)),
|
||||||
schedulerCounter: schedulerCounter{},
|
schedulerCounter: schedulerCounter{},
|
||||||
lifetime: lifetime.NewLifetime(lifetime.Initializing),
|
lifetime: lifetime.NewLifetime(lifetime.Initializing),
|
||||||
}
|
}
|
||||||
|
@ -46,6 +47,7 @@ type scheduler struct {
|
||||||
receiveChan chan addTaskReq
|
receiveChan chan addTaskReq
|
||||||
execChan chan Task
|
execChan chan Task
|
||||||
pool *conc.Pool[any]
|
pool *conc.Pool[any]
|
||||||
|
gpuPool *conc.Pool[any]
|
||||||
|
|
||||||
// wg is the waitgroup for internal worker goroutine
|
// wg is the waitgroup for internal worker goroutine
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
@ -227,7 +229,7 @@ func (s *scheduler) exec() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
s.pool.Submit(func() (any, error) {
|
s.getPool(t).Submit(func() (any, error) {
|
||||||
// Update concurrency metric and notify task done.
|
// Update concurrency metric and notify task done.
|
||||||
metrics.QueryNodeReadTaskConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Inc()
|
metrics.QueryNodeReadTaskConcurrency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Inc()
|
||||||
collector.Counter.Inc(metricsinfo.ExecuteQueueType, 1)
|
collector.Counter.Inc(metricsinfo.ExecuteQueueType, 1)
|
||||||
|
@ -245,6 +247,14 @@ func (s *scheduler) exec() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *scheduler) getPool(t Task) *conc.Pool[any] {
|
||||||
|
if t.IsGpuIndex() {
|
||||||
|
return s.gpuPool
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.pool
|
||||||
|
}
|
||||||
|
|
||||||
// setupExecListener setup the execChan and next task to run.
|
// setupExecListener setup the execChan and next task to run.
|
||||||
func (s *scheduler) setupExecListener(lastWaitingTask Task) (Task, int64, chan Task) {
|
func (s *scheduler) setupExecListener(lastWaitingTask Task) (Task, int64, chan Task) {
|
||||||
var execChan chan Task
|
var execChan chan Task
|
||||||
|
|
|
@ -64,6 +64,10 @@ func (t *MockTask) Username() string {
|
||||||
return t.username
|
return t.username
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *MockTask) IsGpuIndex() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (t *MockTask) TimeRecorder() *timerecord.TimeRecorder {
|
func (t *MockTask) TimeRecorder() *timerecord.TimeRecorder {
|
||||||
return t.tr
|
return t.tr
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,10 @@ func (t *QueryStreamTask) Username() string {
|
||||||
return t.req.Req.GetUsername()
|
return t.req.Req.GetUsername()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *QueryStreamTask) IsGpuIndex() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// PreExecute the task, only call once.
|
// PreExecute the task, only call once.
|
||||||
func (t *QueryStreamTask) PreExecute() error {
|
func (t *QueryStreamTask) PreExecute() error {
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -51,6 +51,10 @@ func (t *QueryTask) Username() string {
|
||||||
return t.req.Req.GetUsername()
|
return t.req.Req.GetUsername()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *QueryTask) IsGpuIndex() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// PreExecute the task, only call once.
|
// PreExecute the task, only call once.
|
||||||
func (t *QueryTask) PreExecute() error {
|
func (t *QueryTask) PreExecute() error {
|
||||||
// Update task wait time metric before execute
|
// Update task wait time metric before execute
|
||||||
|
|
|
@ -77,6 +77,10 @@ func (t *SearchTask) Username() string {
|
||||||
return t.req.Req.GetUsername()
|
return t.req.Req.GetUsername()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *SearchTask) IsGpuIndex() bool {
|
||||||
|
return t.collection.IsGpuIndex()
|
||||||
|
}
|
||||||
|
|
||||||
func (t *SearchTask) PreExecute() error {
|
func (t *SearchTask) PreExecute() error {
|
||||||
// Update task wait time metric before execute
|
// Update task wait time metric before execute
|
||||||
nodeID := strconv.FormatInt(paramtable.GetNodeID(), 10)
|
nodeID := strconv.FormatInt(paramtable.GetNodeID(), 10)
|
||||||
|
|
|
@ -82,6 +82,9 @@ type Task interface {
|
||||||
// Return "" if the task do not contain any user info.
|
// Return "" if the task do not contain any user info.
|
||||||
Username() string
|
Username() string
|
||||||
|
|
||||||
|
// Return whether the task would be running on GPU.
|
||||||
|
IsGpuIndex() bool
|
||||||
|
|
||||||
// PreExecute the task, only call once.
|
// PreExecute the task, only call once.
|
||||||
PreExecute() error
|
PreExecute() error
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ type IndexType = string
|
||||||
|
|
||||||
// IndexType definitions
|
// IndexType definitions
|
||||||
const (
|
const (
|
||||||
|
IndexGpuBF IndexType = "GPU_BRUTE_FORCE"
|
||||||
IndexRaftIvfFlat IndexType = "GPU_IVF_FLAT"
|
IndexRaftIvfFlat IndexType = "GPU_IVF_FLAT"
|
||||||
IndexRaftIvfPQ IndexType = "GPU_IVF_PQ"
|
IndexRaftIvfPQ IndexType = "GPU_IVF_PQ"
|
||||||
IndexRaftCagra IndexType = "GPU_CAGRA"
|
IndexRaftCagra IndexType = "GPU_CAGRA"
|
||||||
|
@ -29,3 +30,10 @@ const (
|
||||||
IndexHNSW IndexType = "HNSW"
|
IndexHNSW IndexType = "HNSW"
|
||||||
IndexDISKANN IndexType = "DISKANN"
|
IndexDISKANN IndexType = "DISKANN"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func IsGpuIndex(indexType IndexType) bool {
|
||||||
|
return indexType == IndexGpuBF ||
|
||||||
|
indexType == IndexRaftIvfFlat ||
|
||||||
|
indexType == IndexRaftIvfPQ ||
|
||||||
|
indexType == IndexRaftCagra
|
||||||
|
}
|
||||||
|
|
|
@ -1758,6 +1758,7 @@ type queryNodeConfig struct {
|
||||||
MaxReceiveChanSize ParamItem `refreshable:"false"`
|
MaxReceiveChanSize ParamItem `refreshable:"false"`
|
||||||
MaxUnsolvedQueueSize ParamItem `refreshable:"true"`
|
MaxUnsolvedQueueSize ParamItem `refreshable:"true"`
|
||||||
MaxReadConcurrency ParamItem `refreshable:"true"`
|
MaxReadConcurrency ParamItem `refreshable:"true"`
|
||||||
|
MaxGpuReadConcurrency ParamItem `refreshable:"false"`
|
||||||
MaxGroupNQ ParamItem `refreshable:"true"`
|
MaxGroupNQ ParamItem `refreshable:"true"`
|
||||||
TopKMergeRatio ParamItem `refreshable:"true"`
|
TopKMergeRatio ParamItem `refreshable:"true"`
|
||||||
CPURatio ParamItem `refreshable:"true"`
|
CPURatio ParamItem `refreshable:"true"`
|
||||||
|
@ -2000,6 +2001,13 @@ Max read concurrency must greater than or equal to 1, and less than or equal to
|
||||||
}
|
}
|
||||||
p.MaxReadConcurrency.Init(base.mgr)
|
p.MaxReadConcurrency.Init(base.mgr)
|
||||||
|
|
||||||
|
p.MaxGpuReadConcurrency = ParamItem{
|
||||||
|
Key: "queryNode.scheduler.maGpuReadConcurrency",
|
||||||
|
Version: "2.0.0",
|
||||||
|
DefaultValue: "8",
|
||||||
|
}
|
||||||
|
p.MaxGpuReadConcurrency.Init(base.mgr)
|
||||||
|
|
||||||
p.MaxUnsolvedQueueSize = ParamItem{
|
p.MaxUnsolvedQueueSize = ParamItem{
|
||||||
Key: "queryNode.scheduler.unsolvedQueueSize",
|
Key: "queryNode.scheduler.unsolvedQueueSize",
|
||||||
Version: "2.0.0",
|
Version: "2.0.0",
|
||||||
|
|
Loading…
Reference in New Issue