2022-10-11 03:39:22 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2022-09-15 10:48:32 +00:00
package querycoordv2
import (
"context"
"fmt"
2022-10-18 05:39:26 +00:00
"sync"
2023-02-26 03:31:49 +00:00
"github.com/cockroachdb/errors"
2023-04-06 11:14:32 +00:00
"github.com/samber/lo"
"go.uber.org/multierr"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
2023-02-26 03:31:49 +00:00
2023-06-08 17:28:37 +00:00
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
2022-09-15 10:48:32 +00:00
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/job"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
2023-04-06 11:14:32 +00:00
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/typeutil"
2022-09-15 10:48:32 +00:00
)
var (
2023-07-17 06:59:34 +00:00
// ErrRemoveNodeFromRGFailed = errors.New("failed to remove node from resource group")
// ErrTransferNodeFailed = errors.New("failed to transfer node between resource group")
// ErrTransferReplicaFailed = errors.New("failed to transfer replica between resource group")
// ErrListResourceGroupsFailed = errors.New("failed to list resource group")
// ErrDescribeResourceGroupFailed = errors.New("failed to describe resource group")
// ErrLoadUseWrongRG = errors.New("load operation should use collection's resource group")
// ErrLoadWithDefaultRG = errors.New("load operation can't use default resource group and other resource group together")
2022-09-15 10:48:32 +00:00
)
func ( s * Server ) ShowCollections ( ctx context . Context , req * querypb . ShowCollectionsRequest ) ( * querypb . ShowCollectionsResponse , error ) {
2022-11-14 07:29:06 +00:00
log . Ctx ( ctx ) . Info ( "show collections request received" , zap . Int64s ( "collections" , req . GetCollectionIDs ( ) ) )
2022-09-15 10:48:32 +00:00
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2023-07-17 06:59:34 +00:00
msg := "failed to show collections"
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . ShowCollectionsResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
2023-01-10 12:35:39 +00:00
defer meta . GlobalFailedLoadCache . TryExpire ( )
2022-09-15 10:48:32 +00:00
2022-09-21 09:54:50 +00:00
isGetAll := false
2022-09-15 10:48:32 +00:00
collectionSet := typeutil . NewUniqueSet ( req . GetCollectionIDs ( ) ... )
if len ( req . GetCollectionIDs ( ) ) == 0 {
for _ , collection := range s . meta . GetAllCollections ( ) {
collectionSet . Insert ( collection . GetCollectionID ( ) )
}
2022-09-21 09:54:50 +00:00
isGetAll = true
2022-09-15 10:48:32 +00:00
}
collections := collectionSet . Collect ( )
resp := & querypb . ShowCollectionsResponse {
2023-03-10 09:15:54 +00:00
Status : & commonpb . Status { } ,
2022-09-21 09:54:50 +00:00
CollectionIDs : make ( [ ] int64 , 0 , len ( collectionSet ) ) ,
InMemoryPercentages : make ( [ ] int64 , 0 , len ( collectionSet ) ) ,
QueryServiceAvailable : make ( [ ] bool , 0 , len ( collectionSet ) ) ,
2022-09-15 10:48:32 +00:00
}
2022-09-21 09:54:50 +00:00
for _ , collectionID := range collections {
2022-09-15 10:48:32 +00:00
log := log . With ( zap . Int64 ( "collectionID" , collectionID ) )
2023-04-12 07:06:28 +00:00
collection := s . meta . CollectionManager . GetCollection ( collectionID )
percentage := s . meta . CollectionManager . CalculateLoadPercentage ( collectionID )
refreshProgress := int64 ( 0 )
if percentage < 0 {
2022-09-21 09:54:50 +00:00
if isGetAll {
// The collection is released during this,
// ignore it
continue
}
2023-03-10 09:15:54 +00:00
err := meta . GlobalFailedLoadCache . Get ( collectionID )
if err != nil {
2023-07-17 06:59:34 +00:00
msg := "show collection failed"
log . Warn ( msg , zap . Error ( err ) )
status := merr . Status ( errors . Wrap ( err , msg ) )
2023-01-10 12:35:39 +00:00
return & querypb . ShowCollectionsResponse {
Status : status ,
} , nil
}
2023-03-10 09:15:54 +00:00
2023-09-30 02:31:28 +00:00
err = merr . WrapErrCollectionNotLoaded ( collectionID )
2022-09-15 10:48:32 +00:00
log . Warn ( "show collection failed" , zap . Error ( err ) )
return & querypb . ShowCollectionsResponse {
2023-03-10 09:15:54 +00:00
Status : merr . Status ( err ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
2023-03-30 02:48:23 +00:00
2023-04-12 07:06:28 +00:00
if collection . IsRefreshed ( ) {
refreshProgress = 100
}
2022-09-21 09:54:50 +00:00
resp . CollectionIDs = append ( resp . CollectionIDs , collectionID )
resp . InMemoryPercentages = append ( resp . InMemoryPercentages , int64 ( percentage ) )
resp . QueryServiceAvailable = append ( resp . QueryServiceAvailable , s . checkAnyReplicaAvailable ( collectionID ) )
2023-04-12 07:06:28 +00:00
resp . RefreshProgress = append ( resp . RefreshProgress , refreshProgress )
2022-09-15 10:48:32 +00:00
}
return resp , nil
}
func ( s * Server ) ShowPartitions ( ctx context . Context , req * querypb . ShowPartitionsRequest ) ( * querypb . ShowPartitionsResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
2022-09-21 09:54:50 +00:00
log . Info ( "show partitions request received" , zap . Int64s ( "partitions" , req . GetPartitionIDs ( ) ) )
2022-09-15 10:48:32 +00:00
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2023-07-17 06:59:34 +00:00
msg := "failed to show partitions"
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . ShowPartitionsResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
2023-01-10 12:35:39 +00:00
defer meta . GlobalFailedLoadCache . TryExpire ( )
2022-09-15 10:48:32 +00:00
partitions := req . GetPartitionIDs ( )
percentages := make ( [ ] int64 , 0 )
2023-04-12 07:06:28 +00:00
refreshProgress := int64 ( 0 )
2022-09-15 10:48:32 +00:00
2023-03-20 06:55:57 +00:00
if len ( partitions ) == 0 {
partitions = lo . Map ( s . meta . GetPartitionsByCollection ( req . GetCollectionID ( ) ) , func ( partition * meta . Partition , _ int ) int64 {
return partition . GetPartitionID ( )
} )
}
for _ , partitionID := range partitions {
percentage := s . meta . GetPartitionLoadPercentage ( partitionID )
if percentage < 0 {
err := meta . GlobalFailedLoadCache . Get ( req . GetCollectionID ( ) )
2022-09-15 10:48:32 +00:00
if err != nil {
2023-03-20 06:55:57 +00:00
status := merr . Status ( err )
log . Warn ( "show partition failed" , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . ShowPartitionsResponse {
2023-03-20 06:55:57 +00:00
Status : status ,
2022-09-15 10:48:32 +00:00
} , nil
}
2023-07-17 06:59:34 +00:00
err = merr . WrapErrPartitionNotLoaded ( partitionID )
2023-09-30 02:31:28 +00:00
log . Warn ( "show partitions failed" , zap . Error ( err ) )
2023-01-10 12:35:39 +00:00
return & querypb . ShowPartitionsResponse {
2023-09-30 02:31:28 +00:00
Status : merr . Status ( err ) ,
2023-01-10 12:35:39 +00:00
} , nil
}
2023-03-20 06:55:57 +00:00
percentages = append ( percentages , int64 ( percentage ) )
2022-09-15 10:48:32 +00:00
}
2023-04-12 07:06:28 +00:00
collection := s . meta . GetCollection ( req . GetCollectionID ( ) )
if collection != nil && collection . IsRefreshed ( ) {
refreshProgress = 100
}
refreshProgresses := make ( [ ] int64 , len ( partitions ) )
for i := range partitions {
refreshProgresses [ i ] = refreshProgress
}
2022-09-15 10:48:32 +00:00
return & querypb . ShowPartitionsResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
PartitionIDs : partitions ,
InMemoryPercentages : percentages ,
2023-04-12 07:06:28 +00:00
RefreshProgress : refreshProgresses ,
2022-09-15 10:48:32 +00:00
} , nil
}
func ( s * Server ) LoadCollection ( ctx context . Context , req * querypb . LoadCollectionRequest ) ( * commonpb . Status , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
2023-02-09 08:24:31 +00:00
zap . Int32 ( "replicaNumber" , req . GetReplicaNumber ( ) ) ,
zap . Strings ( "resourceGroups" , req . GetResourceGroups ( ) ) ,
zap . Bool ( "refreshMode" , req . GetRefresh ( ) ) ,
2022-09-15 10:48:32 +00:00
)
log . Info ( "load collection request received" ,
zap . Any ( "schema" , req . Schema ) ,
2022-10-20 03:29:27 +00:00
zap . Int64s ( "fieldIndexes" , lo . Values ( req . GetFieldIndexID ( ) ) ) ,
)
2022-09-15 10:48:32 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to load collection"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
2023-01-18 08:41:44 +00:00
// If refresh mode is ON.
if req . GetRefresh ( ) {
2023-04-12 07:06:28 +00:00
err := s . refreshCollection ( req . GetCollectionID ( ) )
if err != nil {
log . Warn ( "failed to refresh collection" , zap . Error ( err ) )
}
return merr . Status ( err ) , nil
2023-01-18 08:41:44 +00:00
}
2023-01-30 02:19:48 +00:00
if err := s . checkResourceGroup ( req . GetCollectionID ( ) , req . GetResourceGroups ( ) ) ; err != nil {
msg := "failed to load collection"
log . Warn ( msg , zap . Error ( err ) )
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2023-01-30 02:19:48 +00:00
}
2022-09-15 10:48:32 +00:00
loadJob := job . NewLoadCollectionJob ( ctx ,
req ,
s . dist ,
s . meta ,
2023-05-18 01:17:23 +00:00
s . broker ,
2023-03-20 06:55:57 +00:00
s . cluster ,
2022-09-15 10:48:32 +00:00
s . targetMgr ,
2023-03-20 06:55:57 +00:00
s . targetObserver ,
2022-09-15 10:48:32 +00:00
s . nodeMgr ,
)
s . jobScheduler . Add ( loadJob )
err := loadJob . Wait ( )
2023-07-17 06:59:34 +00:00
if err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to load collection"
log . Warn ( msg , zap . Error ( err ) )
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2022-09-15 10:48:32 +00:00
}
func ( s * Server ) ReleaseCollection ( ctx context . Context , req * querypb . ReleaseCollectionRequest ) ( * commonpb . Status , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "release collection request received" )
tr := timerecord . NewTimeRecorder ( "release-collection" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to release collection"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
releaseJob := job . NewReleaseCollectionJob ( ctx ,
req ,
s . dist ,
s . meta ,
2023-05-18 01:17:23 +00:00
s . broker ,
s . cluster ,
2022-09-15 10:48:32 +00:00
s . targetMgr ,
2023-01-14 13:55:41 +00:00
s . targetObserver ,
2023-05-30 09:41:28 +00:00
s . checkerController ,
2022-09-15 10:48:32 +00:00
)
s . jobScheduler . Add ( releaseJob )
err := releaseJob . Wait ( )
if err != nil {
msg := "failed to release collection"
2023-08-14 10:57:32 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
log . Info ( "collection released" )
metrics . QueryCoordReleaseLatency . WithLabelValues ( ) . Observe ( float64 ( tr . ElapseSpan ( ) . Milliseconds ( ) ) )
2023-01-10 12:35:39 +00:00
meta . GlobalFailedLoadCache . Remove ( req . GetCollectionID ( ) )
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2022-09-15 10:48:32 +00:00
}
func ( s * Server ) LoadPartitions ( ctx context . Context , req * querypb . LoadPartitionsRequest ) ( * commonpb . Status , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
2023-01-30 02:19:48 +00:00
zap . Int32 ( "replicaNumber" , req . GetReplicaNumber ( ) ) ,
zap . Strings ( "resourceGroups" , req . GetResourceGroups ( ) ) ,
2023-02-02 11:29:51 +00:00
zap . Bool ( "refreshMode" , req . GetRefresh ( ) ) ,
2022-09-15 10:48:32 +00:00
)
log . Info ( "received load partitions request" ,
zap . Any ( "schema" , req . Schema ) ,
zap . Int64s ( "partitions" , req . GetPartitionIDs ( ) ) )
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to load partitions"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
2023-01-18 08:41:44 +00:00
// If refresh mode is ON.
if req . GetRefresh ( ) {
2023-04-12 07:06:28 +00:00
err := s . refreshCollection ( req . GetCollectionID ( ) )
if err != nil {
log . Warn ( "failed to refresh partitions" , zap . Error ( err ) )
}
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
if err := s . checkResourceGroup ( req . GetCollectionID ( ) , req . GetResourceGroups ( ) ) ; err != nil {
msg := "failed to load partitions"
2023-07-17 06:59:34 +00:00
log . Warn ( msg , zap . Error ( err ) )
2023-01-30 02:19:48 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2023-01-18 08:41:44 +00:00
}
2022-09-15 10:48:32 +00:00
loadJob := job . NewLoadPartitionJob ( ctx ,
req ,
s . dist ,
s . meta ,
2023-05-18 01:17:23 +00:00
s . broker ,
2023-03-20 06:55:57 +00:00
s . cluster ,
2022-09-15 10:48:32 +00:00
s . targetMgr ,
2023-03-20 06:55:57 +00:00
s . targetObserver ,
2022-09-15 10:48:32 +00:00
s . nodeMgr ,
)
s . jobScheduler . Add ( loadJob )
err := loadJob . Wait ( )
2023-07-17 06:59:34 +00:00
if err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to load partitions"
log . Warn ( msg , zap . Error ( err ) )
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2022-09-15 10:48:32 +00:00
}
2023-01-30 02:19:48 +00:00
func ( s * Server ) checkResourceGroup ( collectionID int64 , resourceGroups [ ] string ) error {
if len ( resourceGroups ) != 0 {
collectionUsedRG := s . meta . ReplicaManager . GetResourceGroupByCollection ( collectionID )
for _ , rgName := range resourceGroups {
2023-02-09 08:24:31 +00:00
if len ( collectionUsedRG ) > 0 && ! collectionUsedRG . Contain ( rgName ) {
2023-07-17 06:59:34 +00:00
return merr . WrapErrParameterInvalid ( "created resource group(s)" , rgName , "given resource group not found" )
2023-01-30 02:19:48 +00:00
}
2023-02-21 11:06:27 +00:00
if len ( resourceGroups ) > 1 && rgName == meta . DefaultResourceGroupName {
2023-07-17 06:59:34 +00:00
return merr . WrapErrParameterInvalid ( "no default resource group mixed with the other resource group(s)" , rgName )
2023-02-21 11:06:27 +00:00
}
2023-01-30 02:19:48 +00:00
}
}
return nil
}
2022-09-15 10:48:32 +00:00
func ( s * Server ) ReleasePartitions ( ctx context . Context , req * querypb . ReleasePartitionsRequest ) ( * commonpb . Status , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "release partitions" , zap . Int64s ( "partitions" , req . GetPartitionIDs ( ) ) )
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to release partitions"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
if len ( req . GetPartitionIDs ( ) ) == 0 {
2024-02-12 00:30:55 +00:00
err := merr . WrapErrParameterInvalid ( "any partition" , "empty partition list" )
2023-07-17 06:59:34 +00:00
log . Warn ( "no partition to release" , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2022-09-15 10:48:32 +00:00
}
tr := timerecord . NewTimeRecorder ( "release-partitions" )
releaseJob := job . NewReleasePartitionJob ( ctx ,
req ,
s . dist ,
s . meta ,
2023-05-18 01:17:23 +00:00
s . broker ,
2023-03-20 06:55:57 +00:00
s . cluster ,
2022-09-15 10:48:32 +00:00
s . targetMgr ,
2023-01-14 13:55:41 +00:00
s . targetObserver ,
2023-05-30 09:41:28 +00:00
s . checkerController ,
2022-09-15 10:48:32 +00:00
)
s . jobScheduler . Add ( releaseJob )
err := releaseJob . Wait ( )
if err != nil {
msg := "failed to release partitions"
2023-08-14 10:57:32 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
metrics . QueryCoordReleaseLatency . WithLabelValues ( ) . Observe ( float64 ( tr . ElapseSpan ( ) . Milliseconds ( ) ) )
2023-01-10 12:35:39 +00:00
meta . GlobalFailedLoadCache . Remove ( req . GetCollectionID ( ) )
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2022-09-15 10:48:32 +00:00
}
func ( s * Server ) GetPartitionStates ( ctx context . Context , req * querypb . GetPartitionStatesRequest ) ( * querypb . GetPartitionStatesResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "get partition states" , zap . Int64s ( "partitions" , req . GetPartitionIDs ( ) ) )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to get partition states"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . GetPartitionStatesResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
msg := "partition not loaded"
notLoadResp := & querypb . GetPartitionStatesResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( merr . WrapErrPartitionNotLoaded ( req . GetPartitionIDs ( ) ) ) ,
2022-09-15 10:48:32 +00:00
}
states := make ( [ ] * querypb . PartitionStates , 0 , len ( req . GetPartitionIDs ( ) ) )
switch s . meta . GetLoadType ( req . GetCollectionID ( ) ) {
case querypb . LoadType_LoadCollection :
collection := s . meta . GetCollection ( req . GetCollectionID ( ) )
state := querypb . PartitionState_PartialInMemory
if collection . LoadPercentage >= 100 {
state = querypb . PartitionState_InMemory
}
releasedPartitions := typeutil . NewUniqueSet ( collection . GetReleasedPartitions ( ) ... )
for _ , partition := range req . GetPartitionIDs ( ) {
if releasedPartitions . Contain ( partition ) {
log . Warn ( msg )
return notLoadResp , nil
}
states = append ( states , & querypb . PartitionStates {
PartitionID : partition ,
State : state ,
} )
}
case querypb . LoadType_LoadPartition :
for _ , partitionID := range req . GetPartitionIDs ( ) {
partition := s . meta . GetPartition ( partitionID )
if partition == nil {
log . Warn ( msg , zap . Int64 ( "partition" , partitionID ) )
return notLoadResp , nil
}
state := querypb . PartitionState_PartialInMemory
if partition . LoadPercentage >= 100 {
state = querypb . PartitionState_InMemory
}
states = append ( states , & querypb . PartitionStates {
PartitionID : partitionID ,
State : state ,
} )
}
default :
log . Warn ( msg )
return notLoadResp , nil
}
return & querypb . GetPartitionStatesResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
PartitionDescriptions : states ,
} , nil
}
func ( s * Server ) GetSegmentInfo ( ctx context . Context , req * querypb . GetSegmentInfoRequest ) ( * querypb . GetSegmentInfoResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "get segment info" , zap . Int64s ( "segments" , req . GetSegmentIDs ( ) ) )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to get segment info"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . GetSegmentInfoResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
infos := make ( [ ] * querypb . SegmentInfo , 0 , len ( req . GetSegmentIDs ( ) ) )
if len ( req . GetSegmentIDs ( ) ) == 0 {
infos = s . getCollectionSegmentInfo ( req . GetCollectionID ( ) )
} else {
for _ , segmentID := range req . GetSegmentIDs ( ) {
segments := s . dist . SegmentDistManager . Get ( segmentID )
if len ( segments ) == 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrSegmentNotLoaded ( segmentID )
2022-09-15 10:48:32 +00:00
msg := fmt . Sprintf ( "segment %v not found in any node" , segmentID )
log . Warn ( msg , zap . Int64 ( "segment" , segmentID ) )
return & querypb . GetSegmentInfoResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
info := & querypb . SegmentInfo { }
utils . MergeMetaSegmentIntoSegmentInfo ( info , segments ... )
infos = append ( infos , info )
}
}
return & querypb . GetSegmentInfoResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
Infos : infos ,
} , nil
}
2023-03-20 06:55:57 +00:00
func ( s * Server ) SyncNewCreatedPartition ( ctx context . Context , req * querypb . SyncNewCreatedPartitionRequest ) ( * commonpb . Status , error ) {
log := log . Ctx ( ctx ) . With (
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
zap . Int64 ( "partitionID" , req . GetPartitionID ( ) ) ,
)
log . Info ( "received sync new created partition request" )
failedMsg := "failed to sync new created partition"
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( failedMsg , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-03-20 06:55:57 +00:00
}
2023-05-18 01:17:23 +00:00
syncJob := job . NewSyncNewCreatedPartitionJob ( ctx , req , s . meta , s . cluster , s . broker )
2023-03-20 06:55:57 +00:00
s . jobScheduler . Add ( syncJob )
err := syncJob . Wait ( )
2023-07-17 06:59:34 +00:00
if err != nil {
2023-03-20 06:55:57 +00:00
log . Warn ( failedMsg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2023-03-20 06:55:57 +00:00
}
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2023-03-20 06:55:57 +00:00
}
2023-01-18 08:41:44 +00:00
// refreshCollection must be called after loading a collection. It looks for new segments that are not loaded yet and
// tries to load them up. It returns when all segments of the given collection are loaded, or when error happens.
// Note that a collection's loading progress always stays at 100% after a successful load and will not get updated
// during refreshCollection.
2023-04-12 07:06:28 +00:00
func ( s * Server ) refreshCollection ( collectionID int64 ) error {
collection := s . meta . CollectionManager . GetCollection ( collectionID )
if collection == nil {
return merr . WrapErrCollectionNotLoaded ( collectionID )
2023-01-18 08:41:44 +00:00
}
// Check that collection is fully loaded.
2023-04-12 07:06:28 +00:00
if collection . GetStatus ( ) != querypb . LoadStatus_Loaded {
return merr . WrapErrCollectionNotLoaded ( collectionID , "collection not fully loaded" )
2023-01-18 08:41:44 +00:00
}
// Pull the latest target.
2023-04-12 07:06:28 +00:00
readyCh , err := s . targetObserver . UpdateNextTarget ( collectionID )
2023-01-18 08:41:44 +00:00
if err != nil {
2023-04-12 07:06:28 +00:00
return err
2023-01-18 08:41:44 +00:00
}
2023-04-12 07:06:28 +00:00
collection . SetRefreshNotifier ( readyCh )
return nil
2023-01-18 08:41:44 +00:00
}
2023-04-12 07:06:28 +00:00
// refreshPartitions is exactly the same as refreshCollection, so it is disabled (commented out) for now.
2023-01-18 08:41:44 +00:00
// refreshPartitions must be called after loading a collection. It looks for new segments that are not loaded yet and
// tries to load them up. It returns when all segments of the given collection are loaded, or when error happens.
// Note that a collection's loading progress always stays at 100% after a successful load and will not get updated
// during refreshPartitions.
2023-04-12 07:06:28 +00:00
// func (s *Server) refreshPartitions(ctx context.Context, collID int64, partIDs []int64) (*commonpb.Status, error) {
// ctx, cancel := context.WithTimeout(ctx, Params.QueryCoordCfg.LoadTimeoutSeconds.GetAsDuration(time.Second))
// defer cancel()
// log := log.Ctx(ctx).With(
// zap.Int64("collectionID", collID),
// zap.Int64s("partitionIDs", partIDs),
// )
// if s.status.Load() != commonpb.StateCode_Healthy {
// msg := "failed to refresh partitions"
// log.Warn(msg, zap.Error(ErrNotHealthy))
// metrics.QueryCoordReleaseCount.WithLabelValues(metrics.FailLabel).Inc()
// return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg, ErrNotHealthy), nil
// }
// // Check that all partitions are fully loaded.
// if s.meta.CollectionManager.GetCurrentLoadPercentage(collID) != 100 {
// errMsg := "partitions must be fully loaded before refreshing"
// log.Warn(errMsg)
// return &commonpb.Status{
// ErrorCode: commonpb.ErrorCode_UnexpectedError,
// Reason: errMsg,
// }, nil
// }
// // Pull the latest target.
// readyCh, err := s.targetObserver.UpdateNextTarget(collID)
// if err != nil {
// log.Warn("failed to update next target", zap.Error(err))
// return &commonpb.Status{
// ErrorCode: commonpb.ErrorCode_UnexpectedError,
// Reason: err.Error(),
// }, nil
// }
// select {
// case <-ctx.Done():
// log.Warn("refresh partitions failed as context canceled")
// return &commonpb.Status{
// ErrorCode: commonpb.ErrorCode_UnexpectedError,
// Reason: "context canceled",
// }, nil
// case <-readyCh:
// log.Info("refresh partitions succeeded")
// return &commonpb.Status{
// ErrorCode: commonpb.ErrorCode_Success,
// }, nil
// }
// }
2023-01-18 08:41:44 +00:00
2022-12-29 07:47:31 +00:00
// isStoppingNode returns a non-nil error when the given node cannot take part
// in a balance operation: either the node manager failed to answer whether the
// node is stopping (that error is returned as is), or the node is stopping.
// It returns nil when the node is known and not stopping.
func (s *Server) isStoppingNode(nodeID int64) error {
	stopping, err := s.nodeMgr.IsStoppingNode(nodeID)
	switch {
	case err != nil:
		log.Warn("fail to check whether the node is stopping", zap.Int64("node_id", nodeID), zap.Error(err))
		return err
	case stopping:
		reason := fmt.Sprintf("failed to balance due to the source/destination node[%d] is stopping", nodeID)
		log.Warn(reason)
		return errors.New(reason)
	default:
		return nil
	}
}
2022-09-15 10:48:32 +00:00
func ( s * Server ) LoadBalance ( ctx context . Context , req * querypb . LoadBalanceRequest ) ( * commonpb . Status , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "load balance request received" ,
zap . Int64s ( "source" , req . GetSourceNodeIDs ( ) ) ,
zap . Int64s ( "dest" , req . GetDstNodeIDs ( ) ) ,
zap . Int64s ( "segments" , req . GetSealedSegmentIDs ( ) ) )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to load balance"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
// Verify request
if len ( req . GetSourceNodeIDs ( ) ) != 1 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "only 1 source node" , fmt . Sprintf ( "%d source nodes" , len ( req . GetSourceNodeIDs ( ) ) ) )
2022-09-15 10:48:32 +00:00
msg := "source nodes can only contain 1 node"
log . Warn ( msg , zap . Int ( "source-nodes-num" , len ( req . GetSourceNodeIDs ( ) ) ) )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2022-09-15 10:48:32 +00:00
}
2023-03-30 02:48:23 +00:00
if s . meta . CollectionManager . CalculateLoadPercentage ( req . GetCollectionID ( ) ) < 100 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrCollectionNotFullyLoaded ( req . GetCollectionID ( ) )
2022-09-15 10:48:32 +00:00
msg := "can't balance segments of not fully loaded collection"
log . Warn ( msg )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2022-09-15 10:48:32 +00:00
}
srcNode := req . GetSourceNodeIDs ( ) [ 0 ]
replica := s . meta . ReplicaManager . GetByCollectionAndNode ( req . GetCollectionID ( ) , srcNode )
if replica == nil {
2023-09-26 09:15:27 +00:00
err := merr . WrapErrNodeNotFound ( srcNode , fmt . Sprintf ( "source node not found in any replica of collection %d" , req . GetCollectionID ( ) ) )
2022-09-27 08:00:54 +00:00
msg := "source node not found in any replica"
2022-09-15 10:48:32 +00:00
log . Warn ( msg )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2022-09-15 10:48:32 +00:00
}
2022-12-29 07:47:31 +00:00
if err := s . isStoppingNode ( srcNode ) ; err != nil {
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err ,
fmt . Sprintf ( "can't balance, because the source node[%d] is invalid" , srcNode ) ) ) , nil
2022-12-29 07:47:31 +00:00
}
2024-04-10 11:47:20 +00:00
// when no dst node specified, default to use all other nodes in same
dstNodeSet := typeutil . NewUniqueSet ( )
if len ( req . GetDstNodeIDs ( ) ) == 0 {
outboundNodes := s . meta . ResourceManager . CheckOutboundNodes ( replica )
availableNodes := lo . Filter ( replica . Replica . GetNodes ( ) , func ( node int64 , _ int ) bool { return ! outboundNodes . Contain ( node ) } )
dstNodeSet . Insert ( availableNodes ... )
} else {
for _ , dstNode := range req . GetDstNodeIDs ( ) {
if ! replica . Contains ( dstNode ) {
err := merr . WrapErrNodeNotFound ( dstNode , "destination node not found in the same replica" )
log . Warn ( "failed to balance to the destination node" , zap . Error ( err ) )
return merr . Status ( err ) , nil
}
dstNodeSet . Insert ( dstNode )
2022-09-15 10:48:32 +00:00
}
2024-04-10 11:47:20 +00:00
}
// check whether dstNode is healthy
for dstNode := range dstNodeSet {
2022-12-29 07:47:31 +00:00
if err := s . isStoppingNode ( dstNode ) ; err != nil {
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err ,
fmt . Sprintf ( "can't balance, because the destination node[%d] is invalid" , dstNode ) ) ) , nil
2022-12-29 07:47:31 +00:00
}
2022-09-15 10:48:32 +00:00
}
2024-04-10 11:47:20 +00:00
// check sealed segment list
segments := s . dist . SegmentDistManager . GetByCollectionAndNode ( req . GetCollectionID ( ) , srcNode )
segmentsMap := lo . SliceToMap ( segments , func ( s * meta . Segment ) ( int64 , * meta . Segment ) {
return s . GetID ( ) , s
} )
toBalance := typeutil . NewSet [ * meta . Segment ] ( )
if len ( req . GetSealedSegmentIDs ( ) ) == 0 {
toBalance . Insert ( segments ... )
} else {
// check whether sealed segment exist
for _ , segmentID := range req . GetSealedSegmentIDs ( ) {
segment , ok := segmentsMap [ segmentID ]
if ! ok {
err := merr . WrapErrSegmentNotFound ( segmentID , "segment not found in source node" )
return merr . Status ( err ) , nil
}
// Only balance segments in targets
existInTarget := s . targetMgr . GetSealedSegment ( segment . GetCollectionID ( ) , segment . GetID ( ) , meta . CurrentTarget ) != nil
if ! existInTarget {
log . Info ( "segment doesn't exist in current target, skip it" , zap . Int64 ( "segmentID" , segmentID ) )
continue
}
toBalance . Insert ( segment )
}
}
err := s . balanceSegments ( ctx , replica . GetCollectionID ( ) , replica . GetID ( ) , srcNode , dstNodeSet . Collect ( ) , toBalance . Collect ( ) , true , false )
2022-09-15 10:48:32 +00:00
if err != nil {
msg := "failed to balance segments"
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
return merr . Status ( errors . Wrap ( err , msg ) ) , nil
2022-09-15 10:48:32 +00:00
}
2024-04-10 11:47:20 +00:00
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2022-09-15 10:48:32 +00:00
}
func ( s * Server ) ShowConfigurations ( ctx context . Context , req * internalpb . ShowConfigurationsRequest ) ( * internalpb . ShowConfigurationsResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx )
2022-09-15 10:48:32 +00:00
2022-10-25 11:29:36 +00:00
log . Info ( "show configurations request received" , zap . String ( "pattern" , req . GetPattern ( ) ) )
2022-09-15 10:48:32 +00:00
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to show configurations"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & internalpb . ShowConfigurationsResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
2022-12-09 06:31:21 +00:00
configList := make ( [ ] * commonpb . KeyValuePair , 0 )
2023-01-13 07:31:41 +00:00
for key , value := range Params . GetComponentConfigurations ( "querycoord" , req . Pattern ) {
2022-09-15 10:48:32 +00:00
configList = append ( configList ,
& commonpb . KeyValuePair {
Key : key ,
Value : value ,
} )
}
return & internalpb . ShowConfigurationsResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
Configuations : configList ,
} , nil
}
func ( s * Server ) GetMetrics ( ctx context . Context , req * milvuspb . GetMetricsRequest ) ( * milvuspb . GetMetricsResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx )
2022-09-15 10:48:32 +00:00
2023-01-04 09:39:35 +00:00
log . RatedDebug ( 60 , "get metrics request received" ,
2022-09-15 10:48:32 +00:00
zap . String ( "metricType" , req . GetRequest ( ) ) )
2023-10-16 07:04:09 +00:00
if err := merr . CheckHealthyStandby ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to get metrics"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & milvuspb . GetMetricsResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
resp := & milvuspb . GetMetricsResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
ComponentName : metricsinfo . ConstructComponentName ( typeutil . QueryCoordRole ,
2022-11-04 06:25:38 +00:00
paramtable . GetNodeID ( ) ) ,
2022-09-15 10:48:32 +00:00
}
metricType , err := metricsinfo . ParseMetricType ( req . GetRequest ( ) )
if err != nil {
msg := "failed to parse metric type"
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( errors . Wrap ( err , msg ) )
2022-09-15 10:48:32 +00:00
return resp , nil
}
if metricType != metricsinfo . SystemInfoMetrics {
msg := "invalid metric type"
err := errors . New ( metricsinfo . MsgUnimplementedMetric )
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( errors . Wrap ( err , msg ) )
2022-09-15 10:48:32 +00:00
return resp , nil
}
2022-10-14 10:05:24 +00:00
resp . Response , err = s . getSystemInfoMetrics ( ctx , req )
2022-09-15 10:48:32 +00:00
if err != nil {
2022-10-14 10:05:24 +00:00
msg := "failed to get system info metrics"
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( errors . Wrap ( err , msg ) )
2022-10-14 10:05:24 +00:00
return resp , nil
2022-09-15 10:48:32 +00:00
}
2022-10-14 10:05:24 +00:00
return resp , nil
2022-09-15 10:48:32 +00:00
}
func ( s * Server ) GetReplicas ( ctx context . Context , req * milvuspb . GetReplicasRequest ) ( * milvuspb . GetReplicasResponse , error ) {
2022-11-14 07:29:06 +00:00
log := log . Ctx ( ctx ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "get replicas request received" , zap . Bool ( "with-shard-nodes" , req . GetWithShardNodes ( ) ) )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to get replicas"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & milvuspb . GetReplicasResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
resp := & milvuspb . GetReplicasResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
Replicas : make ( [ ] * milvuspb . ReplicaInfo , 0 ) ,
}
replicas := s . meta . ReplicaManager . GetByCollection ( req . GetCollectionID ( ) )
if len ( replicas ) == 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrReplicaNotFound ( req . GetCollectionID ( ) , "failed to get replicas by collection" )
2022-09-15 10:48:32 +00:00
msg := "failed to get replicas, collection not loaded"
log . Warn ( msg )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2022-09-15 10:48:32 +00:00
return resp , nil
}
for _ , replica := range replicas {
2023-05-22 03:59:26 +00:00
msg := "failed to get replica info"
if len ( replica . GetNodes ( ) ) == 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrReplicaNotAvailable ( replica . GetID ( ) , "no available nodes in replica" )
2023-05-22 03:59:26 +00:00
log . Warn ( msg ,
zap . Int64 ( "replica" , replica . GetID ( ) ) ,
zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2023-05-22 03:59:26 +00:00
break
}
2022-09-15 10:48:32 +00:00
info , err := s . fillReplicaInfo ( replica , req . GetWithShardNodes ( ) )
if err != nil {
log . Warn ( msg ,
zap . Int64 ( "replica" , replica . GetID ( ) ) ,
zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2023-05-22 03:59:26 +00:00
break
2022-09-15 10:48:32 +00:00
}
resp . Replicas = append ( resp . Replicas , info )
}
return resp , nil
}
func ( s * Server ) GetShardLeaders ( ctx context . Context , req * querypb . GetShardLeadersRequest ) ( * querypb . GetShardLeadersResponse , error ) {
2023-12-04 10:52:32 +00:00
log := log . Ctx ( ctx ) . WithRateGroup ( "qcv2.GetShardLeaders" , 1 , 60 ) . With (
2022-09-15 10:48:32 +00:00
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
2023-12-04 10:52:32 +00:00
log . RatedInfo ( 10 , "get shard leaders request received" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2022-09-15 10:48:32 +00:00
msg := "failed to get shard leaders"
2023-03-31 02:54:29 +00:00
log . Warn ( msg , zap . Error ( err ) )
2022-09-15 10:48:32 +00:00
return & querypb . GetShardLeadersResponse {
2023-07-17 06:59:34 +00:00
Status : merr . Status ( errors . Wrap ( err , msg ) ) ,
2022-09-15 10:48:32 +00:00
} , nil
}
resp := & querypb . GetShardLeadersResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2022-09-15 10:48:32 +00:00
}
2023-05-09 02:36:41 +00:00
percentage := s . meta . CollectionManager . CalculateLoadPercentage ( req . GetCollectionID ( ) )
if percentage < 0 {
err := merr . WrapErrCollectionNotLoaded ( req . GetCollectionID ( ) )
log . Warn ( "failed to GetShardLeaders" , zap . Error ( err ) )
resp . Status = merr . Status ( err )
return resp , nil
}
2023-07-03 08:58:28 +00:00
collection := s . meta . CollectionManager . GetCollection ( req . GetCollectionID ( ) )
if collection . GetStatus ( ) == querypb . LoadStatus_Loaded {
// when collection is loaded, regard collection as readable, set percentage == 100
percentage = 100
}
2023-05-09 02:36:41 +00:00
if percentage < 100 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrCollectionNotFullyLoaded ( req . GetCollectionID ( ) )
2022-09-15 10:48:32 +00:00
msg := fmt . Sprintf ( "collection %v is not fully loaded" , req . GetCollectionID ( ) )
log . Warn ( msg )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2022-09-15 10:48:32 +00:00
return resp , nil
}
2022-11-07 11:37:04 +00:00
channels := s . targetMgr . GetDmChannelsByCollection ( req . GetCollectionID ( ) , meta . CurrentTarget )
2022-09-15 10:48:32 +00:00
if len ( channels ) == 0 {
msg := "failed to get channels"
2023-07-17 06:59:34 +00:00
err := merr . WrapErrCollectionNotLoaded ( req . GetCollectionID ( ) )
2023-03-09 07:47:52 +00:00
log . Warn ( msg , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2022-09-15 10:48:32 +00:00
return resp , nil
}
2023-10-24 16:44:12 +00:00
currentTargets := s . targetMgr . GetSealedSegmentsByCollection ( req . GetCollectionID ( ) , meta . CurrentTarget )
2022-09-15 10:48:32 +00:00
for _ , channel := range channels {
log := log . With ( zap . String ( "channel" , channel . GetChannelName ( ) ) )
leaders := s . dist . LeaderViewManager . GetLeadersByShard ( channel . GetChannelName ( ) )
2023-12-28 07:22:51 +00:00
readableLeaders := make ( map [ int64 ] * meta . LeaderView )
2022-12-06 10:05:18 +00:00
2022-12-08 11:09:18 +00:00
var channelErr error
2023-07-24 06:13:02 +00:00
if len ( leaders ) == 0 {
channelErr = merr . WrapErrChannelLack ( "channel not subscribed" )
}
2022-12-08 11:09:18 +00:00
2022-12-06 10:05:18 +00:00
// In a replica, a shard is available, if and only if:
// 1. The leader is online
// 2. All QueryNodes in the distribution are online
// 3. The last heartbeat response time is within HeartbeatAvailableInterval for all QueryNodes(include leader) in the distribution
// 4. All segments of the shard in target should be in the distribution
2022-09-15 10:48:32 +00:00
for _ , leader := range leaders {
2022-12-08 11:09:18 +00:00
log := log . With ( zap . Int64 ( "leaderID" , leader . ID ) )
2022-09-15 10:48:32 +00:00
info := s . nodeMgr . Get ( leader . ID )
2022-12-06 10:05:18 +00:00
// Check whether leader is online
2022-12-08 11:09:18 +00:00
err := checkNodeAvailable ( leader . ID , info )
if err != nil {
log . Info ( "leader is not available" , zap . Error ( err ) )
multierr . AppendInto ( & channelErr , fmt . Errorf ( "leader not available: %w" , err ) )
2022-09-15 10:48:32 +00:00
continue
}
2022-12-06 10:05:18 +00:00
// Check whether QueryNodes are online and available
isAvailable := true
2023-08-11 03:21:32 +00:00
for id , version := range leader . Segments {
2022-12-08 11:09:18 +00:00
info := s . nodeMgr . Get ( version . GetNodeID ( ) )
err = checkNodeAvailable ( version . GetNodeID ( ) , info )
if err != nil {
2023-08-11 03:21:32 +00:00
log . Info ( "leader is not available due to QueryNode unavailable" ,
zap . Int64 ( "segmentID" , id ) ,
zap . Error ( err ) )
2022-12-06 10:05:18 +00:00
isAvailable = false
2022-12-08 11:09:18 +00:00
multierr . AppendInto ( & channelErr , err )
2022-09-15 10:48:32 +00:00
break
}
}
2022-12-06 10:05:18 +00:00
2022-12-08 11:09:18 +00:00
// Avoid iterating all segments if any QueryNode unavailable
if ! isAvailable {
continue
}
2022-12-06 10:05:18 +00:00
// Check whether segments are fully loaded
for segmentID , info := range currentTargets {
if info . GetInsertChannel ( ) != leader . Channel {
continue
}
_ , exist := leader . Segments [ segmentID ]
if ! exist {
2022-12-08 11:09:18 +00:00
log . Info ( "leader is not available due to lack of segment" , zap . Int64 ( "segmentID" , segmentID ) )
2023-07-17 06:59:34 +00:00
multierr . AppendInto ( & channelErr , merr . WrapErrSegmentLack ( segmentID ) )
2022-12-06 10:05:18 +00:00
isAvailable = false
break
}
}
if ! isAvailable {
2022-09-15 10:48:32 +00:00
continue
}
2022-12-06 10:05:18 +00:00
2023-12-28 07:22:51 +00:00
readableLeaders [ leader . ID ] = leader
2022-09-15 10:48:32 +00:00
}
2023-12-28 07:22:51 +00:00
if len ( readableLeaders ) == 0 {
2022-09-15 10:48:32 +00:00
msg := fmt . Sprintf ( "channel %s is not available in any replica" , channel . GetChannelName ( ) )
2022-12-08 11:09:18 +00:00
log . Warn ( msg , zap . Error ( channelErr ) )
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status (
errors . Wrap ( merr . WrapErrChannelNotAvailable ( channel . GetChannelName ( ) ) , channelErr . Error ( ) ) )
2022-09-15 10:48:32 +00:00
resp . Shards = nil
return resp , nil
}
2023-12-28 07:22:51 +00:00
readableLeaders = filterDupLeaders ( s . meta . ReplicaManager , readableLeaders )
ids := make ( [ ] int64 , 0 , len ( leaders ) )
addrs := make ( [ ] string , 0 , len ( leaders ) )
for _ , leader := range readableLeaders {
info := s . nodeMgr . Get ( leader . ID )
ids = append ( ids , info . ID ( ) )
addrs = append ( addrs , info . Addr ( ) )
}
2022-09-15 10:48:32 +00:00
resp . Shards = append ( resp . Shards , & querypb . ShardLeadersList {
ChannelName : channel . GetChannelName ( ) ,
NodeIds : ids ,
NodeAddrs : addrs ,
} )
}
2022-12-06 10:05:18 +00:00
2022-09-15 10:48:32 +00:00
return resp , nil
}
2022-10-18 05:39:26 +00:00
func ( s * Server ) CheckHealth ( ctx context . Context , req * milvuspb . CheckHealthRequest ) ( * milvuspb . CheckHealthResponse , error ) {
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2023-10-11 13:01:35 +00:00
return & milvuspb . CheckHealthResponse { Status : merr . Status ( err ) , IsHealthy : false , Reasons : [ ] string { err . Error ( ) } } , nil
2022-10-18 05:39:26 +00:00
}
2023-04-20 01:52:31 +00:00
errReasons , err := s . checkNodeHealth ( ctx )
if err != nil || len ( errReasons ) != 0 {
2023-10-11 13:01:35 +00:00
return & milvuspb . CheckHealthResponse { Status : merr . Success ( ) , IsHealthy : false , Reasons : errReasons } , nil
2023-04-20 01:52:31 +00:00
}
2023-10-11 13:01:35 +00:00
return & milvuspb . CheckHealthResponse { Status : merr . Success ( ) , IsHealthy : true , Reasons : errReasons } , nil
2023-04-20 01:52:31 +00:00
}
func ( s * Server ) checkNodeHealth ( ctx context . Context ) ( [ ] string , error ) {
2022-10-18 05:39:26 +00:00
group , ctx := errgroup . WithContext ( ctx )
2023-04-20 01:52:31 +00:00
errReasons := make ( [ ] string , 0 )
2022-10-18 05:39:26 +00:00
mu := & sync . Mutex { }
for _ , node := range s . nodeMgr . GetAll ( ) {
node := node
group . Go ( func ( ) error {
resp , err := s . cluster . GetComponentStates ( ctx , node . ID ( ) )
2023-10-11 13:01:35 +00:00
if err != nil {
return err
}
err = merr . AnalyzeState ( "QueryNode" , node . ID ( ) , resp )
if err != nil {
2022-10-18 05:39:26 +00:00
mu . Lock ( )
defer mu . Unlock ( )
2023-10-11 13:01:35 +00:00
errReasons = append ( errReasons , err . Error ( ) )
2022-10-18 05:39:26 +00:00
}
2023-10-11 13:01:35 +00:00
return nil
2022-10-18 05:39:26 +00:00
} )
}
err := group . Wait ( )
2023-04-20 01:52:31 +00:00
return errReasons , err
2022-10-18 05:39:26 +00:00
}
2023-01-30 02:19:48 +00:00
func ( s * Server ) CreateResourceGroup ( ctx context . Context , req * milvuspb . CreateResourceGroupRequest ) ( * commonpb . Status , error ) {
log := log . Ctx ( ctx ) . With (
zap . String ( "rgName" , req . GetResourceGroup ( ) ) ,
)
log . Info ( "create resource group request received" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( "failed to create resource group" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
err := s . meta . ResourceManager . AddResourceGroup ( req . GetResourceGroup ( ) )
if err != nil {
2023-03-31 02:54:29 +00:00
log . Warn ( "failed to create resource group" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2023-01-30 02:19:48 +00:00
}
func ( s * Server ) DropResourceGroup ( ctx context . Context , req * milvuspb . DropResourceGroupRequest ) ( * commonpb . Status , error ) {
log := log . Ctx ( ctx ) . With (
zap . String ( "rgName" , req . GetResourceGroup ( ) ) ,
)
log . Info ( "drop resource group request received" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( "failed to drop resource group" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-02-21 11:06:27 +00:00
replicas := s . meta . ReplicaManager . GetByResourceGroup ( req . GetResourceGroup ( ) )
if len ( replicas ) > 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "empty resource group" , fmt . Sprintf ( "resource group %s has collection %d loaded" , req . GetResourceGroup ( ) , replicas [ 0 ] . GetCollectionID ( ) ) )
return merr . Status ( errors . Wrap ( err ,
fmt . Sprintf ( "some replicas still loaded in resource group[%s], release it first" , req . GetResourceGroup ( ) ) ) ) , nil
2023-02-21 11:06:27 +00:00
}
2023-01-30 02:19:48 +00:00
err := s . meta . ResourceManager . RemoveResourceGroup ( req . GetResourceGroup ( ) )
if err != nil {
2023-03-31 02:54:29 +00:00
log . Warn ( "failed to drop resource group" , zap . Error ( err ) )
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2023-01-30 02:19:48 +00:00
}
func ( s * Server ) TransferNode ( ctx context . Context , req * milvuspb . TransferNodeRequest ) ( * commonpb . Status , error ) {
log := log . Ctx ( ctx ) . With (
zap . String ( "source" , req . GetSourceResourceGroup ( ) ) ,
zap . String ( "target" , req . GetTargetResourceGroup ( ) ) ,
2023-02-14 08:16:34 +00:00
zap . Int32 ( "numNode" , req . GetNumNode ( ) ) ,
2023-01-30 02:19:48 +00:00
)
log . Info ( "transfer node between resource group request received" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( "failed to transfer node between resource group" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
if ok := s . meta . ResourceManager . ContainResourceGroup ( req . GetSourceResourceGroup ( ) ) ; ! ok {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "valid resource group" , req . GetSourceResourceGroup ( ) , "source resource group not found" )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
if ok := s . meta . ResourceManager . ContainResourceGroup ( req . GetTargetResourceGroup ( ) ) ; ! ok {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "valid resource group" , req . GetTargetResourceGroup ( ) , "target resource group not found" )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-02-21 11:06:27 +00:00
if req . GetNumNode ( ) <= 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "NumNode > 0" , fmt . Sprintf ( "invalid NumNode %d" , req . GetNumNode ( ) ) )
return merr . Status ( err ) , nil
2023-02-21 11:06:27 +00:00
}
2023-02-23 06:15:45 +00:00
replicasInSource := s . meta . ReplicaManager . GetByResourceGroup ( req . GetSourceResourceGroup ( ) )
replicasInTarget := s . meta . ReplicaManager . GetByResourceGroup ( req . GetTargetResourceGroup ( ) )
loadSameCollection := false
2023-07-17 06:59:34 +00:00
sameCollectionID := int64 ( 0 )
2023-02-23 06:15:45 +00:00
for _ , r1 := range replicasInSource {
for _ , r2 := range replicasInTarget {
if r1 . GetCollectionID ( ) == r2 . GetCollectionID ( ) {
loadSameCollection = true
2023-07-17 06:59:34 +00:00
sameCollectionID = r1 . GetCollectionID ( )
2023-02-23 06:15:45 +00:00
}
}
}
if loadSameCollection {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "resource groups load not the same collection" , fmt . Sprintf ( "collection %d loaded for both" , sameCollectionID ) )
return merr . Status ( err ) , nil
2023-02-23 06:15:45 +00:00
}
nodes , err := s . meta . ResourceManager . TransferNode ( req . GetSourceResourceGroup ( ) , req . GetTargetResourceGroup ( ) , int ( req . GetNumNode ( ) ) )
2023-01-30 02:19:48 +00:00
if err != nil {
2023-07-17 06:59:34 +00:00
log . Warn ( "failed to transfer node" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-02-23 06:15:45 +00:00
utils . AddNodesToCollectionsInRG ( s . meta , req . GetTargetResourceGroup ( ) , nodes ... )
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2023-01-30 02:19:48 +00:00
}
func ( s * Server ) TransferReplica ( ctx context . Context , req * querypb . TransferReplicaRequest ) ( * commonpb . Status , error ) {
log := log . Ctx ( ctx ) . With (
zap . String ( "source" , req . GetSourceResourceGroup ( ) ) ,
zap . String ( "target" , req . GetTargetResourceGroup ( ) ) ,
zap . Int64 ( "collectionID" , req . GetCollectionID ( ) ) ,
)
log . Info ( "transfer replica request received" )
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( "failed to transfer replica between resource group" , zap . Error ( err ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
if ok := s . meta . ResourceManager . ContainResourceGroup ( req . GetSourceResourceGroup ( ) ) ; ! ok {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrResourceGroupNotFound ( req . GetSourceResourceGroup ( ) )
return merr . Status ( errors . Wrap ( err ,
fmt . Sprintf ( "the source resource group[%s] doesn't exist" , req . GetSourceResourceGroup ( ) ) ) ) , nil
2023-01-30 02:19:48 +00:00
}
if ok := s . meta . ResourceManager . ContainResourceGroup ( req . GetTargetResourceGroup ( ) ) ; ! ok {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrResourceGroupNotFound ( req . GetTargetResourceGroup ( ) )
return merr . Status ( errors . Wrap ( err ,
fmt . Sprintf ( "the target resource group[%s] doesn't exist" , req . GetTargetResourceGroup ( ) ) ) ) , nil
2023-01-30 02:19:48 +00:00
}
2023-03-08 10:57:51 +00:00
if req . GetNumReplica ( ) <= 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "NumReplica > 0" , fmt . Sprintf ( "invalid NumReplica %d" , req . GetNumReplica ( ) ) )
return merr . Status ( err ) , nil
2023-03-08 10:57:51 +00:00
}
replicas := s . meta . ReplicaManager . GetByCollectionAndRG ( req . GetCollectionID ( ) , req . GetSourceResourceGroup ( ) )
if len ( replicas ) < int ( req . GetNumReplica ( ) ) {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "NumReplica not greater than the number of replica in source resource group" , fmt . Sprintf ( "only found [%d] replicas in source resource group[%s]" ,
len ( replicas ) , req . GetSourceResourceGroup ( ) ) )
return merr . Status ( err ) , nil
2023-03-01 03:11:47 +00:00
}
replicas = s . meta . ReplicaManager . GetByCollectionAndRG ( req . GetCollectionID ( ) , req . GetTargetResourceGroup ( ) )
2023-02-16 02:48:34 +00:00
if len ( replicas ) > 0 {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "no same collection in target resource group" , fmt . Sprintf ( "found [%d] replicas of same collection in target resource group[%s], dynamically increase replica num is unsupported" ,
len ( replicas ) , req . GetTargetResourceGroup ( ) ) )
return merr . Status ( err ) , nil
2023-02-21 11:06:27 +00:00
}
2023-03-08 10:57:51 +00:00
replicas = s . meta . ReplicaManager . GetByCollection ( req . GetCollectionID ( ) )
if ( req . GetSourceResourceGroup ( ) == meta . DefaultResourceGroupName || req . GetTargetResourceGroup ( ) == meta . DefaultResourceGroupName ) &&
len ( replicas ) != int ( req . GetNumReplica ( ) ) {
2023-07-17 06:59:34 +00:00
err := merr . WrapErrParameterInvalid ( "tranfer all replicas from/to default resource group" ,
fmt . Sprintf ( "try to transfer %d replicas from/to but %d replicas exist" , req . GetNumReplica ( ) , len ( replicas ) ) )
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
err := s . transferReplica ( req . GetTargetResourceGroup ( ) , replicas [ : req . GetNumReplica ( ) ] )
if err != nil {
2023-07-17 06:59:34 +00:00
return merr . Status ( err ) , nil
2023-01-30 02:19:48 +00:00
}
2023-10-11 13:01:35 +00:00
return merr . Success ( ) , nil
2023-01-30 02:19:48 +00:00
}
// transferReplica re-homes clones of the given replicas to targetRG, assigns
// nodes of that group to them and stores them through the replica manager.
// The replicas passed in are not modified; only their clones are.
func (s *Server) transferReplica(targetRG string, replicas []*meta.Replica) error {
	moved := make([]*meta.Replica, 0, len(replicas))
	for i := range replicas {
		clone := replicas[i].Clone()
		clone.ResourceGroup = targetRG
		moved = append(moved, clone)
	}

	if assignErr := utils.AssignNodesToReplicas(s.meta, targetRG, moved...); assignErr != nil {
		return assignErr
	}

	return s.meta.ReplicaManager.Put(moved...)
}
func ( s * Server ) ListResourceGroups ( ctx context . Context , req * milvuspb . ListResourceGroupsRequest ) ( * milvuspb . ListResourceGroupsResponse , error ) {
log := log . Ctx ( ctx )
log . Info ( "list resource group request received" )
resp := & milvuspb . ListResourceGroupsResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2023-01-30 02:19:48 +00:00
}
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
log . Warn ( "failed to list resource group" , zap . Error ( err ) )
resp . Status = merr . Status ( err )
2023-01-30 02:19:48 +00:00
return resp , nil
}
resp . ResourceGroups = s . meta . ResourceManager . ListResourceGroups ( )
return resp , nil
}
func ( s * Server ) DescribeResourceGroup ( ctx context . Context , req * querypb . DescribeResourceGroupRequest ) ( * querypb . DescribeResourceGroupResponse , error ) {
log := log . Ctx ( ctx ) . With (
zap . String ( "rgName" , req . GetResourceGroup ( ) ) ,
)
log . Info ( "describe resource group request received" )
resp := & querypb . DescribeResourceGroupResponse {
2023-10-11 13:01:35 +00:00
Status : merr . Success ( ) ,
2023-01-30 02:19:48 +00:00
}
2023-03-31 02:54:29 +00:00
if err := merr . CheckHealthy ( s . State ( ) ) ; err != nil {
2023-07-17 06:59:34 +00:00
log . Warn ( "failed to describe resource group" , zap . Error ( err ) )
2023-03-31 02:54:29 +00:00
resp . Status = merr . Status ( err )
2023-01-30 02:19:48 +00:00
return resp , nil
}
rg , err := s . meta . ResourceManager . GetResourceGroup ( req . GetResourceGroup ( ) )
if err != nil {
2023-07-17 06:59:34 +00:00
resp . Status = merr . Status ( err )
2023-01-30 02:19:48 +00:00
return resp , nil
}
loadedReplicas := make ( map [ int64 ] int32 )
outgoingNodes := make ( map [ int64 ] int32 )
replicasInRG := s . meta . GetByResourceGroup ( req . GetResourceGroup ( ) )
for _ , replica := range replicasInRG {
loadedReplicas [ replica . GetCollectionID ( ) ] ++
for _ , node := range replica . GetNodes ( ) {
if ! s . meta . ContainsNode ( replica . GetResourceGroup ( ) , node ) {
outgoingNodes [ replica . GetCollectionID ( ) ] ++
}
}
}
incomingNodes := make ( map [ int64 ] int32 )
collections := s . meta . GetAll ( )
for _ , collection := range collections {
replicas := s . meta . GetByCollection ( collection )
for _ , replica := range replicas {
if replica . GetResourceGroup ( ) == req . GetResourceGroup ( ) {
continue
}
for _ , node := range replica . GetNodes ( ) {
if s . meta . ContainsNode ( req . GetResourceGroup ( ) , node ) {
incomingNodes [ collection ] ++
}
}
}
}
resp . ResourceGroup = & querypb . ResourceGroupInfo {
Name : req . GetResourceGroup ( ) ,
Capacity : int32 ( rg . GetCapacity ( ) ) ,
NumAvailableNode : int32 ( len ( rg . GetNodes ( ) ) ) ,
NumLoadedReplica : loadedReplicas ,
NumOutgoingNode : outgoingNodes ,
NumIncomingNode : incomingNodes ,
}
return resp , nil
}