mirror of https://github.com/milvus-io/milvus.git
commit 926c88ff18 (parent 5f30c8bc64)

go.mod: 2 changed lines
@@ -28,7 +28,7 @@ require (
 	github.com/klauspost/compress v1.14.2
 	github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76
 	github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
-	github.com/milvus-io/milvus-proto/go-api v0.0.0-20230119003339-ef21525213cb
+	github.com/milvus-io/milvus-proto/go-api v0.0.0-20230131100146-10289a199a32
 	github.com/minio/minio-go/v7 v7.0.17
 	github.com/opentracing/opentracing-go v1.2.0
 	github.com/panjf2000/ants/v2 v2.4.8

go.sum: 6 changed lines

@@ -490,8 +490,10 @@ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyex
 github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
 github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
 github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
-github.com/milvus-io/milvus-proto/go-api v0.0.0-20230119003339-ef21525213cb h1:uk6L3CO1lHY7mFR7KS950MwV1r5ERbpzKh1hEieQ5rY=
-github.com/milvus-io/milvus-proto/go-api v0.0.0-20230119003339-ef21525213cb/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk=
+github.com/milvus-io/milvus-proto/go-api v0.0.0-20230131092640-7aebd990ebac h1:RNEjVzLuE3XQQVzfdt/YOEtqXoM5JnA0sSw58ES0MoI=
+github.com/milvus-io/milvus-proto/go-api v0.0.0-20230131092640-7aebd990ebac/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk=
+github.com/milvus-io/milvus-proto/go-api v0.0.0-20230131100146-10289a199a32 h1:cLW/zBbJQWNtux+Cjx4RniB70iw3T5ut8/D393YNtz0=
+github.com/milvus-io/milvus-proto/go-api v0.0.0-20230131100146-10289a199a32/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk=
 github.com/milvus-io/pulsar-client-go v0.6.8 h1:fZdZH73aPRszu2fazyeeahQEz34tyn1Pt9EkqJmV100=
 github.com/milvus-io/pulsar-client-go v0.6.8/go.mod h1:oFIlYIk23tamkSLttw849qphmMIpHY8ztEBWDWJW+sc=
 github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=

@ -403,7 +403,7 @@ const char descriptor_table_protodef_common_2eproto[] PROTOBUF_SECTION_VARIABLE(
|
|||
"\n\n\006Sealed\020\003\022\013\n\007Flushed\020\004\022\014\n\010Flushing\020\005\022\013"
|
||||
"\n\007Dropped\020\006\022\r\n\tImporting\020\007*>\n\017Placeholde"
|
||||
"rType\022\010\n\004None\020\000\022\020\n\014BinaryVector\020d\022\017\n\013Flo"
|
||||
"atVector\020e*\215\r\n\007MsgType\022\r\n\tUndefined\020\000\022\024\n"
|
||||
"atVector\020e*\235\016\n\007MsgType\022\r\n\tUndefined\020\000\022\024\n"
|
||||
"\020CreateCollection\020d\022\022\n\016DropCollection\020e\022"
|
||||
"\021\n\rHasCollection\020f\022\026\n\022DescribeCollection"
|
||||
"\020g\022\023\n\017ShowCollections\020h\022\024\n\020GetSystemConf"
|
||||
|
@ -445,42 +445,46 @@ const char descriptor_table_protodef_common_2eproto[] PROTOBUF_SECTION_VARIABLE(
|
|||
"le\020\303\014\022\017\n\nSelectUser\020\304\014\022\023\n\016SelectResource"
|
||||
"\020\305\014\022\025\n\020OperatePrivilege\020\306\014\022\020\n\013SelectGran"
|
||||
"t\020\307\014\022\033\n\026RefreshPolicyInfoCache\020\310\014\022\017\n\nLis"
|
||||
"tPolicy\020\311\014*\"\n\007DslType\022\007\n\003Dsl\020\000\022\016\n\nBoolEx"
|
||||
"prV1\020\001*B\n\017CompactionState\022\021\n\rUndefiedSta"
|
||||
"te\020\000\022\r\n\tExecuting\020\001\022\r\n\tCompleted\020\002*X\n\020Co"
|
||||
"nsistencyLevel\022\n\n\006Strong\020\000\022\013\n\007Session\020\001\022"
|
||||
"\013\n\007Bounded\020\002\022\016\n\nEventually\020\003\022\016\n\nCustomiz"
|
||||
"ed\020\004*\213\001\n\013ImportState\022\021\n\rImportPending\020\000\022"
|
||||
"\020\n\014ImportFailed\020\001\022\021\n\rImportStarted\020\002\022\023\n\017"
|
||||
"ImportPersisted\020\005\022\023\n\017ImportCompleted\020\006\022\032"
|
||||
"\n\026ImportFailedAndCleaned\020\007*2\n\nObjectType"
|
||||
"\022\016\n\nCollection\020\000\022\n\n\006Global\020\001\022\010\n\004User\020\002*\206"
|
||||
"\005\n\017ObjectPrivilege\022\020\n\014PrivilegeAll\020\000\022\035\n\031"
|
||||
"PrivilegeCreateCollection\020\001\022\033\n\027Privilege"
|
||||
"DropCollection\020\002\022\037\n\033PrivilegeDescribeCol"
|
||||
"lection\020\003\022\034\n\030PrivilegeShowCollections\020\004\022"
|
||||
"\021\n\rPrivilegeLoad\020\005\022\024\n\020PrivilegeRelease\020\006"
|
||||
"\022\027\n\023PrivilegeCompaction\020\007\022\023\n\017PrivilegeIn"
|
||||
"sert\020\010\022\023\n\017PrivilegeDelete\020\t\022\032\n\026Privilege"
|
||||
"GetStatistics\020\n\022\030\n\024PrivilegeCreateIndex\020"
|
||||
"\013\022\030\n\024PrivilegeIndexDetail\020\014\022\026\n\022Privilege"
|
||||
"DropIndex\020\r\022\023\n\017PrivilegeSearch\020\016\022\022\n\016Priv"
|
||||
"ilegeFlush\020\017\022\022\n\016PrivilegeQuery\020\020\022\030\n\024Priv"
|
||||
"ilegeLoadBalance\020\021\022\023\n\017PrivilegeImport\020\022\022"
|
||||
"\034\n\030PrivilegeCreateOwnership\020\023\022\027\n\023Privile"
|
||||
"geUpdateUser\020\024\022\032\n\026PrivilegeDropOwnership"
|
||||
"\020\025\022\034\n\030PrivilegeSelectOwnership\020\026\022\034\n\030Priv"
|
||||
"ilegeManageOwnership\020\027\022\027\n\023PrivilegeSelec"
|
||||
"tUser\020\030*S\n\tStateCode\022\020\n\014Initializing\020\000\022\013"
|
||||
"\n\007Healthy\020\001\022\014\n\010Abnormal\020\002\022\013\n\007StandBy\020\003\022\014"
|
||||
"\n\010Stopping\020\004*c\n\tLoadState\022\025\n\021LoadStateNo"
|
||||
"tExist\020\000\022\024\n\020LoadStateNotLoad\020\001\022\024\n\020LoadSt"
|
||||
"ateLoading\020\002\022\023\n\017LoadStateLoaded\020\003:^\n\021pri"
|
||||
"vilege_ext_obj\022\037.google.protobuf.Message"
|
||||
"Options\030\351\007 \001(\0132!.milvus.proto.common.Pri"
|
||||
"vilegeExtBU\n\016io.milvus.grpcB\013CommonProto"
|
||||
"P\001Z1github.com/milvus-io/milvus-proto/go"
|
||||
"-api/commonpb\240\001\001b\006proto3"
|
||||
"tPolicy\020\311\014\022\030\n\023CreateResourceGroup\020\244\r\022\026\n\021"
|
||||
"DropResourceGroup\020\245\r\022\027\n\022ListResourceGrou"
|
||||
"ps\020\246\r\022\032\n\025DescribeResourceGroup\020\247\r\022\021\n\014Tra"
|
||||
"nsferNode\020\250\r\022\024\n\017TransferReplica\020\251\r*\"\n\007Ds"
|
||||
"lType\022\007\n\003Dsl\020\000\022\016\n\nBoolExprV1\020\001*B\n\017Compac"
|
||||
"tionState\022\021\n\rUndefiedState\020\000\022\r\n\tExecutin"
|
||||
"g\020\001\022\r\n\tCompleted\020\002*X\n\020ConsistencyLevel\022\n"
|
||||
"\n\006Strong\020\000\022\013\n\007Session\020\001\022\013\n\007Bounded\020\002\022\016\n\n"
|
||||
"Eventually\020\003\022\016\n\nCustomized\020\004*\213\001\n\013ImportS"
|
||||
"tate\022\021\n\rImportPending\020\000\022\020\n\014ImportFailed\020"
|
||||
"\001\022\021\n\rImportStarted\020\002\022\023\n\017ImportPersisted\020"
|
||||
"\005\022\023\n\017ImportCompleted\020\006\022\032\n\026ImportFailedAn"
|
||||
"dCleaned\020\007*2\n\nObjectType\022\016\n\nCollection\020\000"
|
||||
"\022\n\n\006Global\020\001\022\010\n\004User\020\002*\206\005\n\017ObjectPrivile"
|
||||
"ge\022\020\n\014PrivilegeAll\020\000\022\035\n\031PrivilegeCreateC"
|
||||
"ollection\020\001\022\033\n\027PrivilegeDropCollection\020\002"
|
||||
"\022\037\n\033PrivilegeDescribeCollection\020\003\022\034\n\030Pri"
|
||||
"vilegeShowCollections\020\004\022\021\n\rPrivilegeLoad"
|
||||
"\020\005\022\024\n\020PrivilegeRelease\020\006\022\027\n\023PrivilegeCom"
|
||||
"paction\020\007\022\023\n\017PrivilegeInsert\020\010\022\023\n\017Privil"
|
||||
"egeDelete\020\t\022\032\n\026PrivilegeGetStatistics\020\n\022"
|
||||
"\030\n\024PrivilegeCreateIndex\020\013\022\030\n\024PrivilegeIn"
|
||||
"dexDetail\020\014\022\026\n\022PrivilegeDropIndex\020\r\022\023\n\017P"
|
||||
"rivilegeSearch\020\016\022\022\n\016PrivilegeFlush\020\017\022\022\n\016"
|
||||
"PrivilegeQuery\020\020\022\030\n\024PrivilegeLoadBalance"
|
||||
"\020\021\022\023\n\017PrivilegeImport\020\022\022\034\n\030PrivilegeCrea"
|
||||
"teOwnership\020\023\022\027\n\023PrivilegeUpdateUser\020\024\022\032"
|
||||
"\n\026PrivilegeDropOwnership\020\025\022\034\n\030PrivilegeS"
|
||||
"electOwnership\020\026\022\034\n\030PrivilegeManageOwner"
|
||||
"ship\020\027\022\027\n\023PrivilegeSelectUser\020\030*S\n\tState"
|
||||
"Code\022\020\n\014Initializing\020\000\022\013\n\007Healthy\020\001\022\014\n\010A"
|
||||
"bnormal\020\002\022\013\n\007StandBy\020\003\022\014\n\010Stopping\020\004*c\n\t"
|
||||
"LoadState\022\025\n\021LoadStateNotExist\020\000\022\024\n\020Load"
|
||||
"StateNotLoad\020\001\022\024\n\020LoadStateLoading\020\002\022\023\n\017"
|
||||
"LoadStateLoaded\020\003:^\n\021privilege_ext_obj\022\037"
|
||||
".google.protobuf.MessageOptions\030\351\007 \001(\0132!"
|
||||
".milvus.proto.common.PrivilegeExtBU\n\016io."
|
||||
"milvus.grpcB\013CommonProtoP\001Z1github.com/m"
|
||||
"ilvus-io/milvus-proto/go-api/commonpb\240\001\001"
|
||||
"b\006proto3"
|
||||
;
|
||||
static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_common_2eproto_deps[1] = {
|
||||
&::descriptor_table_google_2fprotobuf_2fdescriptor_2eproto,
|
||||
|
@@ -501,7 +505,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_com
 static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_common_2eproto_once;
 static bool descriptor_table_common_2eproto_initialized = false;
 const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_common_2eproto = {
-  &descriptor_table_common_2eproto_initialized, descriptor_table_protodef_common_2eproto, "common.proto", 5624,
+  &descriptor_table_common_2eproto_initialized, descriptor_table_protodef_common_2eproto, "common.proto", 5768,
   &descriptor_table_common_2eproto_once, descriptor_table_common_2eproto_sccs, descriptor_table_common_2eproto_deps, 11, 1,
   schemas, file_default_instances, TableStruct_common_2eproto::offsets,
   file_level_metadata_common_2eproto, 11, file_level_enum_descriptors_common_2eproto, file_level_service_descriptors_common_2eproto,

@@ -721,6 +725,12 @@ bool MsgType_IsValid(int value) {
     case 1607:
     case 1608:
     case 1609:
+    case 1700:
+    case 1701:
+    case 1702:
+    case 1703:
+    case 1704:
+    case 1705:
       return true;
     default:
       return false;

@@ -360,12 +360,18 @@ enum MsgType : int {
   SelectGrant = 1607,
   RefreshPolicyInfoCache = 1608,
   ListPolicy = 1609,
+  CreateResourceGroup = 1700,
+  DropResourceGroup = 1701,
+  ListResourceGroups = 1702,
+  DescribeResourceGroup = 1703,
+  TransferNode = 1704,
+  TransferReplica = 1705,
   MsgType_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::min(),
   MsgType_INT_MAX_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::max()
 };
 bool MsgType_IsValid(int value);
 constexpr MsgType MsgType_MIN = Undefined;
-constexpr MsgType MsgType_MAX = ListPolicy;
+constexpr MsgType MsgType_MAX = TransferReplica;
 constexpr int MsgType_ARRAYSIZE = MsgType_MAX + 1;

 const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* MsgType_descriptor();

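For orientation, here is a minimal Go sketch of how the six new MsgType values surface through the milvus-proto/go-api bindings pulled in by the go.mod bump above. The snippet is illustrative only and is not part of this diff.

package main

import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/commonpb"
)

func main() {
	// Tag a request base with one of the newly added resource-group message types.
	base := &commonpb.MsgBase{MsgType: commonpb.MsgType_TransferReplica} // enum value 1705 above
	fmt.Println(base.GetMsgType()) // prints the enum name, "TransferReplica"
}
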
@@ -696,12 +696,12 @@ func (s *Server) GetDdChannel(ctx context.Context, request *internalpb.GetDdChan
 	return s.proxy.GetDdChannel(ctx, request)
 }

-//GetPersistentSegmentInfo notifies Proxy to get persistent segment info.
+// GetPersistentSegmentInfo notifies Proxy to get persistent segment info.
 func (s *Server) GetPersistentSegmentInfo(ctx context.Context, request *milvuspb.GetPersistentSegmentInfoRequest) (*milvuspb.GetPersistentSegmentInfoResponse, error) {
 	return s.proxy.GetPersistentSegmentInfo(ctx, request)
 }

-//GetQuerySegmentInfo notifies Proxy to get query segment info.
+// GetQuerySegmentInfo notifies Proxy to get query segment info.
 func (s *Server) GetQuerySegmentInfo(ctx context.Context, request *milvuspb.GetQuerySegmentInfoRequest) (*milvuspb.GetQuerySegmentInfoResponse, error) {
 	return s.proxy.GetQuerySegmentInfo(ctx, request)

@@ -892,3 +892,27 @@ func (s *Server) GetVersion(ctx context.Context, request *milvuspb.GetVersionReq
 func (s *Server) CheckHealth(ctx context.Context, request *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
 	return s.proxy.CheckHealth(ctx, request)
 }
+
+func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
+	return s.proxy.CreateResourceGroup(ctx, req)
+}
+
+func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
+	return s.proxy.DropResourceGroup(ctx, req)
+}
+
+func (s *Server) DescribeResourceGroup(ctx context.Context, req *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error) {
+	return s.proxy.DescribeResourceGroup(ctx, req)
+}
+
+func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
+	return s.proxy.TransferNode(ctx, req)
+}
+
+func (s *Server) TransferReplica(ctx context.Context, req *milvuspb.TransferReplicaRequest) (*commonpb.Status, error) {
+	return s.proxy.TransferReplica(ctx, req)
+}
+
+func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
+	return s.proxy.ListResourceGroups(ctx, req)
+}

@ -53,7 +53,7 @@ import (
|
|||
"google.golang.org/grpc/health/grpc_health_v1"
|
||||
)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockBase struct {
|
||||
mock.Mock
|
||||
isMockGetComponentStatesOn bool
|
||||
|
@ -95,7 +95,7 @@ func (m *MockBase) GetStatisticsChannel(ctx context.Context) (*milvuspb.StringRe
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockRootCoord struct {
|
||||
MockBase
|
||||
initErr error
|
||||
|
@ -282,7 +282,7 @@ func (m *MockRootCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHeal
|
|||
}, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockIndexCoord struct {
|
||||
MockBase
|
||||
initErr error
|
||||
|
@ -347,7 +347,7 @@ func (m *MockIndexCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHea
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockQueryCoord struct {
|
||||
MockBase
|
||||
initErr error
|
||||
|
@ -462,7 +462,31 @@ func (m *MockQueryCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHea
|
|||
}, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
func (m *MockQueryCoord) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockDataCoord struct {
|
||||
MockBase
|
||||
err error
|
||||
|
@ -624,7 +648,7 @@ func (m *MockDataCoord) GcConfirm(ctx context.Context, req *datapb.GcConfirmRequ
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockProxy struct {
|
||||
MockBase
|
||||
err error
|
||||
|
@ -938,6 +962,30 @@ func (m *MockProxy) CheckHealth(ctx context.Context, request *milvuspb.CheckHeal
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) DescribeResourceGroup(ctx context.Context, req *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) TransferReplica(ctx context.Context, req *milvuspb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MockProxy) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
type WaitOption struct {
|
||||
|
@ -1055,7 +1103,7 @@ func runAndWaitForServerReady(server *Server) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
func Test_NewServer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
server, err := NewServer(ctx, nil)
|
||||
|
@ -1363,6 +1411,36 @@ func Test_NewServer(t *testing.T) {
|
|||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("CreateResourceGroup", func(t *testing.T) {
|
||||
_, err := server.CreateResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("DropResourceGroup", func(t *testing.T) {
|
||||
_, err := server.DropResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("TransferNode", func(t *testing.T) {
|
||||
_, err := server.TransferNode(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("TransferReplica", func(t *testing.T) {
|
||||
_, err := server.TransferReplica(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("ListResourceGroups", func(t *testing.T) {
|
||||
_, err := server.ListResourceGroups(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
t.Run("DescribeResourceGroup", func(t *testing.T) {
|
||||
_, err := server.DescribeResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
})
|
||||
|
||||
err = server.Stop()
|
||||
assert.Nil(t, err)
|
||||
|
||||
|
|
|
@ -418,3 +418,111 @@ func (c *Client) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthReque
|
|||
}
|
||||
return ret.(*milvuspb.CheckHealthResponse), err
|
||||
}
|
||||
|
||||
func (c *Client) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.CreateResourceGroup(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*commonpb.Status), err
|
||||
}
|
||||
|
||||
func (c *Client) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.DropResourceGroup(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*commonpb.Status), err
|
||||
}
|
||||
|
||||
func (c *Client) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.DescribeResourceGroup(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*querypb.DescribeResourceGroupResponse), err
|
||||
}
|
||||
|
||||
func (c *Client) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.TransferNode(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*commonpb.Status), err
|
||||
}
|
||||
|
||||
func (c *Client) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.TransferReplica(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*commonpb.Status), err
|
||||
}
|
||||
|
||||
func (c *Client) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
req = typeutil.Clone(req)
|
||||
commonpbutil.UpdateMsgBase(
|
||||
req.GetBase(),
|
||||
commonpbutil.FillMsgBaseFromClient(Params.QueryCoordCfg.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
|
||||
)
|
||||
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
|
||||
if !funcutil.CheckCtxValid(ctx) {
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
return client.ListResourceGroups(ctx, req)
|
||||
})
|
||||
if err != nil || ret == nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret.(*milvuspb.ListResourceGroupsResponse), err
|
||||
}
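Each wrapper above follows the same shape: clone the request, stamp its MsgBase with commonpbutil.UpdateMsgBase, then retry the call through grpcClient.ReCall while the context is still valid. A small hedged usage sketch of one wrapper follows; the helper name, the hard-coded group names, and the narrow local interface are illustrative assumptions rather than part of the diff, and the import paths are assumed from the repository layout.

package example

import (
	"context"
	"errors"

	"github.com/milvus-io/milvus-proto/go-api/commonpb"
	"github.com/milvus-io/milvus/internal/proto/querypb"
)

// replicaTransferrer captures only the method this sketch needs; the QueryCoord
// client defined above satisfies it.
type replicaTransferrer interface {
	TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error)
}

// transferOneReplica moves a single replica of the given collection between two
// resource groups and converts a non-success status into an error.
func transferOneReplica(ctx context.Context, qc replicaTransferrer, collectionID int64) error {
	status, err := qc.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: "rg_source",
		TargetResourceGroup: "rg_target",
		CollectionID:        collectionID,
		NumReplica:          1,
	})
	if err != nil {
		return err
	}
	if status.GetErrorCode() != commonpb.ErrorCode_Success {
		return errors.New(status.GetReason())
	}
	return nil
}
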
|
||||
|
|
|
@ -124,6 +124,24 @@ func Test_NewClient(t *testing.T) {
|
|||
|
||||
r20, err := client.CheckHealth(ctx, nil)
|
||||
retCheck(retNotNil, r20, err)
|
||||
|
||||
r21, err := client.CreateResourceGroup(ctx, nil)
|
||||
retCheck(retNotNil, r21, err)
|
||||
|
||||
r22, err := client.DropResourceGroup(ctx, nil)
|
||||
retCheck(retNotNil, r22, err)
|
||||
|
||||
r23, err := client.TransferNode(ctx, nil)
|
||||
retCheck(retNotNil, r23, err)
|
||||
|
||||
r24, err := client.TransferReplica(ctx, nil)
|
||||
retCheck(retNotNil, r24, err)
|
||||
|
||||
r26, err := client.ListResourceGroups(ctx, nil)
|
||||
retCheck(retNotNil, r26, err)
|
||||
|
||||
r27, err := client.DescribeResourceGroup(ctx, nil)
|
||||
retCheck(retNotNil, r27, err)
|
||||
}
|
||||
|
||||
client.grpcClient = &mock.GRPCClientBase[querypb.QueryCoordClient]{
|
||||
|
|
|
@@ -417,3 +417,27 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
 func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
 	return s.queryCoord.CheckHealth(ctx, req)
 }
+
+func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
+	return s.queryCoord.CreateResourceGroup(ctx, req)
+}
+
+func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
+	return s.queryCoord.DropResourceGroup(ctx, req)
+}
+
+func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
+	return s.queryCoord.TransferNode(ctx, req)
+}
+
+func (s *Server) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
+	return s.queryCoord.TransferReplica(ctx, req)
+}
+
+func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
+	return s.queryCoord.ListResourceGroups(ctx, req)
+}
+
+func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
+	return s.queryCoord.DescribeResourceGroup(ctx, req)
+}

@ -32,7 +32,7 @@ import (
|
|||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockQueryCoord struct {
|
||||
states *milvuspb.ComponentStates
|
||||
status *commonpb.Status
|
||||
|
@ -158,7 +158,35 @@ func (m *MockQueryCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHea
|
|||
}, m.err
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
func (m *MockQueryCoord) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return m.status, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return m.status, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
return m.status, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
return m.status, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
return &milvuspb.ListResourceGroupsResponse{
|
||||
Status: m.status,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *MockQueryCoord) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
|
||||
return &querypb.DescribeResourceGroupResponse{
|
||||
Status: m.status,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockRootCoord struct {
|
||||
types.RootCoord
|
||||
initErr error
|
||||
|
@ -191,7 +219,7 @@ func (m *MockRootCoord) GetComponentStates(ctx context.Context) (*milvuspb.Compo
|
|||
}, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockDataCoord struct {
|
||||
types.DataCoord
|
||||
initErr error
|
||||
|
@ -224,7 +252,7 @@ func (m *MockDataCoord) GetComponentStates(ctx context.Context) (*milvuspb.Compo
|
|||
}, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
type MockIndexCoord struct {
|
||||
types.IndexCoord
|
||||
initErr error
|
||||
|
@ -257,7 +285,7 @@ func (m *MockIndexCoord) GetComponentStates(ctx context.Context) (*milvuspb.Comp
|
|||
}, nil
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
func Test_NewServer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
server, err := NewServer(ctx, nil)
|
||||
|
@ -399,6 +427,43 @@ func Test_NewServer(t *testing.T) {
|
|||
assert.Equal(t, true, ret.IsHealthy)
|
||||
})
|
||||
|
||||
t.Run("CreateResourceGroup", func(t *testing.T) {
|
||||
resp, err := server.CreateResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
|
||||
})
|
||||
|
||||
t.Run("DropResourceGroup", func(t *testing.T) {
|
||||
resp, err := server.DropResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
|
||||
})
|
||||
|
||||
t.Run("TransferNode", func(t *testing.T) {
|
||||
resp, err := server.TransferNode(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
|
||||
})
|
||||
|
||||
t.Run("TransferReplica", func(t *testing.T) {
|
||||
resp, err := server.TransferReplica(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
|
||||
})
|
||||
|
||||
t.Run("ListResourceGroups", func(t *testing.T) {
|
||||
req := &milvuspb.ListResourceGroupsRequest{}
|
||||
resp, err := server.ListResourceGroups(ctx, req)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
|
||||
})
|
||||
|
||||
t.Run("DescribeResourceGroup", func(t *testing.T) {
|
||||
resp, err := server.DescribeResourceGroup(ctx, nil)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
|
||||
})
|
||||
|
||||
err = server.Stop()
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
|
|
|
@@ -147,4 +147,7 @@ type QueryCoordCatalog interface {
 	ReleasePartition(collection int64, partitions ...int64) error
 	ReleaseReplicas(collectionID int64) error
 	ReleaseReplica(collection, replica int64) error
+	SaveResourceGroup(rgs ...*querypb.ResourceGroup) error
+	RemoveResourceGroup(rgName string) error
+	GetResourceGroups() ([]*querypb.ResourceGroup, error)
 }

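The three new catalog methods persist resource-group definitions next to the existing collection, partition, and replica metadata. Below is a hedged sketch of how a caller might use them; only the three method signatures come from the diff, while the local interface alias, the helper names, and the import path are illustrative assumptions.

package example

import (
	"github.com/milvus-io/milvus/internal/proto/querypb"
)

// queryCoordCatalog mirrors just the methods added to QueryCoordCatalog above.
type queryCoordCatalog interface {
	SaveResourceGroup(rgs ...*querypb.ResourceGroup) error
	RemoveResourceGroup(rgName string) error
	GetResourceGroups() ([]*querypb.ResourceGroup, error)
}

// registerResourceGroup persists a new, initially empty resource group.
func registerResourceGroup(catalog queryCoordCatalog, name string, capacity int32) error {
	return catalog.SaveResourceGroup(&querypb.ResourceGroup{
		Name:     name,
		Capacity: capacity,
	})
}

// restoreResourceGroups reloads persisted resource groups into a map keyed by
// name, for example during coordinator recovery.
func restoreResourceGroups(catalog queryCoordCatalog) (map[string]*querypb.ResourceGroup, error) {
	rgs, err := catalog.GetResourceGroups()
	if err != nil {
		return nil, err
	}
	out := make(map[string]*querypb.ResourceGroup, len(rgs))
	for _, rg := range rgs {
		out[rg.GetName()] = rg
	}
	return out, nil
}
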
@@ -36,6 +36,13 @@ service QueryCoord {
   rpc GetShardLeaders(GetShardLeadersRequest) returns (GetShardLeadersResponse) {}

   rpc CheckHealth(milvus.CheckHealthRequest) returns (milvus.CheckHealthResponse) {}
+
+  rpc CreateResourceGroup(milvus.CreateResourceGroupRequest) returns (common.Status) {}
+  rpc DropResourceGroup(milvus.DropResourceGroupRequest) returns (common.Status) {}
+  rpc TransferNode(milvus.TransferNodeRequest) returns (common.Status) {}
+  rpc TransferReplica(TransferReplicaRequest) returns (common.Status) {}
+  rpc ListResourceGroups(milvus.ListResourceGroupsRequest) returns (milvus.ListResourceGroupsResponse) {}
+  rpc DescribeResourceGroup(DescribeResourceGroupRequest) returns (DescribeResourceGroupResponse) {}
 }

 service QueryNode {

@@ -101,6 +108,8 @@ message LoadCollectionRequest {
   // fieldID -> indexID
   map<int64, int64> field_indexID = 6;
   bool refresh = 7;
+  // resource group names
+  repeated string resource_groups = 8;
 }

 message ReleaseCollectionRequest {

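The new resource_groups field lets a load request name the resource groups that should hold the collection's replicas. A minimal hedged sketch of the generated Go message follows; the field names assume the standard protoc-gen-go mapping, the CollectionID field is not shown in this hunk and is assumed to exist, and the import path is assumed from the repository layout.

package example

import (
	"github.com/milvus-io/milvus/internal/proto/querypb"
)

// buildLoadRequest fills the two fields added above for a load of the given collection.
func buildLoadRequest(collectionID int64, groups []string) *querypb.LoadCollectionRequest {
	return &querypb.LoadCollectionRequest{
		CollectionID:   collectionID, // assumed field, not shown in this hunk
		Refresh:        false,        // bool refresh = 7
		ResourceGroups: groups,       // repeated string resource_groups = 8
	}
}
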
@@ -128,6 +137,8 @@ message LoadPartitionsRequest {
   // fieldID -> indexID
   map<int64, int64> field_indexID = 7;
   bool refresh = 8;
+  // resource group names
+  repeated string resource_groups = 9;
 }

 message ReleasePartitionsRequest {

@@ -488,6 +499,7 @@ message Replica {
   int64 ID = 1;
   int64 collectionID = 2;
   repeated int64 nodes = 3;
+  string resource_group = 4;
 }

 enum SyncType {

@@ -510,3 +522,39 @@ message SyncDistributionRequest {
   repeated SyncAction actions = 4;
 }

+message ResourceGroup {
+  string name = 1;
+  int32 capacity = 2;
+  repeated int64 nodes = 3;
+}
+
+// transfer `replicaNum` replicas in `collectionID` from `source_resource_group` to `target_resource_groups`
+message TransferReplicaRequest {
+  common.MsgBase base = 1;
+  string source_resource_group = 2;
+  string target_resource_group = 3;
+  int64 collectionID = 4;
+  int64 num_replica = 5;
+}
+
+message DescribeResourceGroupRequest {
+  common.MsgBase base = 1;
+  string resource_group = 2;
+}
+
+message DescribeResourceGroupResponse {
+  common.Status status = 1;
+  ResourceGroupInfo resource_group = 2;
+}
+
+message ResourceGroupInfo {
+  string name = 1;
+  int32 capacity = 2;
+  int32 num_available_node = 3;
+  // collection id -> loaded replica num
+  map<int64, int32> num_loaded_replica = 4;
+  // collection id -> accessed other rg's node num
+  map<int64, int32> num_outgoing_node = 5;
+  // collection id -> be accessed node num by other rg
+  map<int64, int32> num_incoming_node = 6;
+}

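A hedged Go sketch of reading the new DescribeResourceGroup response types defined above; generated field names assume the standard protoc-gen-go mapping, the import path is assumed from the repository layout, and the sample values are made up.

package example

import (
	"fmt"

	"github.com/milvus-io/milvus/internal/proto/querypb"
)

func main() {
	// A hand-built response mirroring DescribeResourceGroupResponse / ResourceGroupInfo.
	resp := &querypb.DescribeResourceGroupResponse{
		ResourceGroup: &querypb.ResourceGroupInfo{
			Name:             "rg1",
			Capacity:         4,
			NumAvailableNode: 3,
			NumLoadedReplica: map[int64]int32{100: 2}, // collection 100 has 2 replicas loaded here
			NumOutgoingNode:  map[int64]int32{100: 1}, // nodes lent to other resource groups
			NumIncomingNode:  map[int64]int32{},       // nodes borrowed from other resource groups
		},
	}
	info := resp.GetResourceGroup()
	fmt.Printf("%s: %d/%d nodes available\n", info.GetName(), info.GetNumAvailableNode(), info.GetCapacity())
}
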
File diff suppressed because it is too large.

@@ -3931,6 +3931,10 @@ func (node *Proxy) GetReplicas(ctx context.Context, req *milvuspb.GetReplicasReq
 		commonpbutil.WithSourceID(Params.ProxyCfg.GetNodeID()),
 	)

+	if req.GetCollectionName() != "" {
+		req.CollectionID, _ = globalMetaCache.GetCollectionID(ctx, req.GetCollectionName())
+	}
+
 	resp, err := node.queryCoord.GetReplicas(ctx, req)
 	if err != nil {
 		log.Error("Failed to get replicas from Query Coordinator", zap.Error(err))

@ -4725,3 +4729,393 @@ func (node *Proxy) CheckHealth(ctx context.Context, request *milvuspb.CheckHealt
|
|||
IsHealthy: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) CreateResourceGroup(ctx context.Context, request *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
if !node.checkHealthy() {
|
||||
return unhealthyStatus(), nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-CreateResourceGroup")
|
||||
defer sp.Finish()
|
||||
method := "CreateResourceGroup"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &CreateResourceGroupTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
CreateResourceGroupRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("CreateResourceGroup received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("CreateResourceGroup failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("CreateResourceGroup enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("CreateResourceGroup failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("CreateResourceGroup done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) DropResourceGroup(ctx context.Context, request *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
if !node.checkHealthy() {
|
||||
return unhealthyStatus(), nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-DropResourceGroup")
|
||||
defer sp.Finish()
|
||||
method := "DropResourceGroup"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &DropResourceGroupTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
DropResourceGroupRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("DropResourceGroup received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("DropResourceGroup failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("DropResourceGroup enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("DropResourceGroup failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("DropResourceGroup done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) TransferNode(ctx context.Context, request *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
if !node.checkHealthy() {
|
||||
return unhealthyStatus(), nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-TransferNode")
|
||||
defer sp.Finish()
|
||||
method := "TransferNode"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &TransferNodeTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
TransferNodeRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("TransferNode received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("TransferNode failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("TransferNode enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("TransferNode failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("TransferNode done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) TransferReplica(ctx context.Context, request *milvuspb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
if !node.checkHealthy() {
|
||||
return unhealthyStatus(), nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-TransferReplica")
|
||||
defer sp.Finish()
|
||||
method := "TransferReplica"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &TransferReplicaTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
TransferReplicaRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("TransferReplica received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("TransferReplica failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("TransferReplica enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("TransferReplica failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("TransferReplica done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) ListResourceGroups(ctx context.Context, request *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
if !node.checkHealthy() {
|
||||
return &milvuspb.ListResourceGroupsResponse{
|
||||
Status: unhealthyStatus(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-ListResourceGroups")
|
||||
defer sp.Finish()
|
||||
method := "ListResourceGroups"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &ListResourceGroupsTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
ListResourceGroupsRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("ListResourceGroups received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("ListResourceGroups failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &milvuspb.ListResourceGroupsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("ListResourceGroups enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("ListResourceGroups failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &milvuspb.ListResourceGroupsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("ListResourceGroups done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
||||
func (node *Proxy) DescribeResourceGroup(ctx context.Context, request *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error) {
|
||||
if !node.checkHealthy() {
|
||||
return &milvuspb.DescribeResourceGroupResponse{
|
||||
Status: unhealthyStatus(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
sp, ctx := trace.StartSpanFromContextWithOperationName(ctx, "Proxy-DescribeResourceGroup")
|
||||
defer sp.Finish()
|
||||
method := "DescribeResourceGroup"
|
||||
tr := timerecord.NewTimeRecorder(method)
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.TotalLabel).Inc()
|
||||
t := &DescribeResourceGroupTask{
|
||||
ctx: ctx,
|
||||
Condition: NewTaskCondition(ctx),
|
||||
DescribeResourceGroupRequest: request,
|
||||
queryCoord: node.queryCoord,
|
||||
}
|
||||
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("role", typeutil.ProxyRole),
|
||||
)
|
||||
|
||||
log.Debug("DescribeResourceGroup received")
|
||||
|
||||
if err := node.sched.ddQueue.Enqueue(t); err != nil {
|
||||
log.Warn("DescribeResourceGroup failed to enqueue",
|
||||
zap.Error(err))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.AbandonLabel).Inc()
|
||||
return &milvuspb.DescribeResourceGroupResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("DescribeResourceGroup enqueued",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
if err := t.WaitToFinish(); err != nil {
|
||||
log.Warn("DescribeResourceGroup failed to WaitToFinish",
|
||||
zap.Error(err),
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.FailLabel).Inc()
|
||||
return &milvuspb.DescribeResourceGroupResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_UnexpectedError,
|
||||
Reason: err.Error(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
log.Debug("DescribeResourceGroup done",
|
||||
zap.Uint64("BeginTS", t.BeginTs()),
|
||||
zap.Uint64("EndTS", t.EndTs()))
|
||||
|
||||
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method,
|
||||
metrics.SuccessLabel).Inc()
|
||||
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return t.result, nil
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"github.com/milvus-io/milvus-proto/go-api/milvuspb"
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
"github.com/milvus-io/milvus/internal/proto/proxypb"
|
||||
"github.com/milvus-io/milvus/internal/util/dependency"
|
||||
"github.com/milvus-io/milvus/internal/util/sessionutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
@ -146,3 +147,76 @@ func TestProxy_CheckHealth(t *testing.T) {
|
|||
assert.Equal(t, 2, len(resp.GetReasons()))
|
||||
})
|
||||
}
|
||||
|
||||
func TestProxy_ResourceGroup(t *testing.T) {
|
||||
factory := dependency.NewDefaultFactory(true)
|
||||
ctx := context.Background()
|
||||
|
||||
node, err := NewProxy(ctx, factory)
|
||||
assert.NoError(t, err)
|
||||
node.multiRateLimiter = NewMultiRateLimiter()
|
||||
node.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
|
||||
qc := NewQueryCoordMock()
|
||||
node.SetQueryCoordClient(qc)
|
||||
|
||||
tsoAllocatorIns := newMockTsoAllocator()
|
||||
node.sched, err = newTaskScheduler(node.ctx, tsoAllocatorIns, node.factory)
|
||||
node.sched.ddQueue.setMaxTaskNum(1024)
|
||||
assert.NoError(t, err)
|
||||
node.sched.Start()
|
||||
defer node.sched.Close()
|
||||
|
||||
rc := &MockRootCoordClientInterface{}
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
|
||||
t.Run("create resource group", func(t *testing.T) {
|
||||
resp, err := node.CreateResourceGroup(ctx, &milvuspb.CreateResourceGroupRequest{
|
||||
ResourceGroup: "rg",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
|
||||
t.Run("drop resource group", func(t *testing.T) {
|
||||
resp, err := node.DropResourceGroup(ctx, &milvuspb.DropResourceGroupRequest{
|
||||
ResourceGroup: "rg",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
|
||||
t.Run("transfer node", func(t *testing.T) {
|
||||
resp, err := node.TransferNode(ctx, &milvuspb.TransferNodeRequest{
|
||||
SourceResourceGroup: "rg1",
|
||||
TargetResourceGroup: "rg2",
|
||||
NumNode: 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
|
||||
t.Run("transfer replica", func(t *testing.T) {
|
||||
resp, err := node.TransferReplica(ctx, &milvuspb.TransferReplicaRequest{
|
||||
SourceResourceGroup: "rg1",
|
||||
TargetResourceGroup: "rg2",
|
||||
NumReplica: 1,
|
||||
CollectionName: "collection1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
|
||||
t.Run("list resource group", func(t *testing.T) {
|
||||
resp, err := node.ListResourceGroups(ctx, &milvuspb.ListResourceGroupsRequest{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.Status.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
|
||||
t.Run("describe resource group", func(t *testing.T) {
|
||||
resp, err := node.DescribeResourceGroup(ctx, &milvuspb.DescribeResourceGroupRequest{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, resp.Status.ErrorCode, commonpb.ErrorCode_Success)
|
||||
})
|
||||
}
|
||||
|
|
|
@@ -50,6 +50,8 @@ import (
 type Cache interface {
 	// GetCollectionID get collection's id by name.
 	GetCollectionID(ctx context.Context, collectionName string) (typeutil.UniqueID, error)
+	// GetCollectionName get collection's name by id
+	GetCollectionName(ctx context.Context, collectionID int64) (string, error)
 	// GetCollectionInfo get collection's information by name, such as collection id, schema, and etc.
 	GetCollectionInfo(ctx context.Context, collectionName string) (*collectionInfo, error)
 	// GetPartitionID get partition's identifier of specific collection.

@@ -195,7 +197,7 @@ func (m *MetaCache) GetCollectionID(ctx context.Context, collectionName string)
 		metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10), "GeCollectionID", metrics.CacheMissLabel).Inc()
 		tr := timerecord.NewTimeRecorder("UpdateCache")
 		m.mu.RUnlock()
-		coll, err := m.describeCollection(ctx, collectionName)
+		coll, err := m.describeCollection(ctx, collectionName, 0)
 		if err != nil {
 			return 0, err
 		}

@ -212,6 +214,37 @@ func (m *MetaCache) GetCollectionID(ctx context.Context, collectionName string)
|
|||
return collInfo.collID, nil
|
||||
}
|
||||
|
||||
// GetCollectionName returns the corresponding collection name for provided collection id
|
||||
func (m *MetaCache) GetCollectionName(ctx context.Context, collectionID int64) (string, error) {
|
||||
m.mu.RLock()
|
||||
var collInfo *collectionInfo
|
||||
for _, coll := range m.collInfo {
|
||||
if coll.collID == collectionID {
|
||||
collInfo = coll
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if collInfo == nil || !collInfo.isCollectionCached() {
|
||||
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), "GeCollectionName", metrics.CacheMissLabel).Inc()
|
||||
tr := timerecord.NewTimeRecorder("UpdateCache")
|
||||
m.mu.RUnlock()
|
||||
coll, err := m.describeCollection(ctx, "", collectionID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
m.updateCollection(coll, coll.Schema.Name)
|
||||
metrics.ProxyUpdateCacheLatency.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10)).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
return coll.Schema.Name, nil
|
||||
}
|
||||
defer m.mu.RUnlock()
|
||||
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(Params.QueryCoordCfg.GetNodeID(), 10), "GeCollectionName", metrics.CacheHitLabel).Inc()
|
||||
|
||||
return collInfo.schema.Name, nil
|
||||
}
|
||||
|
||||
// GetCollectionInfo returns the collection information related to provided collection name
|
||||
// If the information is not found, the proxy will try to fetch it from another source (RootCoord for now)
|
||||
func (m *MetaCache) GetCollectionInfo(ctx context.Context, collectionName string) (*collectionInfo, error) {
|
||||
|
@ -223,7 +256,7 @@ func (m *MetaCache) GetCollectionInfo(ctx context.Context, collectionName string
|
|||
if !ok || !collInfo.isCollectionCached() {
|
||||
tr := timerecord.NewTimeRecorder("UpdateCache")
|
||||
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10), "GetCollectionInfo", metrics.CacheMissLabel).Inc()
|
||||
coll, err := m.describeCollection(ctx, collectionName)
|
||||
coll, err := m.describeCollection(ctx, collectionName, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -280,7 +313,7 @@ func (m *MetaCache) GetCollectionSchema(ctx context.Context, collectionName stri
|
|||
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10), "GetCollectionSchema", metrics.CacheMissLabel).Inc()
|
||||
tr := timerecord.NewTimeRecorder("UpdateCache")
|
||||
m.mu.RUnlock()
|
||||
coll, err := m.describeCollection(ctx, collectionName)
|
||||
coll, err := m.describeCollection(ctx, collectionName, 0)
|
||||
if err != nil {
|
||||
log.Warn("Failed to load collection from rootcoord ",
|
||||
zap.String("collection name ", collectionName),
|
||||
|
@ -293,7 +326,7 @@ func (m *MetaCache) GetCollectionSchema(ctx context.Context, collectionName stri
|
|||
collInfo = m.collInfo[collectionName]
|
||||
metrics.ProxyUpdateCacheLatency.WithLabelValues(strconv.FormatInt(Params.ProxyCfg.GetNodeID(), 10)).Observe(float64(tr.ElapseSpan().Milliseconds()))
|
||||
log.Debug("Reload collection from root coordinator ",
|
||||
zap.String("collection name ", collectionName),
|
||||
zap.String("collection name", collectionName),
|
||||
zap.Any("time (milliseconds) take ", tr.ElapseSpan().Milliseconds()))
|
||||
return collInfo.schema, nil
|
||||
}
|
||||
|
@ -423,12 +456,13 @@ func (m *MetaCache) GetPartitionInfo(ctx context.Context, collectionName string,
|
|||
}
|
||||
|
||||
// Get the collection information from rootcoord.
|
||||
func (m *MetaCache) describeCollection(ctx context.Context, collectionName string) (*milvuspb.DescribeCollectionResponse, error) {
|
||||
func (m *MetaCache) describeCollection(ctx context.Context, collectionName string, collectionID int64) (*milvuspb.DescribeCollectionResponse, error) {
|
||||
req := &milvuspb.DescribeCollectionRequest{
|
||||
Base: commonpbutil.NewMsgBase(
|
||||
commonpbutil.WithMsgType(commonpb.MsgType_DescribeCollection),
|
||||
),
|
||||
CollectionName: collectionName,
|
||||
CollectionID: collectionID,
|
||||
}
|
||||
coll, err := m.rootCoord.DescribeCollection(ctx, req)
|
||||
if err != nil {
|
||||
|
|
|
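The MetaCache hunks above add a lookup by collection ID: describeCollection now carries an optional CollectionID, and GetCollectionName scans the name-keyed cache for a matching ID before falling back to a DescribeCollection call to RootCoord. Below is a minimal, self-contained sketch of that lookup-with-fallback pattern; the types and the fetch callback are illustrative stand-ins, not the actual Milvus structures.

package main

import (
	"context"
	"fmt"
	"sync"
)

type collInfo struct {
	ID   int64
	Name string
}

type nameCache struct {
	mu     sync.RWMutex
	byName map[string]*collInfo
	// fetch stands in for the DescribeCollection RPC; an empty name means lookup by ID.
	fetch func(ctx context.Context, name string, id int64) (*collInfo, error)
}

// NameByID scans the cached entries for a matching ID and falls back to the remote fetch on a miss.
func (c *nameCache) NameByID(ctx context.Context, id int64) (string, error) {
	c.mu.RLock()
	for _, info := range c.byName {
		if info.ID == id {
			c.mu.RUnlock()
			return info.Name, nil
		}
	}
	c.mu.RUnlock()

	info, err := c.fetch(ctx, "", id)
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.byName[info.Name] = info
	c.mu.Unlock()
	return info.Name, nil
}

func main() {
	c := &nameCache{
		byName: map[string]*collInfo{},
		fetch: func(ctx context.Context, name string, id int64) (*collInfo, error) {
			return &collInfo{ID: id, Name: fmt.Sprintf("collection%d", id)}, nil
		},
	}
	name, _ := c.NameByID(context.Background(), 1)
	fmt.Println(name) // collection1
}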
@ -127,7 +127,7 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
|
|||
return nil, errors.New("mocked error")
|
||||
}
|
||||
m.IncAccessCount()
|
||||
if in.CollectionName == "collection1" {
|
||||
if in.CollectionName == "collection1" || in.CollectionID == 1 {
|
||||
return &milvuspb.DescribeCollectionResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
|
@ -135,10 +135,11 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
|
|||
CollectionID: typeutil.UniqueID(1),
|
||||
Schema: &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Name: "collection1",
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
if in.CollectionName == "collection2" {
|
||||
if in.CollectionName == "collection2" || in.CollectionID == 2 {
|
||||
return &milvuspb.DescribeCollectionResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
|
@ -146,6 +147,7 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
|
|||
CollectionID: typeutil.UniqueID(2),
|
||||
Schema: &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Name: "collection2",
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
@ -230,7 +232,7 @@ func (m *MockQueryCoordClientInterface) ShowCollections(ctx context.Context, req
|
|||
return rsp, nil
|
||||
}
|
||||
|
||||
//Simulate the cache path and the
|
||||
// Simulate the cache path and the
|
||||
func TestMetaCache_GetCollection(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
rootCoord := &MockRootCoordClientInterface{}
|
||||
|
@ -251,6 +253,7 @@ func TestMetaCache_GetCollection(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
id, err = globalMetaCache.GetCollectionID(ctx, "collection2")
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 2)
|
||||
|
@ -262,6 +265,7 @@ func TestMetaCache_GetCollection(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection2",
|
||||
})
|
||||
|
||||
// test to get from cache, this should not trigger a root coord request
|
||||
|
@ -275,10 +279,61 @@ func TestMetaCache_GetCollection(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestMetaCache_GetCollectionName(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
rootCoord := &MockRootCoordClientInterface{}
|
||||
queryCoord := &MockQueryCoordClientInterface{}
|
||||
mgr := newShardClientMgr()
|
||||
err := InitMetaCache(ctx, rootCoord, queryCoord, mgr)
|
||||
assert.Nil(t, err)
|
||||
|
||||
collection, err := globalMetaCache.GetCollectionName(ctx, 1)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, collection, "collection1")
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 1)
|
||||
|
||||
// shouldn't access the remote root coord.
|
||||
schema, err := globalMetaCache.GetCollectionSchema(ctx, "collection1")
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 1)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
collection, err = globalMetaCache.GetCollectionName(ctx, 1)
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 1)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, collection, "collection1")
|
||||
schema, err = globalMetaCache.GetCollectionSchema(ctx, "collection2")
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 2)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection2",
|
||||
})
|
||||
|
||||
// test to get from cache, this should not trigger a root coord request
|
||||
collection, err = globalMetaCache.GetCollectionName(ctx, 1)
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 2)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, collection, "collection1")
|
||||
schema, err = globalMetaCache.GetCollectionSchema(ctx, "collection1")
|
||||
assert.Equal(t, rootCoord.GetAccessCount(), 2)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
}
|
||||
|
||||
func TestMetaCache_GetCollectionFailure(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
rootCoord := &MockRootCoordClientInterface{}
|
||||
|
@ -299,6 +354,7 @@ func TestMetaCache_GetCollectionFailure(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
|
||||
rootCoord.Error = true
|
||||
|
@ -307,6 +363,7 @@ func TestMetaCache_GetCollectionFailure(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -367,6 +424,7 @@ func TestMetaCache_ConcurrentTest1(t *testing.T) {
|
|||
assert.Equal(t, schema, &schemapb.CollectionSchema{
|
||||
AutoID: true,
|
||||
Fields: []*schemapb.FieldSchema{},
|
||||
Name: "collection1",
|
||||
})
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
)
|
||||
|
||||
type getCollectionIDFunc func(ctx context.Context, collectionName string) (typeutil.UniqueID, error)
|
||||
type getCollectionNameFunc func(ctx context.Context, collectionID int64) (string, error)
|
||||
type getCollectionSchemaFunc func(ctx context.Context, collectionName string) (*schemapb.CollectionSchema, error)
|
||||
type getCollectionInfoFunc func(ctx context.Context, collectionName string) (*collectionInfo, error)
|
||||
type getUserRoleFunc func(username string) []string
|
||||
|
@ -16,6 +17,7 @@ type getPartitionIDFunc func(ctx context.Context, collectionName string, partiti
|
|||
type mockCache struct {
|
||||
Cache
|
||||
getIDFunc getCollectionIDFunc
|
||||
getNameFunc getCollectionNameFunc
|
||||
getSchemaFunc getCollectionSchemaFunc
|
||||
getInfoFunc getCollectionInfoFunc
|
||||
getUserRoleFunc getUserRoleFunc
|
||||
|
@ -29,6 +31,13 @@ func (m *mockCache) GetCollectionID(ctx context.Context, collectionName string)
|
|||
return 0, nil
|
||||
}
|
||||
|
||||
func (m *mockCache) GetCollectionName(ctx context.Context, collectionID int64) (string, error) {
|
||||
if m.getNameFunc != nil {
|
||||
return m.getNameFunc(ctx, collectionID)
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func (m *mockCache) GetCollectionSchema(ctx context.Context, collectionName string) (*schemapb.CollectionSchema, error) {
|
||||
if m.getSchemaFunc != nil {
|
||||
return m.getSchemaFunc(ctx, collectionName)
|
||||
|
|
|
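The mockCache changes above follow the function-field mock pattern: each Cache method delegates to an injectable closure, so a test overrides only the behavior it needs (here the new getNameFunc). A small standalone sketch of the pattern, with illustrative names rather than the real proxy types:

package main

import (
	"context"
	"errors"
	"fmt"
)

// mockNameCache delegates to an injectable closure; a nil field falls back to a zero-value default.
type mockNameCache struct {
	getNameFunc func(ctx context.Context, collectionID int64) (string, error)
}

func (m *mockNameCache) GetCollectionName(ctx context.Context, collectionID int64) (string, error) {
	if m.getNameFunc != nil {
		return m.getNameFunc(ctx, collectionID)
	}
	return "", nil
}

func main() {
	m := &mockNameCache{}
	m.getNameFunc = func(ctx context.Context, id int64) (string, error) {
		if id == 1 {
			return "collection1", nil
		}
		return "", errors.New("collection not found")
	}
	name, err := m.GetCollectionName(context.Background(), 1)
	fmt.Println(name, err) // collection1 <nil>
}

Note that the method must test the same field it calls; checking getIDFunc while invoking getNameFunc (as the hunk originally did) either panics or silently skips the override.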
@ -22,6 +22,7 @@ import (
|
|||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
"github.com/milvus-io/milvus/internal/util/funcutil"
|
||||
"github.com/milvus-io/milvus/internal/util/uniquegenerator"
|
||||
|
||||
|
@ -423,6 +424,60 @@ func (coord *QueryCoordMock) GetShardLeaders(ctx context.Context, req *querypb.G
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
return &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
return &milvuspb.ListResourceGroupsResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
},
|
||||
ResourceGroups: []string{meta.DefaultResourceGroupName, "rg"},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (coord *QueryCoordMock) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
|
||||
return &querypb.DescribeResourceGroupResponse{
|
||||
Status: &commonpb.Status{
|
||||
ErrorCode: commonpb.ErrorCode_Success,
|
||||
Reason: "",
|
||||
},
|
||||
ResourceGroup: &querypb.ResourceGroupInfo{
|
||||
Name: "rg",
|
||||
Capacity: 2,
|
||||
NumAvailableNode: 1,
|
||||
NumOutgoingNode: map[int64]int32{1: 1},
|
||||
NumIncomingNode: map[int64]int32{2: 2},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NewQueryCoordMock(opts ...QueryCoordMockOption) *QueryCoordMock {
|
||||
coord := &QueryCoordMock{
|
||||
nodeID: UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt()),
|
||||
|
|
|
@ -37,6 +37,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/types"
|
||||
"github.com/milvus-io/milvus/internal/util/commonpbutil"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/samber/lo"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -49,26 +50,33 @@ const (
|
|||
OffsetKey = "offset"
|
||||
LimitKey = "limit"
|
||||
|
||||
InsertTaskName = "InsertTask"
|
||||
CreateCollectionTaskName = "CreateCollectionTask"
|
||||
DropCollectionTaskName = "DropCollectionTask"
|
||||
HasCollectionTaskName = "HasCollectionTask"
|
||||
DescribeCollectionTaskName = "DescribeCollectionTask"
|
||||
ShowCollectionTaskName = "ShowCollectionTask"
|
||||
CreatePartitionTaskName = "CreatePartitionTask"
|
||||
DropPartitionTaskName = "DropPartitionTask"
|
||||
HasPartitionTaskName = "HasPartitionTask"
|
||||
ShowPartitionTaskName = "ShowPartitionTask"
|
||||
FlushTaskName = "FlushTask"
|
||||
LoadCollectionTaskName = "LoadCollectionTask"
|
||||
ReleaseCollectionTaskName = "ReleaseCollectionTask"
|
||||
LoadPartitionTaskName = "LoadPartitionsTask"
|
||||
ReleasePartitionTaskName = "ReleasePartitionsTask"
|
||||
deleteTaskName = "DeleteTask"
|
||||
CreateAliasTaskName = "CreateAliasTask"
|
||||
DropAliasTaskName = "DropAliasTask"
|
||||
AlterAliasTaskName = "AlterAliasTask"
|
||||
AlterCollectionTaskName = "AlterCollectionTask"
|
||||
InsertTaskName = "InsertTask"
|
||||
CreateCollectionTaskName = "CreateCollectionTask"
|
||||
DropCollectionTaskName = "DropCollectionTask"
|
||||
HasCollectionTaskName = "HasCollectionTask"
|
||||
DescribeCollectionTaskName = "DescribeCollectionTask"
|
||||
ShowCollectionTaskName = "ShowCollectionTask"
|
||||
CreatePartitionTaskName = "CreatePartitionTask"
|
||||
DropPartitionTaskName = "DropPartitionTask"
|
||||
HasPartitionTaskName = "HasPartitionTask"
|
||||
ShowPartitionTaskName = "ShowPartitionTask"
|
||||
FlushTaskName = "FlushTask"
|
||||
LoadCollectionTaskName = "LoadCollectionTask"
|
||||
ReleaseCollectionTaskName = "ReleaseCollectionTask"
|
||||
LoadPartitionTaskName = "LoadPartitionsTask"
|
||||
ReleasePartitionTaskName = "ReleasePartitionsTask"
|
||||
deleteTaskName = "DeleteTask"
|
||||
CreateAliasTaskName = "CreateAliasTask"
|
||||
DropAliasTaskName = "DropAliasTask"
|
||||
AlterAliasTaskName = "AlterAliasTask"
|
||||
AlterCollectionTaskName = "AlterCollectionTask"
|
||||
UpsertTaskName = "UpsertTask"
|
||||
CreateResourceGroupTaskName = "CreateResourceGroupTask"
|
||||
DropResourceGroupTaskName = "DropResourceGroupTask"
|
||||
TransferNodeTaskName = "TransferNodeTask"
|
||||
TransferReplicaTaskName = "TransferReplicaTask"
|
||||
ListResourceGroupsTaskName = "ListResourceGroupsTask"
|
||||
DescribeResourceGroupTaskName = "DescribeResourceGroupTask"
|
||||
|
||||
// minFloat32 minimum float.
|
||||
minFloat32 = -1 * float32(math.MaxFloat32)
|
||||
|
@ -1907,3 +1915,412 @@ func (a *AlterAliasTask) Execute(ctx context.Context) error {
|
|||
func (a *AlterAliasTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type CreateResourceGroupTask struct {
|
||||
Condition
|
||||
*milvuspb.CreateResourceGroupRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *commonpb.Status
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) Name() string {
|
||||
return CreateResourceGroupTaskName
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_CreateResourceGroup
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
t.result, err = t.queryCoord.CreateResourceGroup(ctx, t.CreateResourceGroupRequest)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *CreateResourceGroupTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type DropResourceGroupTask struct {
|
||||
Condition
|
||||
*milvuspb.DropResourceGroupRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *commonpb.Status
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) Name() string {
|
||||
return DropResourceGroupTaskName
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_DropResourceGroup
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
t.result, err = t.queryCoord.DropResourceGroup(ctx, t.DropResourceGroupRequest)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *DropResourceGroupTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type DescribeResourceGroupTask struct {
|
||||
Condition
|
||||
*milvuspb.DescribeResourceGroupRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *milvuspb.DescribeResourceGroupResponse
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) Name() string {
|
||||
return DescribeResourceGroupTaskName
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_DescribeResourceGroup
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
resp, err := t.queryCoord.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{
|
||||
ResourceGroup: t.ResourceGroup,
|
||||
})
if err != nil {
return err
}
rgInfo := resp.GetResourceGroup()
|
||||
|
||||
getCollectionNameFunc := func(value int32, key int64) string {
|
||||
name, err := globalMetaCache.GetCollectionName(ctx, key)
|
||||
if err != nil {
|
||||
// unreachable logic path
|
||||
return "unavailable_collection"
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
loadReplicas := lo.MapKeys(rgInfo.NumLoadedReplica, getCollectionNameFunc)
|
||||
outgoingNodes := lo.MapKeys(rgInfo.NumOutgoingNode, getCollectionNameFunc)
|
||||
incomingNodes := lo.MapKeys(rgInfo.NumIncomingNode, getCollectionNameFunc)
|
||||
|
||||
t.result = &milvuspb.DescribeResourceGroupResponse{
|
||||
Status: resp.Status,
|
||||
ResourceGroup: &milvuspb.ResourceGroup{
|
||||
Name: rgInfo.GetName(),
|
||||
Capacity: rgInfo.GetCapacity(),
|
||||
NumAvailableNode: rgInfo.NumAvailableNode,
|
||||
NumLoadedReplica: loadReplicas,
|
||||
NumOutgoingNode: outgoingNodes,
|
||||
NumIncomingNode: incomingNodes,
|
||||
},
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *DescribeResourceGroupTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type TransferNodeTask struct {
|
||||
Condition
|
||||
*milvuspb.TransferNodeRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *commonpb.Status
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) Name() string {
|
||||
return TransferNodeTaskName
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_TransferNode
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
t.result, err = t.queryCoord.TransferNode(ctx, t.TransferNodeRequest)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *TransferNodeTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type TransferReplicaTask struct {
|
||||
Condition
|
||||
*milvuspb.TransferReplicaRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *commonpb.Status
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) Name() string {
|
||||
return TransferReplicaTaskName
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_TransferReplica
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
collID, err := globalMetaCache.GetCollectionID(ctx, t.CollectionName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.result, err = t.queryCoord.TransferReplica(ctx, &querypb.TransferReplicaRequest{
|
||||
SourceResourceGroup: t.SourceResourceGroup,
|
||||
TargetResourceGroup: t.TargetResourceGroup,
|
||||
CollectionID: collID,
|
||||
NumReplica: t.NumReplica,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *TransferReplicaTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type ListResourceGroupsTask struct {
|
||||
Condition
|
||||
*milvuspb.ListResourceGroupsRequest
|
||||
ctx context.Context
|
||||
queryCoord types.QueryCoord
|
||||
result *milvuspb.ListResourceGroupsResponse
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) TraceCtx() context.Context {
|
||||
return t.ctx
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) ID() UniqueID {
|
||||
return t.Base.MsgID
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) SetID(uid UniqueID) {
|
||||
t.Base.MsgID = uid
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) Name() string {
|
||||
return ListResourceGroupsTaskName
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) Type() commonpb.MsgType {
|
||||
return t.Base.MsgType
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) BeginTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) EndTs() Timestamp {
|
||||
return t.Base.Timestamp
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) SetTs(ts Timestamp) {
|
||||
t.Base.Timestamp = ts
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) OnEnqueue() error {
|
||||
t.Base = commonpbutil.NewMsgBase()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) PreExecute(ctx context.Context) error {
|
||||
t.Base.MsgType = commonpb.MsgType_ListResourceGroups
|
||||
t.Base.SourceID = Params.QueryCoordCfg.GetNodeID()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) Execute(ctx context.Context) error {
|
||||
var err error
|
||||
t.result, err = t.queryCoord.ListResourceGroups(ctx, t.ListResourceGroupsRequest)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *ListResourceGroupsTask) PostExecute(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
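DescribeResourceGroupTask.Execute above converts the coordinator's ID-keyed maps (NumLoadedReplica, NumOutgoingNode, NumIncomingNode) into name-keyed maps with lo.MapKeys from github.com/samber/lo, resolving each key through the meta cache. A compact standalone example of that remapping; the map contents and the nameOf resolver are illustrative:

package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	// Replica counts keyed by collection ID, as the query coordinator reports them.
	numLoadedReplica := map[int64]int32{1: 2, 2: 1}

	// Stand-in for globalMetaCache.GetCollectionName.
	nameOf := func(id int64) string { return fmt.Sprintf("collection%d", id) }

	// lo.MapKeys rebuilds the map, computing each new key from the (value, key) pair.
	byName := lo.MapKeys(numLoadedReplica, func(value int32, key int64) string {
		return nameOf(key)
	})

	fmt.Println(byName) // map[collection1:2 collection2:1]
}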
@ -30,6 +30,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/mocks"
|
||||
"github.com/stretchr/testify/mock"
|
||||
|
@ -2474,3 +2475,248 @@ func Test_loadPartitionTask_Execute(t *testing.T) {
|
|||
assert.Error(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestCreateResourceGroupTask(t *testing.T) {
|
||||
rc := NewRootCoordMock()
|
||||
rc.Start()
|
||||
defer rc.Stop()
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
|
||||
createRGReq := &milvuspb.CreateResourceGroupRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
ResourceGroup: "rg",
|
||||
}
|
||||
|
||||
task := &CreateResourceGroupTask{
|
||||
CreateResourceGroupRequest: createRGReq,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_CreateResourceGroup, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
|
||||
}
|
||||
|
||||
func TestDropResourceGroupTask(t *testing.T) {
|
||||
rc := NewRootCoordMock()
|
||||
rc.Start()
|
||||
defer rc.Stop()
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
|
||||
dropRGReq := &milvuspb.DropResourceGroupRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
ResourceGroup: "rg",
|
||||
}
|
||||
|
||||
task := &DropResourceGroupTask{
|
||||
DropResourceGroupRequest: dropRGReq,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_DropResourceGroup, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
|
||||
}
|
||||
|
||||
func TestTransferNodeTask(t *testing.T) {
|
||||
rc := NewRootCoordMock()
|
||||
rc.Start()
|
||||
defer rc.Stop()
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
|
||||
req := &milvuspb.TransferNodeRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
SourceResourceGroup: "rg1",
|
||||
TargetResourceGroup: "rg2",
|
||||
NumNode: 1,
|
||||
}
|
||||
|
||||
task := &TransferNodeTask{
|
||||
TransferNodeRequest: req,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_TransferNode, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
|
||||
}
|
||||
|
||||
func TestTransferReplicaTask(t *testing.T) {
|
||||
rc := &MockRootCoordClientInterface{}
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
// warm up the cache so the task does not issue a remote call to rootcoord
|
||||
globalMetaCache.GetCollectionSchema(context.Background(), "collection1")
|
||||
|
||||
req := &milvuspb.TransferReplicaRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
CollectionName: "collection1",
|
||||
SourceResourceGroup: "rg1",
|
||||
TargetResourceGroup: "rg2",
|
||||
NumReplica: 1,
|
||||
}
|
||||
|
||||
task := &TransferReplicaTask{
|
||||
TransferReplicaRequest: req,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_TransferReplica, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
|
||||
}
|
||||
|
||||
func TestListResourceGroupsTask(t *testing.T) {
|
||||
rc := &MockRootCoordClientInterface{}
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
|
||||
req := &milvuspb.ListResourceGroupsRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
}
|
||||
|
||||
task := &ListResourceGroupsTask{
|
||||
ListResourceGroupsRequest: req,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_ListResourceGroups, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.Status.ErrorCode)
|
||||
groups := task.result.GetResourceGroups()
|
||||
assert.Contains(t, groups, meta.DefaultResourceGroupName)
|
||||
assert.Contains(t, groups, "rg")
|
||||
}
|
||||
|
||||
func TestDescribeResourceGroupTask(t *testing.T) {
|
||||
rc := &MockRootCoordClientInterface{}
|
||||
qc := NewQueryCoordMock()
|
||||
qc.Start()
|
||||
defer qc.Stop()
|
||||
ctx := context.Background()
|
||||
mgr := newShardClientMgr()
|
||||
InitMetaCache(ctx, rc, qc, mgr)
|
||||
// warm up the cache so the task does not issue remote calls to rootcoord
|
||||
globalMetaCache.GetCollectionSchema(context.Background(), "collection1")
|
||||
globalMetaCache.GetCollectionSchema(context.Background(), "collection2")
|
||||
|
||||
req := &milvuspb.DescribeResourceGroupRequest{
|
||||
Base: &commonpb.MsgBase{
|
||||
MsgID: 1,
|
||||
Timestamp: 2,
|
||||
TargetID: 3,
|
||||
},
|
||||
ResourceGroup: "rg",
|
||||
}
|
||||
|
||||
task := &DescribeResourceGroupTask{
|
||||
DescribeResourceGroupRequest: req,
|
||||
ctx: ctx,
|
||||
queryCoord: qc,
|
||||
}
|
||||
task.PreExecute(ctx)
|
||||
|
||||
assert.Equal(t, commonpb.MsgType_DescribeResourceGroup, task.Type())
|
||||
assert.Equal(t, UniqueID(1), task.ID())
|
||||
assert.Equal(t, Timestamp(2), task.BeginTs())
|
||||
assert.Equal(t, Timestamp(2), task.EndTs())
|
||||
assert.Equal(t, Params.QueryCoordCfg.GetNodeID(), task.Base.GetSourceID())
|
||||
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
|
||||
|
||||
err := task.Execute(ctx)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, commonpb.ErrorCode_Success, task.result.Status.ErrorCode)
|
||||
groupInfo := task.result.GetResourceGroup()
|
||||
outgoingNodeNum := groupInfo.GetNumOutgoingNode()
|
||||
incomingNodeNum := groupInfo.GetNumIncomingNode()
|
||||
assert.NotNil(t, outgoingNodeNum["collection1"])
|
||||
assert.NotNil(t, incomingNodeNum["collection2"])
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ func (b *RowCountBasedBalancer) Balance() ([]SegmentAssignPlan, []ChannelAssignP
|
|||
}
|
||||
|
||||
func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]SegmentAssignPlan, []ChannelAssignPlan) {
|
||||
nodes := replica.Nodes.Collect()
|
||||
nodes := replica.GetNodes()
|
||||
if len(nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
@ -112,6 +112,9 @@ func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]Segment
|
|||
nodesSegments := make(map[int64][]*meta.Segment)
|
||||
stoppingNodesSegments := make(map[int64][]*meta.Segment)
|
||||
|
||||
outboundNodes := b.meta.ResourceManager.CheckOutboundNodes(replica)
|
||||
log.Info("nodes info", zap.Int64("collection", replica.CollectionID), zap.Int64("replica", replica.ID), zap.Int64s("nodes", nodes))
|
||||
|
||||
totalCnt := 0
|
||||
for _, nid := range nodes {
|
||||
segments := b.dist.SegmentDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nid)
|
||||
|
@ -125,6 +128,14 @@ func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]Segment
|
|||
continue
|
||||
} else if isStopping {
|
||||
stoppingNodesSegments[nid] = segments
|
||||
} else if outboundNodes.Contain(nid) {
|
||||
// the node is stopping or has been moved out to another resource group
|
||||
log.RatedInfo(10, "meet outbound node, try to move out all segment/channel",
|
||||
zap.Int64("collectionID", replica.GetCollectionID()),
|
||||
zap.Int64("replicaID", replica.GetCollectionID()),
|
||||
zap.Int64("node", nid),
|
||||
)
|
||||
stoppingNodesSegments[nid] = segments
|
||||
} else {
|
||||
nodesSegments[nid] = segments
|
||||
}
|
||||
|
@ -220,7 +231,7 @@ outer:
|
|||
node.setPriority(node.getPriority() + int(s.GetNumOfRows()))
|
||||
queue.push(node)
|
||||
}
|
||||
return plans, b.getChannelPlan(replica, stoppingNodesSegments)
|
||||
return plans, b.getChannelPlan(replica, lo.Keys(nodesSegments), lo.Keys(stoppingNodesSegments))
|
||||
}
|
||||
|
||||
func (b *RowCountBasedBalancer) handleStoppingNodes(replica *meta.Replica, nodeSegments map[int64][]*meta.Segment) ([]SegmentAssignPlan, []ChannelAssignPlan) {
|
||||
|
@ -267,17 +278,11 @@ func (b *RowCountBasedBalancer) collectionStoppingSegments(stoppingNodesSegments
|
|||
return segments, removeRowCnt
|
||||
}
|
||||
|
||||
func (b *RowCountBasedBalancer) getChannelPlan(replica *meta.Replica, stoppingNodesSegments map[int64][]*meta.Segment) []ChannelAssignPlan {
|
||||
// maybe it will have some strategies to balance the channel in the future
|
||||
// but now, only balance the channel for the stopping nodes.
|
||||
return b.getChannelPlanForStoppingNodes(replica, stoppingNodesSegments)
|
||||
}
|
||||
|
||||
func (b *RowCountBasedBalancer) getChannelPlanForStoppingNodes(replica *meta.Replica, stoppingNodesSegments map[int64][]*meta.Segment) []ChannelAssignPlan {
|
||||
func (b *RowCountBasedBalancer) getChannelPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []ChannelAssignPlan {
|
||||
channelPlans := make([]ChannelAssignPlan, 0)
|
||||
for nodeID := range stoppingNodesSegments {
|
||||
for _, nodeID := range offlineNodes {
|
||||
dmChannels := b.dist.ChannelDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nodeID)
|
||||
plans := b.AssignChannel(dmChannels, replica.Replica.GetNodes())
|
||||
plans := b.AssignChannel(dmChannels, onlineNodes)
|
||||
for i := range plans {
|
||||
plans[i].From = nodeID
|
||||
plans[i].ReplicaID = replica.ID
|
||||
|
|
|
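balanceReplica above now treats outbound nodes (nodes still listed in the replica but no longer in its resource group) the same as stopping nodes, and the reworked getChannelPlan takes explicit online and offline node slices built with lo.Keys instead of a stopping-nodes map. A small sketch of that key extraction; the per-node maps are illustrative:

package main

import (
	"fmt"
	"sort"

	"github.com/samber/lo"
)

func main() {
	// Stand-ins for nodesSegments and stoppingNodesSegments: node ID -> segment IDs.
	nodesSegments := map[int64][]int64{1: {10}, 2: {20, 30}}
	stoppingNodesSegments := map[int64][]int64{3: {40, 50}} // stopping or outbound nodes

	// lo.Keys collapses each map to a plain node-ID slice, which is what
	// getChannelPlan(replica, onlineNodes, offlineNodes) consumes.
	online := lo.Keys(nodesSegments)
	offline := lo.Keys(stoppingNodesSegments)
	sort.Slice(online, func(i, j int) bool { return online[i] < online[j] }) // map iteration order is random

	fmt.Println(online, offline) // [1 2] [3]
}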
@ -62,11 +62,11 @@ func (suite *RowCountBasedBalancerTestSuite) SetupTest() {
|
|||
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
testMeta := meta.NewMeta(idAllocator, store)
|
||||
nodeManager := session.NewNodeManager()
|
||||
testMeta := meta.NewMeta(idAllocator, store, nodeManager)
|
||||
testTarget := meta.NewTargetManager(suite.broker, testMeta)
|
||||
|
||||
distManager := meta.NewDistributionManager()
|
||||
nodeManager := session.NewNodeManager()
|
||||
suite.mockScheduler = task.NewMockScheduler(suite.T())
|
||||
suite.balancer = NewRowCountBasedBalancer(suite.mockScheduler, nodeManager, distManager, testMeta, testTarget)
|
||||
}
|
||||
|
@ -272,8 +272,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
|
|||
for i := range c.nodes {
|
||||
nodeInfo := session.NewNodeInfo(c.nodes[i], "127.0.0.1:0")
|
||||
nodeInfo.UpdateStats(session.WithSegmentCnt(c.segmentCnts[i]))
|
||||
nodeInfo.UpdateStats(session.WithChannelCnt(len(c.distributionChannels[c.nodes[i]])))
|
||||
nodeInfo.SetState(c.states[i])
|
||||
suite.balancer.nodeManager.Add(nodeInfo)
|
||||
suite.balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, c.nodes[i])
|
||||
}
|
||||
segmentPlans, channelPlans := balancer.Balance()
|
||||
suite.ElementsMatch(c.expectChannelPlans, channelPlans)
|
||||
|
@ -283,6 +285,111 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
|
|||
|
||||
}
|
||||
|
||||
func (suite *RowCountBasedBalancerTestSuite) TestBalanceOutboundNodes() {
|
||||
cases := []struct {
|
||||
name string
|
||||
nodes []int64
|
||||
notExistedNodes []int64
|
||||
segmentCnts []int
|
||||
states []session.State
|
||||
shouldMock bool
|
||||
distributions map[int64][]*meta.Segment
|
||||
distributionChannels map[int64][]*meta.DmChannel
|
||||
expectPlans []SegmentAssignPlan
|
||||
expectChannelPlans []ChannelAssignPlan
|
||||
}{
|
||||
{
|
||||
name: "balance outbound nodes",
|
||||
nodes: []int64{1, 2, 3},
|
||||
segmentCnts: []int{1, 2, 2},
|
||||
states: []session.State{session.NodeStateNormal, session.NodeStateNormal, session.NodeStateNormal},
|
||||
shouldMock: true,
|
||||
distributions: map[int64][]*meta.Segment{
|
||||
1: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 10}, Node: 1}},
|
||||
2: {
|
||||
{SegmentInfo: &datapb.SegmentInfo{ID: 2, CollectionID: 1, NumOfRows: 20}, Node: 2},
|
||||
{SegmentInfo: &datapb.SegmentInfo{ID: 3, CollectionID: 1, NumOfRows: 30}, Node: 2},
|
||||
},
|
||||
3: {
|
||||
{SegmentInfo: &datapb.SegmentInfo{ID: 4, CollectionID: 1, NumOfRows: 10}, Node: 3},
|
||||
{SegmentInfo: &datapb.SegmentInfo{ID: 5, CollectionID: 1, NumOfRows: 10}, Node: 3},
|
||||
},
|
||||
},
|
||||
distributionChannels: map[int64][]*meta.DmChannel{
|
||||
2: {
|
||||
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
|
||||
},
|
||||
3: {
|
||||
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3},
|
||||
},
|
||||
},
|
||||
expectPlans: []SegmentAssignPlan{
|
||||
{Segment: &meta.Segment{SegmentInfo: &datapb.SegmentInfo{ID: 4, CollectionID: 1, NumOfRows: 10}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
|
||||
{Segment: &meta.Segment{SegmentInfo: &datapb.SegmentInfo{ID: 5, CollectionID: 1, NumOfRows: 10}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
|
||||
},
|
||||
expectChannelPlans: []ChannelAssignPlan{
|
||||
{Channel: &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
suite.mockScheduler.Mock.On("GetNodeChannelDelta", mock.Anything).Return(0)
|
||||
for _, c := range cases {
|
||||
suite.Run(c.name, func() {
|
||||
suite.SetupSuite()
|
||||
defer suite.TearDownTest()
|
||||
balancer := suite.balancer
|
||||
collection := utils.CreateTestCollection(1, 1)
|
||||
segments := []*datapb.SegmentBinlogs{
|
||||
{
|
||||
SegmentID: 1,
|
||||
},
|
||||
{
|
||||
SegmentID: 2,
|
||||
},
|
||||
{
|
||||
SegmentID: 3,
|
||||
},
|
||||
{
|
||||
SegmentID: 4,
|
||||
},
|
||||
{
|
||||
SegmentID: 5,
|
||||
},
|
||||
}
|
||||
suite.broker.EXPECT().GetRecoveryInfo(mock.Anything, int64(1), int64(1)).Return(
|
||||
nil, segments, nil)
|
||||
balancer.targetMgr.UpdateCollectionNextTargetWithPartitions(int64(1), int64(1))
|
||||
balancer.targetMgr.UpdateCollectionCurrentTarget(1, 1)
|
||||
collection.LoadPercentage = 100
|
||||
collection.Status = querypb.LoadStatus_Loaded
|
||||
balancer.meta.CollectionManager.PutCollection(collection)
|
||||
balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, append(c.nodes, c.notExistedNodes...)))
|
||||
for node, s := range c.distributions {
|
||||
balancer.dist.SegmentDistManager.Update(node, s...)
|
||||
}
|
||||
for node, v := range c.distributionChannels {
|
||||
balancer.dist.ChannelDistManager.Update(node, v...)
|
||||
}
|
||||
for i := range c.nodes {
|
||||
nodeInfo := session.NewNodeInfo(c.nodes[i], "127.0.0.1:0")
|
||||
nodeInfo.UpdateStats(session.WithSegmentCnt(c.segmentCnts[i]))
|
||||
nodeInfo.UpdateStats(session.WithChannelCnt(len(c.distributionChannels[c.nodes[i]])))
|
||||
nodeInfo.SetState(c.states[i])
|
||||
suite.balancer.nodeManager.Add(nodeInfo)
|
||||
}
|
||||
// make node-3 outbound
|
||||
err := balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
|
||||
suite.NoError(err)
|
||||
err = balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 2)
|
||||
suite.NoError(err)
|
||||
segmentPlans, channelPlans := balancer.Balance()
|
||||
suite.ElementsMatch(c.expectChannelPlans, channelPlans)
|
||||
suite.ElementsMatch(c.expectPlans, segmentPlans)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnLoadingCollection() {
|
||||
cases := []struct {
|
||||
name string
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/samber/lo"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
|
@ -133,7 +134,7 @@ func (c *ChannelChecker) getDmChannelDiff(targetMgr *meta.TargetManager,
|
|||
|
||||
func (c *ChannelChecker) getChannelDist(distMgr *meta.DistributionManager, replica *meta.Replica) []*meta.DmChannel {
|
||||
dist := make([]*meta.DmChannel, 0)
|
||||
for _, nodeID := range replica.Nodes.Collect() {
|
||||
for _, nodeID := range replica.GetNodes() {
|
||||
dist = append(dist, distMgr.ChannelDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nodeID)...)
|
||||
}
|
||||
return dist
|
||||
|
@ -169,7 +170,11 @@ func (c *ChannelChecker) findRepeatedChannels(distMgr *meta.DistributionManager,
|
|||
}
|
||||
|
||||
func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []*meta.DmChannel, replica *meta.Replica) []task.Task {
|
||||
plans := c.balancer.AssignChannel(channels, replica.Replica.GetNodes())
|
||||
outboundNodes := c.meta.ResourceManager.CheckOutboundNodes(replica)
|
||||
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
|
||||
return !outboundNodes.Contain(node)
|
||||
})
|
||||
plans := c.balancer.AssignChannel(channels, availableNodes)
|
||||
for i := range plans {
|
||||
plans[i].ReplicaID = replica.GetID()
|
||||
}
|
||||
|
|
|
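createChannelLoadTask above (and the matching segment checker change further down) filters the replica's nodes through lo.Filter so that channels and segments are never assigned to outbound nodes. A standalone sketch of the same filtering step; outboundSet is an illustrative stand-in for the set returned by ResourceManager.CheckOutboundNodes:

package main

import (
	"fmt"

	"github.com/samber/lo"
)

// outboundSet mimics a node set with a Contain method.
type outboundSet map[int64]struct{}

func (s outboundSet) Contain(id int64) bool { _, ok := s[id]; return ok }

func main() {
	replicaNodes := []int64{1, 2, 3}
	outbound := outboundSet{3: {}}

	// Keep only nodes that are not outbound, then hand the result to the assigner.
	available := lo.Filter(replicaNodes, func(node int64, _ int) bool {
		return !outbound.Contain(node)
	})

	fmt.Println(available) // [1 2]
}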
@ -28,6 +28,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
|
@ -39,6 +40,8 @@ type ChannelCheckerTestSuite struct {
|
|||
checker *ChannelChecker
|
||||
meta *meta.Meta
|
||||
broker *meta.MockBroker
|
||||
|
||||
nodeMgr *session.NodeManager
|
||||
}
|
||||
|
||||
func (suite *ChannelCheckerTestSuite) SetupSuite() {
|
||||
|
@ -62,7 +65,8 @@ func (suite *ChannelCheckerTestSuite) SetupTest() {
|
|||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
|
||||
|
||||
|
@ -98,6 +102,8 @@ func (suite *ChannelCheckerTestSuite) TestLoadChannel() {
|
|||
checker := suite.checker
|
||||
checker.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
|
||||
checker.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
|
||||
|
||||
channels := []*datapb.VchannelInfo{
|
||||
{
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/samber/lo"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
|
@ -143,7 +144,7 @@ func (c *SegmentChecker) getStreamingSegmentDiff(targetMgr *meta.TargetManager,
|
|||
|
||||
func (c *SegmentChecker) getStreamingSegmentsDist(distMgr *meta.DistributionManager, replica *meta.Replica) map[int64]*meta.Segment {
|
||||
segments := make(map[int64]*meta.Segment, 0)
|
||||
for _, node := range replica.Nodes.Collect() {
|
||||
for _, node := range replica.GetNodes() {
|
||||
segmentsOnNodes := distMgr.LeaderViewManager.GetGrowingSegmentDistByCollectionAndNode(replica.CollectionID, node)
|
||||
for k, v := range segmentsOnNodes {
|
||||
segments[k] = v
|
||||
|
@ -195,7 +196,7 @@ func (c *SegmentChecker) getHistoricalSegmentDiff(targetMgr *meta.TargetManager,
|
|||
|
||||
func (c *SegmentChecker) getHistoricalSegmentsDist(distMgr *meta.DistributionManager, replica *meta.Replica) []*meta.Segment {
|
||||
ret := make([]*meta.Segment, 0)
|
||||
for _, node := range replica.Nodes.Collect() {
|
||||
for _, node := range replica.GetNodes() {
|
||||
ret = append(ret, distMgr.SegmentDistManager.GetByCollectionAndNode(replica.CollectionID, node)...)
|
||||
}
|
||||
return ret
|
||||
|
@ -265,7 +266,11 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
|
|||
}
|
||||
packedSegments = append(packedSegments, &meta.Segment{SegmentInfo: s})
|
||||
}
|
||||
plans := c.balancer.AssignSegment(packedSegments, replica.Replica.GetNodes())
|
||||
outboundNodes := c.meta.ResourceManager.CheckOutboundNodes(replica)
|
||||
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
|
||||
return !outboundNodes.Contain(node)
|
||||
})
|
||||
plans := c.balancer.AssignSegment(packedSegments, availableNodes)
|
||||
for i := range plans {
|
||||
plans[i].ReplicaID = replica.GetID()
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
|
@ -41,6 +42,7 @@ type SegmentCheckerTestSuite struct {
|
|||
checker *SegmentChecker
|
||||
meta *meta.Meta
|
||||
broker *meta.MockBroker
|
||||
nodeMgr *session.NodeManager
|
||||
}
|
||||
|
||||
func (suite *SegmentCheckerTestSuite) SetupSuite() {
|
||||
|
@ -64,7 +66,8 @@ func (suite *SegmentCheckerTestSuite) SetupTest() {
|
|||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
|
||||
distManager := meta.NewDistributionManager()
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
|
||||
|
@ -100,6 +103,10 @@ func (suite *SegmentCheckerTestSuite) TestLoadSegments() {
|
|||
// set meta
|
||||
checker.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
|
||||
checker.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1, 2}))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
|
||||
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 2)
|
||||
|
||||
// set target
|
||||
segments := []*datapb.SegmentBinlogs{
|
||||
|
|
|
@ -65,7 +65,7 @@ func (suite *DistControllerTestSuite) SetupTest() {
|
|||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store)
|
||||
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
|
||||
|
||||
suite.mockCluster = session.NewMockCluster(suite.T())
|
||||
nodeManager := session.NewNodeManager()
|
||||
|
|
|
@ -46,7 +46,7 @@ import (
|
|||
func (s *Server) checkAnyReplicaAvailable(collectionID int64) bool {
|
||||
for _, replica := range s.meta.ReplicaManager.GetByCollection(collectionID) {
|
||||
isAvailable := true
|
||||
for node := range replica.Nodes {
|
||||
for _, node := range replica.GetNodes() {
|
||||
if s.nodeMgr.Get(node) == nil {
|
||||
isAvailable = false
|
||||
break
|
||||
|
@ -80,7 +80,11 @@ func (s *Server) balanceSegments(ctx context.Context, req *querypb.LoadBalanceRe
|
|||
srcNode := req.GetSourceNodeIDs()[0]
|
||||
dstNodeSet := typeutil.NewUniqueSet(req.GetDstNodeIDs()...)
|
||||
if dstNodeSet.Len() == 0 {
|
||||
dstNodeSet.Insert(replica.GetNodes()...)
|
||||
outboundNodes := s.meta.ResourceManager.CheckOutboundNodes(replica)
|
||||
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
|
||||
return !outboundNodes.Contain(node)
|
||||
})
|
||||
dstNodeSet.Insert(availableNodes...)
|
||||
}
|
||||
dstNodeSet.Remove(srcNode)
|
||||
|
||||
|
@ -288,7 +292,13 @@ func (s *Server) tryGetNodesMetrics(ctx context.Context, req *milvuspb.GetMetric
|
|||
}
|
||||
|
||||
func (s *Server) fillReplicaInfo(replica *meta.Replica, withShardNodes bool) (*milvuspb.ReplicaInfo, error) {
|
||||
info := utils.Replica2ReplicaInfo(replica.Replica)
|
||||
info := &milvuspb.ReplicaInfo{
|
||||
ReplicaID: replica.GetID(),
|
||||
CollectionID: replica.GetCollectionID(),
|
||||
NodeIds: replica.GetNodes(),
|
||||
ResourceGroupName: replica.GetResourceGroup(),
|
||||
NumOutboundNode: s.meta.GetOutgoingNodeNumByReplica(replica),
|
||||
}
|
||||
|
||||
channels := s.targetMgr.GetDmChannelsByCollection(replica.GetCollectionID(), meta.CurrentTarget)
|
||||
if len(channels) == 0 {
|
||||
|
@ -321,7 +331,7 @@ func (s *Server) fillReplicaInfo(replica *meta.Replica, withShardNodes bool) (*m
|
|||
}
|
||||
if withShardNodes {
|
||||
shardNodes := lo.FilterMap(segments, func(segment *meta.Segment, _ int) (int64, bool) {
|
||||
if replica.Nodes.Contain(segment.Node) {
|
||||
if replica.Contains(segment.Node) {
|
||||
return segment.Node, true
|
||||
}
|
||||
return 0, false
|
||||
|
|
|
@@ -192,10 +192,11 @@ func (job *LoadCollectionJob) Execute() error {
	}

	// Create replicas
	replicas, err := utils.SpawnReplicas(job.meta.ReplicaManager,
		job.nodeMgr,
	replicas, err := utils.SpawnReplicasWithRG(job.meta,
		req.GetCollectionID(),
		req.GetReplicaNumber())
		req.GetResourceGroups(),
		req.GetReplicaNumber(),
	)
	if err != nil {
		msg := "failed to spawn replica for collection"
		log.Error(msg, zap.Error(err))
@@ -204,7 +205,8 @@ func (job *LoadCollectionJob) Execute() error {
	for _, replica := range replicas {
		log.Info("replica created",
			zap.Int64("replicaID", replica.GetID()),
			zap.Int64s("nodes", replica.GetNodes()))
			zap.Int64s("nodes", replica.GetNodes()),
			zap.String("resourceGroup", replica.GetResourceGroup()))
	}

	// Fetch channels and segments from DataCoord
@@ -405,10 +407,11 @@ func (job *LoadPartitionJob) Execute() error {
	}

	// Create replicas
	replicas, err := utils.SpawnReplicas(job.meta.ReplicaManager,
		job.nodeMgr,
	replicas, err := utils.SpawnReplicasWithRG(job.meta,
		req.GetCollectionID(),
		req.GetReplicaNumber())
		req.GetResourceGroups(),
		req.GetReplicaNumber(),
	)
	if err != nil {
		msg := "failed to spawn replica for collection"
		log.Error(msg, zap.Error(err))
@@ -417,7 +420,8 @@ func (job *LoadPartitionJob) Execute() error {
	for _, replica := range replicas {
		log.Info("replica created",
			zap.Int64("replicaID", replica.GetID()),
			zap.Int64s("nodes", replica.GetNodes()))
			zap.Int64s("nodes", replica.GetNodes()),
			zap.String("resourceGroup", replica.GetResourceGroup()))
	}

	err = job.targetMgr.UpdateCollectionNextTargetWithPartitions(req.GetCollectionID(), req.GetPartitionIDs()...)
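
The load jobs now pass the requested resource groups into utils.SpawnReplicasWithRG. The sketch below only illustrates one plausible way a replica count could be split across several resource groups (a simple round-robin). It is an assumption for illustration, not the actual SpawnReplicasWithRG implementation, which also allocates nodes and persists the replicas.

// spawn_rg_sketch.go
package main

import "fmt"

// splitReplicas assigns replicaNumber replicas to the given resource groups round-robin;
// with a single group all replicas land in that group.
func splitReplicas(resourceGroups []string, replicaNumber int32) map[string]int32 {
	counts := make(map[string]int32, len(resourceGroups))
	for i := int32(0); i < replicaNumber; i++ {
		rg := resourceGroups[int(i)%len(resourceGroups)]
		counts[rg]++
	}
	return counts
}

func main() {
	fmt.Println(splitReplicas([]string{"rg1", "rg2", "rg3"}, 3)) // map[rg1:1 rg2:1 rg3:1]
	fmt.Println(splitReplicas([]string{"rg1"}, 3))               // map[rg1:3]
}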
@ -131,19 +131,29 @@ func (suite *JobSuite) SetupTest() {
|
|||
|
||||
suite.store = meta.NewMetaStore(suite.kv)
|
||||
suite.dist = meta.NewDistributionManager()
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store, suite.nodeMgr)
|
||||
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
|
||||
suite.targetObserver = observers.NewTargetObserver(suite.meta,
|
||||
suite.targetMgr,
|
||||
suite.dist,
|
||||
suite.broker,
|
||||
)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.nodeMgr.Add(&session.NodeInfo{})
|
||||
suite.scheduler = NewScheduler()
|
||||
|
||||
suite.scheduler.Start(context.Background())
|
||||
meta.GlobalFailedLoadCache = meta.NewFailedLoadCache()
|
||||
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(1000, "localhost"))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(2000, "localhost"))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(3000, "localhost"))
|
||||
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
|
||||
suite.NoError(err)
|
||||
}
|
||||
|
||||
func (suite *JobSuite) TearDownTest() {
|
||||
|
@ -265,6 +275,48 @@ func (suite *JobSuite) TestLoadCollection() {
|
|||
err := job.Wait()
|
||||
suite.ErrorIs(err, ErrLoadParameterMismatched)
|
||||
}
|
||||
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg1")
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg2")
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg3")
|
||||
|
||||
// Load with 3 replica on 1 rg
|
||||
req := &querypb.LoadCollectionRequest{
|
||||
CollectionID: 1001,
|
||||
ReplicaNumber: 3,
|
||||
ResourceGroups: []string{"rg1"},
|
||||
}
|
||||
job := NewLoadCollectionJob(
|
||||
ctx,
|
||||
req,
|
||||
suite.dist,
|
||||
suite.meta,
|
||||
suite.targetMgr,
|
||||
suite.broker,
|
||||
suite.nodeMgr,
|
||||
)
|
||||
suite.scheduler.Add(job)
|
||||
err := job.Wait()
|
||||
suite.ErrorContains(err, meta.ErrNodeNotEnough.Error())
|
||||
|
||||
// Load with 3 replica on 3 rg
|
||||
req = &querypb.LoadCollectionRequest{
|
||||
CollectionID: 1002,
|
||||
ReplicaNumber: 3,
|
||||
ResourceGroups: []string{"rg1", "rg2", "rg3"},
|
||||
}
|
||||
job = NewLoadCollectionJob(
|
||||
ctx,
|
||||
req,
|
||||
suite.dist,
|
||||
suite.meta,
|
||||
suite.targetMgr,
|
||||
suite.broker,
|
||||
suite.nodeMgr,
|
||||
)
|
||||
suite.scheduler.Add(job)
|
||||
err = job.Wait()
|
||||
suite.ErrorContains(err, meta.ErrNodeNotEnough.Error())
|
||||
}
|
||||
|
||||
func (suite *JobSuite) TestLoadCollectionWithReplicas() {
|
||||
|
@ -278,7 +330,7 @@ func (suite *JobSuite) TestLoadCollectionWithReplicas() {
|
|||
// Load with 3 replica
|
||||
req := &querypb.LoadCollectionRequest{
|
||||
CollectionID: collection,
|
||||
ReplicaNumber: 3,
|
||||
ReplicaNumber: 5,
|
||||
}
|
||||
job := NewLoadCollectionJob(
|
||||
ctx,
|
||||
|
@ -482,6 +534,50 @@ func (suite *JobSuite) TestLoadPartition() {
|
|||
err := job.Wait()
|
||||
suite.ErrorIs(err, ErrLoadParameterMismatched)
|
||||
}
|
||||
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg1")
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg2")
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg3")
|
||||
|
||||
// test load 3 replica in 1 rg, should pass rg check
|
||||
req := &querypb.LoadPartitionsRequest{
|
||||
CollectionID: 100,
|
||||
PartitionIDs: []int64{1001},
|
||||
ReplicaNumber: 3,
|
||||
ResourceGroups: []string{"rg1"},
|
||||
}
|
||||
job := NewLoadPartitionJob(
|
||||
ctx,
|
||||
req,
|
||||
suite.dist,
|
||||
suite.meta,
|
||||
suite.targetMgr,
|
||||
suite.broker,
|
||||
suite.nodeMgr,
|
||||
)
|
||||
suite.scheduler.Add(job)
|
||||
err := job.Wait()
|
||||
suite.Contains(err.Error(), meta.ErrNodeNotEnough.Error())
|
||||
|
||||
// test load 3 replica in 3 rg, should pass rg check
|
||||
req = &querypb.LoadPartitionsRequest{
|
||||
CollectionID: 102,
|
||||
PartitionIDs: []int64{1001},
|
||||
ReplicaNumber: 3,
|
||||
ResourceGroups: []string{"rg1", "rg2", "rg3"},
|
||||
}
|
||||
job = NewLoadPartitionJob(
|
||||
ctx,
|
||||
req,
|
||||
suite.dist,
|
||||
suite.meta,
|
||||
suite.targetMgr,
|
||||
suite.broker,
|
||||
suite.nodeMgr,
|
||||
)
|
||||
suite.scheduler.Add(job)
|
||||
err = job.Wait()
|
||||
suite.Contains(err.Error(), meta.ErrNodeNotEnough.Error())
|
||||
}
|
||||
|
||||
func (suite *JobSuite) TestLoadPartitionWithReplicas() {
|
||||
|
@ -496,7 +592,7 @@ func (suite *JobSuite) TestLoadPartitionWithReplicas() {
|
|||
req := &querypb.LoadPartitionsRequest{
|
||||
CollectionID: collection,
|
||||
PartitionIDs: suite.partitions[collection],
|
||||
ReplicaNumber: 3,
|
||||
ReplicaNumber: 5,
|
||||
}
|
||||
job := NewLoadPartitionJob(
|
||||
ctx,
|
||||
|
@ -707,7 +803,16 @@ func (suite *JobSuite) TestReleasePartition() {
|
|||
func (suite *JobSuite) TestLoadCollectionStoreFailed() {
|
||||
// Store collection failed
|
||||
store := meta.NewMockStore(suite.T())
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store)
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store, suite.nodeMgr)
|
||||
|
||||
store.EXPECT().SaveResourceGroup(mock.Anything, mock.Anything).Return(nil)
|
||||
err := suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
|
||||
suite.NoError(err)
|
||||
|
||||
for _, collection := range suite.collections {
|
||||
if suite.loadTypes[collection] != querypb.LoadType_LoadCollection {
|
||||
continue
|
||||
|
@ -743,8 +848,17 @@ func (suite *JobSuite) TestLoadCollectionStoreFailed() {
|
|||
func (suite *JobSuite) TestLoadPartitionStoreFailed() {
|
||||
// Store partition failed
|
||||
store := meta.NewMockStore(suite.T())
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store)
|
||||
err := errors.New("failed to store collection")
|
||||
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store, suite.nodeMgr)
|
||||
|
||||
store.EXPECT().SaveResourceGroup(mock.Anything, mock.Anything).Return(nil)
|
||||
err := suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
|
||||
suite.NoError(err)
|
||||
|
||||
err = errors.New("failed to store collection")
|
||||
for _, collection := range suite.collections {
|
||||
if suite.loadTypes[collection] != querypb.LoadType_LoadPartition {
|
||||
continue
|
||||
|
@ -775,7 +889,7 @@ func (suite *JobSuite) TestLoadPartitionStoreFailed() {
|
|||
|
||||
func (suite *JobSuite) TestLoadCreateReplicaFailed() {
|
||||
// Store replica failed
|
||||
suite.meta = meta.NewMeta(ErrorIDAllocator(), suite.store)
|
||||
suite.meta = meta.NewMeta(ErrorIDAllocator(), suite.store, session.NewNodeManager())
|
||||
for _, collection := range suite.collections {
|
||||
req := &querypb.LoadCollectionRequest{
|
||||
CollectionID: collection,
|
||||
|
|
|
@@ -91,7 +91,7 @@ func (m *ChannelDistManager) GetShardLeader(replica *Replica, shard string) (int
	m.rwmutex.RLock()
	defer m.rwmutex.RUnlock()

	for node := range replica.Nodes {
	for _, node := range replica.GetNodes() {
		channels := m.channels[node]
		for _, dmc := range channels {
			if dmc.ChannelName == shard {
@@ -108,7 +108,7 @@ func (m *ChannelDistManager) GetShardLeadersByReplica(replica *Replica) map[stri
	defer m.rwmutex.RUnlock()

	ret := make(map[string]int64)
	for node := range replica.Nodes {
	for _, node := range replica.GetNodes() {
		channels := m.channels[node]
		for _, dmc := range channels {
			if dmc.GetCollectionID() == replica.GetCollectionID() {
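
A standalone sketch of the shard-leader lookup pattern shown in these hunks: iterate the replica's node list and return the first node that serves the requested channel. The types are simplified stand-ins, not the real ChannelDistManager.

// shard_leader_sketch.go
package main

import "fmt"

// findShardLeader returns the first replica node hosting the given shard,
// plus a flag saying whether any leader was found.
func findShardLeader(replicaNodes []int64, channelsByNode map[int64][]string, shard string) (int64, bool) {
	for _, node := range replicaNodes {
		for _, channel := range channelsByNode[node] {
			if channel == shard {
				return node, true
			}
		}
	}
	return 0, false
}

func main() {
	channels := map[int64][]string{1: {"dml_0"}, 2: {"dml_1"}}
	leader, ok := findShardLeader([]int64{1, 2}, channels, "dml_1")
	fmt.Println(leader, ok) // 2 true
}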
@ -100,18 +100,18 @@ func (suite *ChannelDistManagerSuite) TestGetBy() {
|
|||
|
||||
func (suite *ChannelDistManagerSuite) TestGetShardLeader() {
|
||||
replicas := []*Replica{
|
||||
{
|
||||
Replica: &querypb.Replica{
|
||||
NewReplica(
|
||||
&querypb.Replica{
|
||||
CollectionID: suite.collection,
|
||||
},
|
||||
Nodes: typeutil.NewUniqueSet(suite.nodes[0], suite.nodes[2]),
|
||||
},
|
||||
{
|
||||
Replica: &querypb.Replica{
|
||||
typeutil.NewUniqueSet(suite.nodes[0], suite.nodes[2]),
|
||||
),
|
||||
NewReplica(
|
||||
&querypb.Replica{
|
||||
CollectionID: suite.collection,
|
||||
},
|
||||
Nodes: typeutil.NewUniqueSet(suite.nodes[1]),
|
||||
},
|
||||
typeutil.NewUniqueSet(suite.nodes[1]),
|
||||
),
|
||||
}
|
||||
|
||||
// Test on replica 0
|
||||
|
|
|
@@ -16,17 +16,22 @@

package meta

import "github.com/milvus-io/milvus/internal/querycoordv2/session"

type Meta struct {
	*CollectionManager
	*ReplicaManager
	*ResourceManager
}

func NewMeta(
	idAllocator func() (int64, error),
	store Store,
	nodeMgr *session.NodeManager,
) *Meta {
	return &Meta{
		NewCollectionManager(store),
		NewReplicaManager(idAllocator, store),
		NewResourceManager(store, nodeMgr),
	}
}
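
Meta now embeds a ResourceManager next to the existing managers, so the resource-group methods are promoted onto Meta itself. A minimal illustration of that embedding pattern, using trivial stand-in managers rather than the real ones:

// meta_embedding_sketch.go
package main

import "fmt"

type ReplicaManager struct{}

func (m *ReplicaManager) ReplicaCount() int { return 2 }

type ResourceManager struct{}

func (m *ResourceManager) GroupCount() int { return 1 }

// Meta composes the managers; callers invoke their methods directly on Meta.
type Meta struct {
	*ReplicaManager
	*ResourceManager
}

func main() {
	m := &Meta{&ReplicaManager{}, &ResourceManager{}}
	// Promoted methods: no need to go through m.ReplicaManager explicitly.
	fmt.Println(m.ReplicaCount(), m.GroupCount()) // 2 1
}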
@ -155,6 +155,51 @@ func (_c *MockStore_GetReplicas_Call) Return(_a0 []*querypb.Replica, _a1 error)
|
|||
return _c
|
||||
}
|
||||
|
||||
// GetResourceGroups provides a mock function with given fields:
|
||||
func (_m *MockStore) GetResourceGroups() ([]*querypb.ResourceGroup, error) {
|
||||
ret := _m.Called()
|
||||
|
||||
var r0 []*querypb.ResourceGroup
|
||||
if rf, ok := ret.Get(0).(func() []*querypb.ResourceGroup); ok {
|
||||
r0 = rf()
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).([]*querypb.ResourceGroup)
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func() error); ok {
|
||||
r1 = rf()
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// MockStore_GetResourceGroups_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetResourceGroups'
|
||||
type MockStore_GetResourceGroups_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// GetResourceGroups is a helper method to define mock.On call
|
||||
func (_e *MockStore_Expecter) GetResourceGroups() *MockStore_GetResourceGroups_Call {
|
||||
return &MockStore_GetResourceGroups_Call{Call: _e.mock.On("GetResourceGroups")}
|
||||
}
|
||||
|
||||
func (_c *MockStore_GetResourceGroups_Call) Run(run func()) *MockStore_GetResourceGroups_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
run()
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockStore_GetResourceGroups_Call) Return(_a0 []*querypb.ResourceGroup, _a1 error) *MockStore_GetResourceGroups_Call {
|
||||
_c.Call.Return(_a0, _a1)
|
||||
return _c
|
||||
}
|
||||
|
||||
// ReleaseCollection provides a mock function with given fields: id
|
||||
func (_m *MockStore) ReleaseCollection(id int64) error {
|
||||
ret := _m.Called(id)
|
||||
|
@ -319,6 +364,43 @@ func (_c *MockStore_ReleaseReplicas_Call) Return(_a0 error) *MockStore_ReleaseRe
|
|||
return _c
|
||||
}
|
||||
|
||||
// RemoveResourceGroup provides a mock function with given fields: rgName
|
||||
func (_m *MockStore) RemoveResourceGroup(rgName string) error {
|
||||
ret := _m.Called(rgName)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(string) error); ok {
|
||||
r0 = rf(rgName)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// MockStore_RemoveResourceGroup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RemoveResourceGroup'
|
||||
type MockStore_RemoveResourceGroup_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// RemoveResourceGroup is a helper method to define mock.On call
|
||||
// - rgName string
|
||||
func (_e *MockStore_Expecter) RemoveResourceGroup(rgName interface{}) *MockStore_RemoveResourceGroup_Call {
|
||||
return &MockStore_RemoveResourceGroup_Call{Call: _e.mock.On("RemoveResourceGroup", rgName)}
|
||||
}
|
||||
|
||||
func (_c *MockStore_RemoveResourceGroup_Call) Run(run func(rgName string)) *MockStore_RemoveResourceGroup_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
run(args[0].(string))
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockStore_RemoveResourceGroup_Call) Return(_a0 error) *MockStore_RemoveResourceGroup_Call {
|
||||
_c.Call.Return(_a0)
|
||||
return _c
|
||||
}
|
||||
|
||||
// SaveCollection provides a mock function with given fields: info
|
||||
func (_m *MockStore) SaveCollection(info *querypb.CollectionLoadInfo) error {
|
||||
ret := _m.Called(info)
|
||||
|
@ -443,6 +525,56 @@ func (_c *MockStore_SaveReplica_Call) Return(_a0 error) *MockStore_SaveReplica_C
|
|||
return _c
|
||||
}
|
||||
|
||||
// SaveResourceGroup provides a mock function with given fields: rgs
|
||||
func (_m *MockStore) SaveResourceGroup(rgs ...*querypb.ResourceGroup) error {
|
||||
_va := make([]interface{}, len(rgs))
|
||||
for _i := range rgs {
|
||||
_va[_i] = rgs[_i]
|
||||
}
|
||||
var _ca []interface{}
|
||||
_ca = append(_ca, _va...)
|
||||
ret := _m.Called(_ca...)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(...*querypb.ResourceGroup) error); ok {
|
||||
r0 = rf(rgs...)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// MockStore_SaveResourceGroup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SaveResourceGroup'
|
||||
type MockStore_SaveResourceGroup_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// SaveResourceGroup is a helper method to define mock.On call
|
||||
// - rgs ...*querypb.ResourceGroup
|
||||
func (_e *MockStore_Expecter) SaveResourceGroup(rgs ...interface{}) *MockStore_SaveResourceGroup_Call {
|
||||
return &MockStore_SaveResourceGroup_Call{Call: _e.mock.On("SaveResourceGroup",
|
||||
append([]interface{}{}, rgs...)...)}
|
||||
}
|
||||
|
||||
func (_c *MockStore_SaveResourceGroup_Call) Run(run func(rgs ...*querypb.ResourceGroup)) *MockStore_SaveResourceGroup_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
variadicArgs := make([]*querypb.ResourceGroup, len(args)-0)
|
||||
for i, a := range args[0:] {
|
||||
if a != nil {
|
||||
variadicArgs[i] = a.(*querypb.ResourceGroup)
|
||||
}
|
||||
}
|
||||
run(variadicArgs...)
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
func (_c *MockStore_SaveResourceGroup_Call) Return(_a0 error) *MockStore_SaveResourceGroup_Call {
|
||||
_c.Call.Return(_a0)
|
||||
return _c
|
||||
}
|
||||
|
||||
type mockConstructorTestingTNewMockStore interface {
|
||||
mock.TestingT
|
||||
Cleanup(func())
|
||||
|
|
|
@@ -30,23 +30,66 @@ import (

type Replica struct {
	*querypb.Replica
	Nodes UniqueSet // a helper field for manipulating replica's Nodes slice field
	nodes UniqueSet // a helper field for manipulating replica's Nodes slice field
	rwmutex sync.RWMutex
}

func NewReplica(replica *querypb.Replica, nodes UniqueSet) *Replica {
	return &Replica{
		Replica: replica,
		nodes: nodes,
	}
}

func (replica *Replica) AddNode(nodes ...int64) {
	replica.Nodes.Insert(nodes...)
	replica.Replica.Nodes = replica.Nodes.Collect()
	replica.rwmutex.Lock()
	defer replica.rwmutex.Unlock()
	replica.nodes.Insert(nodes...)
	replica.Replica.Nodes = replica.nodes.Collect()
}

func (replica *Replica) GetNodes() []int64 {
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	if replica != nil {
		return replica.nodes.Collect()
	}
	return nil
}

func (replica *Replica) Len() int {
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	if replica != nil {
		return replica.nodes.Len()
	}

	return 0
}

func (replica *Replica) Contains(node int64) bool {
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	if replica != nil {
		return replica.nodes.Contain(node)
	}

	return false
}

func (replica *Replica) RemoveNode(nodes ...int64) {
	replica.Nodes.Remove(nodes...)
	replica.Replica.Nodes = replica.Nodes.Collect()
	replica.rwmutex.Lock()
	defer replica.rwmutex.Unlock()
	replica.nodes.Remove(nodes...)
	replica.Replica.Nodes = replica.nodes.Collect()
}

func (replica *Replica) Clone() *Replica {
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	return &Replica{
		Replica: proto.Clone(replica.Replica).(*querypb.Replica),
		Nodes: NewUniqueSet(replica.Replica.Nodes...),
		nodes: NewUniqueSet(replica.Replica.Nodes...),
	}
}
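
The Replica wrapper now hides its node set behind an RWMutex and accessor methods instead of an exported field. A compact, self-contained sketch of the same pattern; the real struct additionally wraps the querypb.Replica message, which is omitted here.

// replica_nodes_sketch.go
package main

import (
	"fmt"
	"sync"
)

type Replica struct {
	mu    sync.RWMutex
	nodes map[int64]struct{}
}

func NewReplica(nodes ...int64) *Replica {
	r := &Replica{nodes: make(map[int64]struct{})}
	r.AddNode(nodes...)
	return r
}

func (r *Replica) AddNode(nodes ...int64) {
	r.mu.Lock()
	defer r.mu.Unlock()
	for _, n := range nodes {
		r.nodes[n] = struct{}{}
	}
}

func (r *Replica) Contains(node int64) bool {
	r.mu.RLock()
	defer r.mu.RUnlock()
	_, ok := r.nodes[node]
	return ok
}

func (r *Replica) Len() int {
	r.mu.RLock()
	defer r.mu.RUnlock()
	return len(r.nodes)
}

func main() {
	r := NewReplica(1, 2)
	fmt.Println(r.Contains(2), r.Len()) // true 2
}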
@@ -75,10 +118,14 @@ func (m *ReplicaManager) Recover(collections []int64) error {

	collectionSet := typeutil.NewUniqueSet(collections...)
	for _, replica := range replicas {
		if len(replica.GetResourceGroup()) == 0 {
			replica.ResourceGroup = DefaultResourceGroupName
		}

		if collectionSet.Contain(replica.GetCollectionID()) {
			m.replicas[replica.GetID()] = &Replica{
				Replica: replica,
				Nodes: NewUniqueSet(replica.GetNodes()...),
				nodes: NewUniqueSet(replica.GetNodes()...),
			}
			log.Info("recover replica",
				zap.Int64("collectionID", replica.GetCollectionID()),
@@ -109,13 +156,13 @@ func (m *ReplicaManager) Get(id UniqueID) *Replica {

// Spawn spawns replicas of the given number, for given collection,
// this doesn't store these replicas and assign nodes to them.
func (m *ReplicaManager) Spawn(collection int64, replicaNumber int32) ([]*Replica, error) {
func (m *ReplicaManager) Spawn(collection int64, replicaNumber int32, rgName string) ([]*Replica, error) {
	var (
		replicas = make([]*Replica, replicaNumber)
		err error
	)
	for i := range replicas {
		replicas[i], err = m.spawn(collection)
		replicas[i], err = m.spawn(collection, rgName)
		if err != nil {
			return nil, err
		}
@@ -130,17 +177,18 @@ func (m *ReplicaManager) Put(replicas ...*Replica) error {
	return m.put(replicas...)
}

func (m *ReplicaManager) spawn(collectionID UniqueID) (*Replica, error) {
func (m *ReplicaManager) spawn(collectionID UniqueID, rgName string) (*Replica, error) {
	id, err := m.idAllocator()
	if err != nil {
		return nil, err
	}
	return &Replica{
		Replica: &querypb.Replica{
			ID: id,
			CollectionID: collectionID,
			ID: id,
			CollectionID: collectionID,
			ResourceGroup: rgName,
		},
		Nodes: make(UniqueSet),
		nodes: make(UniqueSet),
	}, nil
}
@@ -192,7 +240,7 @@ func (m *ReplicaManager) GetByCollectionAndNode(collectionID, nodeID UniqueID) *
	defer m.rwmutex.RUnlock()

	for _, replica := range m.replicas {
		if replica.CollectionID == collectionID && replica.Nodes.Contain(nodeID) {
		if replica.CollectionID == collectionID && replica.nodes.Contain(nodeID) {
			return replica
		}
	}
@@ -200,6 +248,34 @@ func (m *ReplicaManager) GetByCollectionAndNode(collectionID, nodeID UniqueID) *
	return nil
}

func (m *ReplicaManager) GetByCollectionAndRG(collectionID int64, rgName string) []*Replica {
	m.rwmutex.RLock()
	defer m.rwmutex.RUnlock()

	ret := make([]*Replica, 0)
	for _, replica := range m.replicas {
		if replica.GetCollectionID() == collectionID && replica.GetResourceGroup() == rgName {
			ret = append(ret, replica)
		}
	}

	return ret
}

func (m *ReplicaManager) GetByResourceGroup(rgName string) []*Replica {
	m.rwmutex.RLock()
	defer m.rwmutex.RUnlock()

	ret := make([]*Replica, 0)
	for _, replica := range m.replicas {
		if replica.GetResourceGroup() == rgName {
			ret = append(ret, replica)
		}
	}

	return ret
}

func (m *ReplicaManager) AddNode(replicaID UniqueID, nodes ...UniqueID) error {
	m.rwmutex.Lock()
	defer m.rwmutex.Unlock()
@@ -227,3 +303,17 @@ func (m *ReplicaManager) RemoveNode(replicaID UniqueID, nodes ...UniqueID) error
	replica.RemoveNode(nodes...)
	return m.put(replica)
}

func (m *ReplicaManager) GetResourceGroupByCollection(collection UniqueID) typeutil.Set[string] {
	m.rwmutex.Lock()
	defer m.rwmutex.Unlock()

	ret := typeutil.NewSet[string]()
	for _, r := range m.replicas {
		if r.GetCollectionID() == collection {
			ret.Insert(r.GetResourceGroup())
		}
	}

	return ret
}
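
A small sketch of the lookups added above: filtering an in-memory replica table by collection and resource group. The replica record here is a simplified stand-in for the manager's internal state.

// replica_rg_query_sketch.go
package main

import "fmt"

type replica struct {
	id            int64
	collectionID  int64
	resourceGroup string
}

// byCollectionAndRG returns the replicas of a collection that live in the given resource group.
func byCollectionAndRG(replicas map[int64]replica, collectionID int64, rg string) []replica {
	ret := make([]replica, 0)
	for _, r := range replicas {
		if r.collectionID == collectionID && r.resourceGroup == rg {
			ret = append(ret, r)
		}
	}
	return ret
}

func main() {
	table := map[int64]replica{
		1: {1, 1000, "__default_resource_group"},
		2: {2, 1000, "rg1"},
		3: {3, 2000, "rg1"},
	}
	fmt.Println(len(byCollectionAndRG(table, 1000, "rg1"))) // 1
}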
|
@ -76,14 +76,14 @@ func (suite *ReplicaManagerSuite) TestSpawn() {
|
|||
mgr := suite.mgr
|
||||
|
||||
for i, collection := range suite.collections {
|
||||
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i])
|
||||
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
suite.Len(replicas, int(suite.replicaNumbers[i]))
|
||||
}
|
||||
|
||||
mgr.idAllocator = ErrorIDAllocator()
|
||||
for i, collection := range suite.collections {
|
||||
_, err := mgr.Spawn(collection, suite.replicaNumbers[i])
|
||||
_, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
|
||||
suite.Error(err)
|
||||
}
|
||||
}
|
||||
|
@ -98,8 +98,8 @@ func (suite *ReplicaManagerSuite) TestGet() {
|
|||
for _, replica := range replicas {
|
||||
suite.Equal(collection, replica.GetCollectionID())
|
||||
suite.Equal(replica, mgr.Get(replica.GetID()))
|
||||
suite.Equal(replica.Replica.Nodes, replica.Nodes.Collect())
|
||||
replicaNodes[replica.GetID()] = replica.Replica.Nodes
|
||||
suite.Equal(replica.Replica.GetNodes(), replica.GetNodes())
|
||||
replicaNodes[replica.GetID()] = replica.Replica.GetNodes()
|
||||
nodes = append(nodes, replica.Replica.Nodes...)
|
||||
}
|
||||
suite.Len(nodes, int(suite.replicaNumbers[i]))
|
||||
|
@ -137,9 +137,9 @@ func (suite *ReplicaManagerSuite) TestRecover() {
|
|||
suite.NotNil(replica)
|
||||
suite.EqualValues(1000, replica.CollectionID)
|
||||
suite.EqualValues([]int64{1, 2, 3}, replica.Replica.Nodes)
|
||||
suite.Len(replica.Nodes, len(replica.Replica.GetNodes()))
|
||||
suite.Len(replica.GetNodes(), len(replica.Replica.GetNodes()))
|
||||
for _, node := range replica.Replica.GetNodes() {
|
||||
suite.True(replica.Nodes.Contain(node))
|
||||
suite.True(replica.Contains(node))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,7 +175,7 @@ func (suite *ReplicaManagerSuite) TestNodeManipulate() {
|
|||
suite.NoError(err)
|
||||
|
||||
replica = mgr.GetByCollectionAndNode(collection, newNode)
|
||||
suite.Contains(replica.Nodes, newNode)
|
||||
suite.Contains(replica.GetNodes(), newNode)
|
||||
suite.Contains(replica.Replica.GetNodes(), newNode)
|
||||
|
||||
err = mgr.RemoveNode(replica.GetID(), firstNode)
|
||||
|
@ -192,7 +192,7 @@ func (suite *ReplicaManagerSuite) TestNodeManipulate() {
|
|||
suite.Nil(replica)
|
||||
|
||||
replica = mgr.GetByCollectionAndNode(collection, newNode)
|
||||
suite.Contains(replica.Nodes, newNode)
|
||||
suite.Contains(replica.GetNodes(), newNode)
|
||||
suite.Contains(replica.Replica.GetNodes(), newNode)
|
||||
}
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ func (suite *ReplicaManagerSuite) spawnAndPutAll() {
|
|||
mgr := suite.mgr
|
||||
|
||||
for i, collection := range suite.collections {
|
||||
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i])
|
||||
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
suite.Len(replicas, int(suite.replicaNumbers[i]))
|
||||
for j, replica := range replicas {
|
||||
|
@ -212,6 +212,27 @@ func (suite *ReplicaManagerSuite) spawnAndPutAll() {
|
|||
}
|
||||
}
|
||||
|
||||
func (suite *ReplicaManagerSuite) TestResourceGroup() {
|
||||
mgr := NewReplicaManager(suite.idAllocator, suite.store)
|
||||
replica1, err := mgr.spawn(int64(1000), DefaultResourceGroupName)
|
||||
replica1.AddNode(1)
|
||||
suite.NoError(err)
|
||||
mgr.Put(replica1)
|
||||
|
||||
replica2, err := mgr.spawn(int64(2000), DefaultResourceGroupName)
|
||||
replica2.AddNode(1)
|
||||
suite.NoError(err)
|
||||
mgr.Put(replica2)
|
||||
|
||||
replicas := mgr.GetByResourceGroup(DefaultResourceGroupName)
|
||||
suite.Len(replicas, 2)
|
||||
replicas = mgr.GetByCollectionAndRG(int64(1000), DefaultResourceGroupName)
|
||||
suite.Len(replicas, 1)
|
||||
rgNames := mgr.GetResourceGroupByCollection(int64(1000))
|
||||
suite.Len(rgNames, 1)
|
||||
suite.True(rgNames.Contain(DefaultResourceGroupName))
|
||||
}
|
||||
|
||||
func (suite *ReplicaManagerSuite) clearMemory() {
|
||||
suite.mgr.replicas = make(map[int64]*Replica)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,632 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package meta
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
. "github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/samber/lo"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNodeAlreadyAssign = errors.New("node already assign to other resource group")
|
||||
ErrRGIsFull = errors.New("resource group is full")
|
||||
ErrRGIsEmpty = errors.New("resource group is empty")
|
||||
ErrRGNotExist = errors.New("resource group doesn't exist")
|
||||
ErrRGAlreadyExist = errors.New("resource group already exist")
|
||||
ErrRGAssignNodeFailed = errors.New("failed to assign node to resource group")
|
||||
ErrRGUnAssignNodeFailed = errors.New("failed to unassign node from resource group")
|
||||
ErrSaveResourceGroupToStore = errors.New("failed to save resource group to store")
|
||||
ErrRemoveResourceGroupFromStore = errors.New("failed to remove resource group from store")
|
||||
ErrRecoverResourceGroupToStore = errors.New("failed to recover resource group to store")
|
||||
ErrNodeNotAssignToRG = errors.New("node hasn't been assign to any resource group")
|
||||
ErrRGNameIsEmpty = errors.New("resource group name couldn't be empty")
|
||||
ErrDeleteDefaultRG = errors.New("delete default rg is not permitted")
|
||||
ErrDeleteNonEmptyRG = errors.New("delete non-empty rg is not permitted")
|
||||
ErrNodeNotExist = errors.New("node does not exist")
|
||||
ErrNodeStopped = errors.New("node has been stopped")
|
||||
ErrRGLimit = errors.New("resource group num reach limit 1024")
|
||||
ErrNodeNotEnough = errors.New("nodes not enough")
|
||||
)
|
||||
|
||||
var DefaultResourceGroupName = "__default_resource_group"
|
||||
|
||||
type ResourceGroup struct {
|
||||
nodes UniqueSet
|
||||
capacity int
|
||||
}
|
||||
|
||||
func NewResourceGroup(capacity int) *ResourceGroup {
|
||||
rg := &ResourceGroup{
|
||||
nodes: typeutil.NewUniqueSet(),
|
||||
capacity: capacity,
|
||||
}
|
||||
|
||||
return rg
|
||||
}
|
||||
|
||||
// assign node to resource group
|
||||
func (rg *ResourceGroup) assignNode(id int64) error {
|
||||
if rg.containsNode(id) {
|
||||
return ErrNodeAlreadyAssign
|
||||
}
|
||||
|
||||
rg.nodes.Insert(id)
|
||||
rg.capacity++
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// unassign node from resource group
|
||||
func (rg *ResourceGroup) unassignNode(id int64) error {
|
||||
if !rg.containsNode(id) {
|
||||
// remove non exist node should be tolerable
|
||||
return nil
|
||||
}
|
||||
|
||||
rg.nodes.Remove(id)
|
||||
rg.capacity--
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) handleNodeUp(id int64) error {
|
||||
if rg.LackOfNodes() == 0 {
|
||||
return ErrRGIsFull
|
||||
}
|
||||
|
||||
if rg.containsNode(id) {
|
||||
return ErrNodeAlreadyAssign
|
||||
}
|
||||
|
||||
rg.nodes.Insert(id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) handleNodeDown(id int64) error {
|
||||
if !rg.containsNode(id) {
|
||||
// remove non exist node should be tolerable
|
||||
return nil
|
||||
}
|
||||
|
||||
rg.nodes.Remove(id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) LackOfNodes() int {
|
||||
return rg.capacity - len(rg.nodes)
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) containsNode(id int64) bool {
|
||||
return rg.nodes.Contain(id)
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) GetNodes() []int64 {
|
||||
return rg.nodes.Collect()
|
||||
}
|
||||
|
||||
func (rg *ResourceGroup) GetCapacity() int {
|
||||
return rg.capacity
|
||||
}
|
||||
|
||||
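
A standalone sketch of the ResourceGroup bookkeeping defined above: explicit assignment changes the capacity, node up/down events only change the currently held nodes, and the lack-of-nodes value reports how many nodes the group is missing relative to its capacity. Types are simplified and error handling is omitted.

// resource_group_sketch.go
package main

import "fmt"

type resourceGroup struct {
	nodes    map[int64]struct{}
	capacity int
}

func newResourceGroup() *resourceGroup {
	return &resourceGroup{nodes: map[int64]struct{}{}}
}

func (rg *resourceGroup) assignNode(id int64) { // user-driven: grows capacity
	rg.nodes[id] = struct{}{}
	rg.capacity++
}

func (rg *resourceGroup) handleNodeDown(id int64) { // event-driven: capacity unchanged
	delete(rg.nodes, id)
}

func (rg *resourceGroup) lackOfNodes() int { return rg.capacity - len(rg.nodes) }

func main() {
	rg := newResourceGroup()
	rg.assignNode(1)
	rg.assignNode(2)
	rg.handleNodeDown(2)
	fmt.Println(rg.lackOfNodes()) // 1: one node must be recovered to reach capacity
}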
type ResourceManager struct {
|
||||
groups map[string]*ResourceGroup
|
||||
store Store
|
||||
nodeMgr *session.NodeManager
|
||||
|
||||
rwmutex sync.RWMutex
|
||||
}
|
||||
|
||||
func NewResourceManager(store Store, nodeMgr *session.NodeManager) *ResourceManager {
|
||||
groupMap := make(map[string]*ResourceGroup)
|
||||
groupMap[DefaultResourceGroupName] = NewResourceGroup(1000000)
|
||||
return &ResourceManager{
|
||||
groups: groupMap,
|
||||
store: store,
|
||||
nodeMgr: nodeMgr,
|
||||
}
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) AddResourceGroup(rgName string) error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
if len(rgName) == 0 {
|
||||
return ErrRGNameIsEmpty
|
||||
}
|
||||
|
||||
if rm.groups[rgName] != nil {
|
||||
return ErrRGAlreadyExist
|
||||
}
|
||||
|
||||
if len(rm.groups) >= 1024 {
|
||||
return ErrRGLimit
|
||||
}
|
||||
|
||||
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: rgName,
|
||||
Capacity: 0,
|
||||
})
|
||||
if err != nil {
|
||||
log.Info("failed to add resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Error(err),
|
||||
)
|
||||
return err
|
||||
}
|
||||
rm.groups[rgName] = NewResourceGroup(0)
|
||||
|
||||
log.Info("add resource group",
|
||||
zap.String("rgName", rgName),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) RemoveResourceGroup(rgName string) error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
if rgName == DefaultResourceGroupName {
|
||||
return ErrDeleteDefaultRG
|
||||
}
|
||||
|
||||
if rm.groups[rgName] == nil {
|
||||
// delete a non-exist rg should be tolerable
|
||||
return nil
|
||||
}
|
||||
|
||||
if rm.groups[rgName].GetCapacity() != 0 {
|
||||
return ErrDeleteNonEmptyRG
|
||||
}
|
||||
|
||||
err := rm.store.RemoveResourceGroup(rgName)
|
||||
if err != nil {
|
||||
log.Info("failed to remove resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Error(err),
|
||||
)
|
||||
return err
|
||||
}
|
||||
delete(rm.groups, rgName)
|
||||
|
||||
log.Info("remove resource group",
|
||||
zap.String("rgName", rgName),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) AssignNode(rgName string, node int64) error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
return rm.assignNode(rgName, node)
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) assignNode(rgName string, node int64) error {
|
||||
if rm.groups[rgName] == nil {
|
||||
return ErrRGNotExist
|
||||
}
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
return ErrNodeNotExist
|
||||
}
|
||||
|
||||
if ok, _ := rm.nodeMgr.IsStoppingNode(node); ok {
|
||||
return ErrNodeStopped
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
if rm.checkNodeAssigned(node) {
|
||||
return ErrNodeAlreadyAssign
|
||||
}
|
||||
|
||||
newNodes := rm.groups[rgName].GetNodes()
|
||||
newNodes = append(newNodes, node)
|
||||
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: rgName,
|
||||
Capacity: int32(rm.groups[rgName].GetCapacity()) + 1,
|
||||
Nodes: newNodes,
|
||||
})
|
||||
if err != nil {
|
||||
log.Info("failed to add node to resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
zap.Error(err),
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
err = rm.groups[rgName].assignNode(node)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info("add node to resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) checkNodeAssigned(node int64) bool {
|
||||
for _, group := range rm.groups {
|
||||
if group.containsNode(node) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) UnassignNode(rgName string, node int64) error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
return rm.unassignNode(rgName, node)
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) unassignNode(rgName string, node int64) error {
|
||||
if rm.groups[rgName] == nil {
|
||||
return ErrRGNotExist
|
||||
}
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
// remove non exist node should be tolerable
|
||||
return nil
|
||||
}
|
||||
|
||||
newNodes := make([]int64, 0)
|
||||
for nid := range rm.groups[rgName].nodes {
|
||||
if nid != node {
|
||||
newNodes = append(newNodes, nid)
|
||||
}
|
||||
}
|
||||
|
||||
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: rgName,
|
||||
Capacity: int32(rm.groups[rgName].GetCapacity()) - 1,
|
||||
Nodes: newNodes,
|
||||
})
|
||||
if err != nil {
|
||||
log.Info("remove node from resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
zap.Error(err),
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
err = rm.groups[rgName].unassignNode(node)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info("remove node from resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) GetNodes(rgName string) ([]int64, error) {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
if rm.groups[rgName] == nil {
|
||||
return nil, ErrRGNotExist
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
|
||||
return rm.groups[rgName].GetNodes(), nil
|
||||
}
|
||||
|
||||
// return all outbound node
|
||||
func (rm *ResourceManager) CheckOutboundNodes(replica *Replica) typeutil.UniqueSet {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
|
||||
if rm.groups[replica.GetResourceGroup()] == nil {
|
||||
return typeutil.NewUniqueSet()
|
||||
}
|
||||
rg := rm.groups[replica.GetResourceGroup()]
|
||||
|
||||
ret := typeutil.NewUniqueSet()
|
||||
for _, node := range replica.GetNodes() {
|
||||
if !rg.containsNode(node) {
|
||||
ret.Insert(node)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
// return outgoing node num on each rg from this replica
|
||||
func (rm *ResourceManager) GetOutgoingNodeNumByReplica(replica *Replica) map[string]int32 {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
|
||||
if rm.groups[replica.GetResourceGroup()] == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
rg := rm.groups[replica.GetResourceGroup()]
|
||||
ret := make(map[string]int32)
|
||||
for _, node := range replica.GetNodes() {
|
||||
if !rg.containsNode(node) {
|
||||
rgName, err := rm.findResourceGroupByNode(node)
|
||||
if err == nil {
|
||||
ret[rgName]++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) ContainsNode(rgName string, node int64) bool {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
if rm.groups[rgName] == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
return rm.groups[rgName].containsNode(node)
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) ContainResourceGroup(rgName string) bool {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
return rm.groups[rgName] != nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) GetResourceGroup(rgName string) (*ResourceGroup, error) {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
|
||||
if rm.groups[rgName] == nil {
|
||||
return nil, ErrRGNotExist
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
return rm.groups[rgName], nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) ListResourceGroups() []string {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
|
||||
return lo.Keys(rm.groups)
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) FindResourceGroupByNode(node int64) (string, error) {
|
||||
rm.rwmutex.RLock()
|
||||
defer rm.rwmutex.RUnlock()
|
||||
|
||||
return rm.findResourceGroupByNode(node)
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) findResourceGroupByNode(node int64) (string, error) {
|
||||
for name, group := range rm.groups {
|
||||
if group.containsNode(node) {
|
||||
return name, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", ErrNodeNotAssignToRG
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) HandleNodeUp(node int64) (string, error) {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
return "", ErrNodeNotExist
|
||||
}
|
||||
|
||||
if ok, _ := rm.nodeMgr.IsStoppingNode(node); ok {
|
||||
return "", ErrNodeStopped
|
||||
}
|
||||
|
||||
// if node already assign to rg
|
||||
rgName, err := rm.findResourceGroupByNode(node)
|
||||
if err == nil {
|
||||
log.Info("HandleNodeUp: node already assign to resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
)
|
||||
return rgName, nil
|
||||
}
|
||||
|
||||
// add new node to default rg
|
||||
rm.groups[DefaultResourceGroupName].handleNodeUp(node)
|
||||
log.Info("HandleNodeUp: assign node to default resource group",
|
||||
zap.String("rgName", DefaultResourceGroupName),
|
||||
zap.Int64("node", node),
|
||||
)
|
||||
return DefaultResourceGroupName, nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) HandleNodeDown(node int64) (string, error) {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
return "", ErrNodeNotExist
|
||||
}
|
||||
|
||||
rgName, err := rm.findResourceGroupByNode(node)
|
||||
if err == nil {
|
||||
log.Info("HandleNodeDown: remove node from resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("node", node),
|
||||
)
|
||||
return rgName, rm.groups[rgName].handleNodeDown(node)
|
||||
}
|
||||
|
||||
return "", ErrNodeNotAssignToRG
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) TransferNode(from, to string) error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
if rm.groups[from] == nil || rm.groups[to] == nil {
|
||||
return ErrRGNotExist
|
||||
}
|
||||
|
||||
if len(rm.groups[from].nodes) == 0 {
|
||||
return ErrRGIsEmpty
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(from)
|
||||
rm.checkRGNodeStatus(to)
|
||||
|
||||
//todo: a better way to choose a node with least balance cost
|
||||
node := rm.groups[from].GetNodes()[0]
|
||||
if err := rm.transferNodeInStore(from, to, node); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err := rm.groups[from].unassignNode(node)
|
||||
if err != nil {
|
||||
// interrupt transfer, unreachable logic path
|
||||
return err
|
||||
}
|
||||
|
||||
err = rm.groups[to].assignNode(node)
|
||||
if err != nil {
|
||||
// interrupt transfer, unreachable logic path
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) transferNodeInStore(from string, to string, node int64) error {
|
||||
fromNodeList := make([]int64, 0)
|
||||
for nid := range rm.groups[from].nodes {
|
||||
if nid != node {
|
||||
fromNodeList = append(fromNodeList, nid)
|
||||
}
|
||||
}
|
||||
toNodeList := rm.groups[to].GetNodes()
|
||||
toNodeList = append(toNodeList, node)
|
||||
|
||||
fromRG := &querypb.ResourceGroup{
|
||||
Name: from,
|
||||
Capacity: int32(rm.groups[from].GetCapacity()) - 1,
|
||||
Nodes: fromNodeList,
|
||||
}
|
||||
|
||||
toRG := &querypb.ResourceGroup{
|
||||
Name: to,
|
||||
Capacity: int32(rm.groups[to].GetCapacity()) + 1,
|
||||
Nodes: toNodeList,
|
||||
}
|
||||
|
||||
return rm.store.SaveResourceGroup(fromRG, toRG)
|
||||
}
|
||||
|
||||
// auto recover rg, return recover used node num
|
||||
func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) (int, error) {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
|
||||
if rm.groups[rgName] == nil {
|
||||
return 0, ErrRGNotExist
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
lackNodesNum := rm.groups[rgName].LackOfNodes()
|
||||
nodesInDefault := rm.groups[DefaultResourceGroupName].GetNodes()
|
||||
for i := 0; i < len(nodesInDefault) && i < lackNodesNum; i++ {
|
||||
//todo: a better way to choose a node with least balance cost
|
||||
node := nodesInDefault[i]
|
||||
err := rm.unassignNode(DefaultResourceGroupName, node)
|
||||
if err != nil {
|
||||
// interrupt transfer, unreachable logic path
|
||||
return i + 1, err
|
||||
}
|
||||
|
||||
err = rm.groups[rgName].handleNodeUp(node)
|
||||
if err != nil {
|
||||
// roll back, unreachable logic path
|
||||
rm.assignNode(DefaultResourceGroupName, node)
|
||||
}
|
||||
}
|
||||
|
||||
return lackNodesNum, nil
|
||||
}
|
||||
|
||||
func (rm *ResourceManager) Recover() error {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
rgs, err := rm.store.GetResourceGroups()
|
||||
if err != nil {
|
||||
return ErrRecoverResourceGroupToStore
|
||||
}
|
||||
|
||||
for _, rg := range rgs {
|
||||
rm.groups[rg.GetName()] = NewResourceGroup(0)
|
||||
for _, node := range rg.GetNodes() {
|
||||
rm.groups[rg.GetName()].assignNode(node)
|
||||
}
|
||||
rm.checkRGNodeStatus(rg.GetName())
|
||||
log.Info("Recover resource group",
|
||||
zap.String("rgName", rg.GetName()),
|
||||
zap.Int64s("nodes", rg.GetNodes()),
|
||||
zap.Int32("capacity", rg.GetCapacity()),
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// every operation which involves nodes access, should check nodes status first
|
||||
func (rm *ResourceManager) checkRGNodeStatus(rgName string) {
|
||||
for _, node := range rm.groups[rgName].GetNodes() {
|
||||
if rm.nodeMgr.Get(node) == nil {
|
||||
log.Info("found node down, remove it",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int64("nodeID", node),
|
||||
)
|
||||
|
||||
rm.groups[rgName].handleNodeDown(node)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// return lack of nodes num
|
||||
func (rm *ResourceManager) CheckLackOfNode(rgName string) int {
|
||||
rm.rwmutex.Lock()
|
||||
defer rm.rwmutex.Unlock()
|
||||
if rm.groups[rgName] == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
rm.checkRGNodeStatus(rgName)
|
||||
|
||||
return rm.groups[rgName].LackOfNodes()
|
||||
}
|
|
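
A sketch of the AutoRecoverResourceGroup idea used above: when a resource group holds fewer nodes than its capacity, borrow nodes from the default resource group until the gap closes. This is an in-memory illustration only; the real method also persists the moves through the store and handles error paths.

// auto_recover_sketch.go
package main

import "fmt"

// autoRecover moves up to lack nodes from the default group into the lacking group.
func autoRecover(target, defaultRG []int64, lack int) (newTarget, newDefault []int64) {
	newTarget = append(newTarget, target...)
	for _, node := range defaultRG {
		if lack == 0 {
			newDefault = append(newDefault, node)
			continue
		}
		newTarget = append(newTarget, node) // move node from the default RG into the lacking RG
		lack--
	}
	return newTarget, newDefault
}

func main() {
	target, def := autoRecover([]int64{3}, []int64{1, 2}, 1)
	fmt.Println(target, def) // [3 1] [2]
}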
@ -0,0 +1,294 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package meta
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
type ResourceManagerSuite struct {
|
||||
suite.Suite
|
||||
|
||||
kv *etcdkv.EtcdKV
|
||||
manager *ResourceManager
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) SetupSuite() {
|
||||
Params.Init()
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) SetupTest() {
|
||||
config := GenerateEtcdConfig()
|
||||
cli, err := etcd.GetEtcdClient(
|
||||
config.UseEmbedEtcd,
|
||||
config.EtcdUseSSL,
|
||||
config.Endpoints,
|
||||
config.EtcdTLSCert,
|
||||
config.EtcdTLSKey,
|
||||
config.EtcdTLSCACert,
|
||||
config.EtcdTLSMinVersion)
|
||||
suite.Require().NoError(err)
|
||||
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath)
|
||||
|
||||
store := NewMetaStore(suite.kv)
|
||||
suite.manager = NewResourceManager(store, session.NewNodeManager())
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestManipulateResourceGroup() {
|
||||
// test add rg
|
||||
err := suite.manager.AddResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
suite.True(suite.manager.ContainResourceGroup("rg1"))
|
||||
suite.Len(suite.manager.ListResourceGroups(), 2)
|
||||
|
||||
// test add duplicate rg
|
||||
err = suite.manager.AddResourceGroup("rg1")
|
||||
suite.ErrorIs(err, ErrRGAlreadyExist)
|
||||
// test delete rg
|
||||
err = suite.manager.RemoveResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
|
||||
// test delete rg which doesn't exist
|
||||
err = suite.manager.RemoveResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
// test delete default rg
|
||||
err = suite.manager.RemoveResourceGroup(DefaultResourceGroupName)
|
||||
suite.ErrorIs(ErrDeleteDefaultRG, err)
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestManipulateNode() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
// test add node to rg
|
||||
err = suite.manager.AssignNode("rg1", 1)
|
||||
suite.NoError(err)
|
||||
|
||||
// test add non-exist node to rg
|
||||
err = suite.manager.AssignNode("rg1", 2)
|
||||
suite.ErrorIs(err, ErrNodeNotExist)
|
||||
|
||||
// test add node to non-exist rg
|
||||
err = suite.manager.AssignNode("rg2", 1)
|
||||
suite.ErrorIs(err, ErrRGNotExist)
|
||||
|
||||
// test remove node from rg
|
||||
err = suite.manager.UnassignNode("rg1", 1)
|
||||
suite.NoError(err)
|
||||
|
||||
// test remove non-exist node from rg
|
||||
err = suite.manager.UnassignNode("rg1", 2)
|
||||
suite.NoError(err)
|
||||
|
||||
// test remove node from non-exist rg
|
||||
err = suite.manager.UnassignNode("rg2", 1)
|
||||
suite.ErrorIs(err, ErrRGNotExist)
|
||||
|
||||
// add node which already assign to rg to another rg
|
||||
err = suite.manager.AddResourceGroup("rg2")
|
||||
suite.NoError(err)
|
||||
err = suite.manager.AssignNode("rg1", 1)
|
||||
suite.NoError(err)
|
||||
err = suite.manager.AssignNode("rg2", 1)
|
||||
println(err.Error())
|
||||
suite.ErrorIs(err, ErrNodeAlreadyAssign)
|
||||
|
||||
// transfer node between rgs
|
||||
err = suite.manager.TransferNode("rg1", "rg2")
|
||||
suite.NoError(err)
|
||||
|
||||
// transfer meet non exist rg
|
||||
err = suite.manager.TransferNode("rgggg", "rg2")
|
||||
suite.ErrorIs(err, ErrRGNotExist)
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestHandleNodeUp() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(100, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(101, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
|
||||
suite.manager.AssignNode("rg1", 1)
|
||||
suite.manager.AssignNode("rg1", 2)
|
||||
suite.manager.AssignNode("rg1", 3)
|
||||
|
||||
// test query node id not change, expect assign back to origin rg
|
||||
rg, err := suite.manager.GetResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
suite.Equal(rg.GetCapacity(), 3)
|
||||
suite.Equal(len(rg.GetNodes()), 3)
|
||||
suite.manager.HandleNodeUp(1)
|
||||
suite.Equal(rg.GetCapacity(), 3)
|
||||
suite.Equal(len(rg.GetNodes()), 3)
|
||||
|
||||
suite.manager.HandleNodeDown(2)
|
||||
rg, err = suite.manager.GetResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
suite.Equal(rg.GetCapacity(), 3)
|
||||
suite.Equal(len(rg.GetNodes()), 2)
|
||||
suite.NoError(err)
|
||||
defaultRG, err := suite.manager.GetResourceGroup(DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
oldNodesNum := len(defaultRG.GetNodes())
|
||||
suite.manager.HandleNodeUp(101)
|
||||
rg, err = suite.manager.GetResourceGroup("rg1")
|
||||
suite.NoError(err)
|
||||
suite.Equal(rg.GetCapacity(), 3)
|
||||
suite.Equal(len(rg.GetNodes()), 2)
|
||||
suite.False(suite.manager.ContainsNode("rg1", 101))
|
||||
nodes, err := suite.manager.GetNodes(DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
suite.Equal(len(nodes), oldNodesNum+1)
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestRecover() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg")
|
||||
suite.NoError(err)
|
||||
|
||||
suite.manager.AssignNode("rg", 1)
|
||||
suite.manager.AssignNode("rg", 2)
|
||||
suite.manager.AssignNode("rg", 3)
|
||||
|
||||
suite.manager.UnassignNode("rg", 3)
|
||||
|
||||
// clear resource manager in hack way
|
||||
delete(suite.manager.groups, "rg")
|
||||
delete(suite.manager.groups, DefaultResourceGroupName)
|
||||
suite.manager.Recover()
|
||||
|
||||
rg, err := suite.manager.GetResourceGroup("rg")
|
||||
suite.NoError(err)
|
||||
suite.Equal(2, rg.GetCapacity())
|
||||
suite.True(suite.manager.ContainsNode("rg", 1))
|
||||
suite.True(suite.manager.ContainsNode("rg", 2))
|
||||
suite.False(suite.manager.ContainsNode("rg", 3))
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestCheckOutboundNodes() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg")
|
||||
suite.NoError(err)
|
||||
suite.manager.AssignNode("rg", 1)
|
||||
suite.manager.AssignNode("rg", 2)
|
||||
suite.manager.AssignNode("rg", 3)
|
||||
|
||||
replica := NewReplica(
|
||||
&querypb.Replica{
|
||||
ID: 1,
|
||||
CollectionID: 1,
|
||||
Nodes: []int64{1, 2, 3, 4},
|
||||
ResourceGroup: "rg",
|
||||
},
|
||||
typeutil.NewUniqueSet(1, 2, 3, 4),
|
||||
)
|
||||
|
||||
outboundNodes := suite.manager.CheckOutboundNodes(replica)
|
||||
suite.Len(outboundNodes, 1)
|
||||
suite.True(outboundNodes.Contain(4))
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestCheckResourceGroup() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg")
|
||||
suite.NoError(err)
|
||||
suite.manager.AssignNode("rg", 1)
|
||||
suite.manager.AssignNode("rg", 2)
|
||||
suite.manager.AssignNode("rg", 3)
|
||||
|
||||
suite.manager.HandleNodeDown(1)
|
||||
lackNodes := suite.manager.CheckLackOfNode("rg")
|
||||
suite.Equal(lackNodes, 1)
|
||||
|
||||
suite.manager.nodeMgr.Remove(2)
|
||||
suite.manager.checkRGNodeStatus("rg")
|
||||
lackNodes = suite.manager.CheckLackOfNode("rg")
|
||||
suite.Equal(lackNodes, 2)
|
||||
|
||||
rg, err := suite.manager.FindResourceGroupByNode(3)
|
||||
suite.NoError(err)
|
||||
suite.Equal(rg, "rg")
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestGetOutboundNode() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
suite.manager.AddResourceGroup("rg")
|
||||
suite.manager.AddResourceGroup("rg1")
|
||||
suite.manager.AssignNode("rg", 1)
|
||||
suite.manager.AssignNode("rg", 2)
|
||||
suite.manager.AssignNode("rg1", 3)
|
||||
|
||||
replica := NewReplica(
|
||||
&querypb.Replica{
|
||||
ID: 1,
|
||||
CollectionID: 100,
|
||||
ResourceGroup: "rg",
|
||||
Nodes: []int64{1, 2, 3},
|
||||
},
|
||||
typeutil.NewUniqueSet(1, 2, 3),
|
||||
)
|
||||
|
||||
outgoingNodes := suite.manager.GetOutgoingNodeNumByReplica(replica)
|
||||
suite.NotNil(outgoingNodes)
|
||||
suite.Len(outgoingNodes, 1)
|
||||
suite.NotNil(outgoingNodes["rg1"])
|
||||
suite.Equal(outgoingNodes["rg1"], int32(1))
|
||||
}
|
||||
|
||||
func (suite *ResourceManagerSuite) TestAutoRecover() {
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
|
||||
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
|
||||
err := suite.manager.AddResourceGroup("rg")
|
||||
suite.NoError(err)
|
||||
suite.manager.AssignNode(DefaultResourceGroupName, 1)
|
||||
suite.manager.AssignNode(DefaultResourceGroupName, 2)
|
||||
suite.manager.AssignNode("rg", 3)
|
||||
|
||||
suite.manager.HandleNodeDown(3)
|
||||
lackNodes := suite.manager.CheckLackOfNode("rg")
|
||||
suite.Equal(lackNodes, 1)
|
||||
suite.manager.AutoRecoverResourceGroup("rg")
|
||||
lackNodes = suite.manager.CheckLackOfNode("rg")
|
||||
suite.Equal(lackNodes, 0)
|
||||
}
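Illustrative sketch (not part of the patch) of the recovery behaviour TestAutoRecover exercises: a resource group holding fewer nodes than its capacity borrows nodes from the default group until the shortfall is gone. The types and the autoRecover helper below are simplified stand-ins for the real ResourceManager, shown only to make the expected CheckLackOfNode result concrete.

package main

import "fmt"

type resourceGroup struct {
	capacity int
	nodes    []int64
}

// autoRecover moves nodes from the default group into rg until rg reaches its
// capacity or the default group runs out of nodes.
func autoRecover(rg, defaultRG *resourceGroup) {
	for len(rg.nodes) < rg.capacity && len(defaultRG.nodes) > 0 {
		n := defaultRG.nodes[len(defaultRG.nodes)-1]
		defaultRG.nodes = defaultRG.nodes[:len(defaultRG.nodes)-1]
		rg.nodes = append(rg.nodes, n)
	}
}

func main() {
	defaultRG := &resourceGroup{capacity: 2, nodes: []int64{1, 2}}
	rg := &resourceGroup{capacity: 1, nodes: nil} // node 3 went down, rg lacks one node
	autoRecover(rg, defaultRG)
	fmt.Println(len(rg.nodes)) // 1, so a CheckLackOfNode("rg") equivalent would report 0
}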
|
||||
|
||||
func (suite *ResourceManagerSuite) TearDownSuite() {
|
||||
suite.kv.Close()
|
||||
}
|
||||
|
||||
func TestResourceManager(t *testing.T) {
|
||||
suite.Run(t, new(ResourceManagerSuite))
|
||||
}
|
|
@ -150,7 +150,7 @@ func (m *SegmentDistManager) GetByShardWithReplica(shard string, replica *Replic
|
|||
|
||||
ret := make([]*Segment, 0)
|
||||
for nodeID, segments := range m.segments {
|
||||
if !replica.Nodes.Contain(nodeID) {
|
||||
if !replica.Contains(nodeID) {
|
||||
continue
|
||||
}
|
||||
for _, segment := range segments {
|
||||
|
|
|
@ -28,7 +28,6 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/kv"
|
||||
"github.com/milvus-io/milvus/internal/metastore"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
"github.com/milvus-io/milvus/internal/util"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -41,6 +40,7 @@ const (
|
|||
ReplicaPrefix = "querycoord-replica"
|
||||
CollectionMetaPrefixV1 = "queryCoord-collectionMeta"
|
||||
ReplicaMetaPrefixV1 = "queryCoord-ReplicaMeta"
|
||||
ResourceGroupPrefix = "queryCoord-ResourceGroup"
|
||||
)
|
||||
|
||||
type WatchStoreChan = clientv3.WatchChan
|
||||
|
@ -91,6 +91,26 @@ func (s metaStore) SaveReplica(replica *querypb.Replica) error {
|
|||
return s.cli.Save(key, string(value))
|
||||
}
|
||||
|
||||
func (s metaStore) SaveResourceGroup(rgs ...*querypb.ResourceGroup) error {
|
||||
ret := make(map[string]string)
|
||||
for _, rg := range rgs {
|
||||
key := encodeResourceGroupKey(rg.GetName())
|
||||
value, err := proto.Marshal(rg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ret[key] = string(value)
|
||||
}
|
||||
|
||||
return s.cli.MultiSave(ret)
|
||||
}
|
||||
|
||||
func (s metaStore) RemoveResourceGroup(rgName string) error {
|
||||
key := encodeResourceGroupKey(rgName)
|
||||
return s.cli.Remove(key)
|
||||
}
|
||||
|
||||
func (s metaStore) GetCollections() ([]*querypb.CollectionLoadInfo, error) {
|
||||
_, values, err := s.cli.LoadWithPrefix(CollectionLoadInfoPrefix)
|
||||
if err != nil {
|
||||
|
@ -171,6 +191,25 @@ func (s metaStore) getReplicasFromV1() ([]*querypb.Replica, error) {
|
|||
return ret, nil
|
||||
}
|
||||
|
||||
func (s metaStore) GetResourceGroups() ([]*querypb.ResourceGroup, error) {
|
||||
_, rgs, err := s.cli.LoadWithPrefix(ResourceGroupPrefix)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret := make([]*querypb.ResourceGroup, 0, len(rgs))
|
||||
for _, value := range rgs {
|
||||
rg := &querypb.ResourceGroup{}
|
||||
err := proto.Unmarshal([]byte(value), rg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ret = append(ret, rg)
|
||||
}
|
||||
return ret, nil
|
||||
}
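A minimal, self-contained sketch of the persistence scheme the two functions above implement: each resource group is stored as one value under a queryCoord-ResourceGroup/<name> key and loaded back with a prefix scan. An in-memory map stands in for the etcd-backed kv client, and a plain string stands in for the proto-marshalled querypb.ResourceGroup.

package main

import (
	"fmt"
	"strings"
)

const resourceGroupPrefix = "queryCoord-ResourceGroup"

type fakeKV map[string]string

// saveResourceGroup writes one group under its prefixed key; the real store
// proto-marshals the ResourceGroup message instead of using a plain string.
func saveResourceGroup(kv fakeKV, name, payload string) {
	kv[fmt.Sprintf("%s/%s", resourceGroupPrefix, name)] = payload
}

// loadResourceGroups mirrors GetResourceGroups: scan by prefix, decode each value.
func loadResourceGroups(kv fakeKV) map[string]string {
	out := make(map[string]string)
	for k, v := range kv {
		if strings.HasPrefix(k, resourceGroupPrefix+"/") {
			out[strings.TrimPrefix(k, resourceGroupPrefix+"/")] = v
		}
	}
	return out
}

func main() {
	kv := fakeKV{}
	saveResourceGroup(kv, "rg1", "capacity=3;nodes=1,2,3")
	saveResourceGroup(kv, "rg2", "capacity=2;nodes=4,5")
	fmt.Println(len(loadResourceGroups(kv))) // 2
}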
|
||||
|
||||
func (s metaStore) ReleaseCollection(id int64) error {
|
||||
k := encodeCollectionLoadInfoKey(id)
|
||||
return s.cli.Remove(k)
|
||||
|
@ -209,6 +248,6 @@ func encodeCollectionReplicaKey(collection int64) string {
|
|||
return fmt.Sprintf("%s/%d", ReplicaPrefix, collection)
|
||||
}
|
||||
|
||||
func encodeHandoffEventKey(collection, partition, segment int64) string {
|
||||
return fmt.Sprintf("%s/%d/%d/%d", util.HandoffSegmentPrefix, collection, partition, segment)
|
||||
func encodeResourceGroupKey(rgName string) string {
|
||||
return fmt.Sprintf("%s/%s", ResourceGroupPrefix, rgName)
|
||||
}
|
||||
|
|
|
@ -17,22 +17,151 @@
|
|||
package meta
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/kv"
|
||||
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
type StoreTestSuite struct {
|
||||
suite.Suite
|
||||
|
||||
kv kv.MetaKv
|
||||
store metaStore
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) SetupTest() {
|
||||
//kv := memkv.NewMemoryKV()
|
||||
//suite.store = NewMetaStore(kv)
|
||||
func (suite *StoreTestSuite) SetupSuite() {
|
||||
Params.Init()
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TearDownTest() {}
|
||||
func (suite *StoreTestSuite) SetupTest() {
|
||||
config := GenerateEtcdConfig()
|
||||
cli, err := etcd.GetEtcdClient(
|
||||
config.UseEmbedEtcd,
|
||||
config.EtcdUseSSL,
|
||||
config.Endpoints,
|
||||
config.EtcdTLSCert,
|
||||
config.EtcdTLSKey,
|
||||
config.EtcdTLSCACert,
|
||||
config.EtcdTLSMinVersion)
|
||||
suite.Require().NoError(err)
|
||||
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath)
|
||||
suite.store = NewMetaStore(suite.kv)
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TearDownTest() {
|
||||
if suite.kv != nil {
|
||||
suite.kv.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TestCollection() {
|
||||
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
|
||||
CollectionID: 1,
|
||||
})
|
||||
|
||||
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
|
||||
CollectionID: 2,
|
||||
})
|
||||
|
||||
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
|
||||
CollectionID: 3,
|
||||
})
|
||||
|
||||
suite.store.ReleaseCollection(1)
|
||||
suite.store.ReleaseCollection(2)
|
||||
|
||||
collections, err := suite.store.GetCollections()
|
||||
suite.NoError(err)
|
||||
suite.Len(collections, 1)
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TestPartition() {
|
||||
suite.store.SavePartition(&querypb.PartitionLoadInfo{
|
||||
PartitionID: 1,
|
||||
})
|
||||
|
||||
suite.store.SavePartition(&querypb.PartitionLoadInfo{
|
||||
PartitionID: 2,
|
||||
})
|
||||
|
||||
suite.store.SavePartition(&querypb.PartitionLoadInfo{
|
||||
PartitionID: 3,
|
||||
})
|
||||
|
||||
suite.store.ReleasePartition(1)
|
||||
suite.store.ReleasePartition(2)
|
||||
|
||||
partitions, err := suite.store.GetPartitions()
|
||||
suite.NoError(err)
|
||||
suite.Len(partitions, 1)
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TestReplica() {
|
||||
suite.store.SaveReplica(&querypb.Replica{
|
||||
CollectionID: 1,
|
||||
ID: 1,
|
||||
})
|
||||
|
||||
suite.store.SaveReplica(&querypb.Replica{
|
||||
CollectionID: 1,
|
||||
ID: 2,
|
||||
})
|
||||
|
||||
suite.store.SaveReplica(&querypb.Replica{
|
||||
CollectionID: 1,
|
||||
ID: 3,
|
||||
})
|
||||
|
||||
suite.store.ReleaseReplica(1, 1)
|
||||
suite.store.ReleaseReplica(1, 2)
|
||||
|
||||
replicas, err := suite.store.GetReplicas()
|
||||
suite.NoError(err)
|
||||
suite.Len(replicas, 1)
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TestResourceGroup() {
|
||||
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: "rg1",
|
||||
Capacity: 3,
|
||||
Nodes: []int64{1, 2, 3},
|
||||
})
|
||||
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: "rg2",
|
||||
Capacity: 3,
|
||||
Nodes: []int64{4, 5},
|
||||
})
|
||||
|
||||
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
|
||||
Name: "rg3",
|
||||
Capacity: 0,
|
||||
Nodes: []int64{},
|
||||
})
|
||||
|
||||
suite.store.RemoveResourceGroup("rg3")
|
||||
|
||||
groups, err := suite.store.GetResourceGroups()
|
||||
suite.NoError(err)
|
||||
suite.Len(groups, 2)
|
||||
|
||||
sort.Slice(groups, func(i, j int) bool {
|
||||
return groups[i].GetName() < groups[j].GetName()
|
||||
})
|
||||
|
||||
suite.Equal("rg1", groups[0].GetName())
|
||||
suite.Equal(int32(3), groups[0].GetCapacity())
|
||||
suite.Equal([]int64{1, 2, 3}, groups[0].GetNodes())
|
||||
|
||||
suite.Equal("rg2", groups[1].GetName())
|
||||
suite.Equal(int32(3), groups[1].GetCapacity())
|
||||
suite.Equal([]int64{4, 5}, groups[1].GetNodes())
|
||||
}
|
||||
|
||||
func (suite *StoreTestSuite) TestLoadRelease() {
|
||||
// TODO(sunby): add ut
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/milvus-io/milvus/internal/util/typeutil"
|
||||
)
|
||||
|
@ -101,7 +102,7 @@ func (suite *TargetManagerSuite) SetupTest() {
|
|||
// meta
|
||||
store := NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = NewMeta(idAllocator, store)
|
||||
suite.meta = NewMeta(idAllocator, store, session.NewNodeManager())
|
||||
suite.broker = NewMockBroker(suite.T())
|
||||
suite.mgr = NewTargetManager(suite.broker, suite.meta)
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
)
|
||||
|
||||
|
@ -173,7 +174,7 @@ func (suite *CollectionObserverSuite) SetupTest() {
|
|||
|
||||
// Dependencies
|
||||
suite.dist = meta.NewDistributionManager()
|
||||
suite.meta = meta.NewMeta(suite.idAllocator, suite.store)
|
||||
suite.meta = meta.NewMeta(suite.idAllocator, suite.store, session.NewNodeManager())
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
|
||||
suite.targetObserver = NewTargetObserver(suite.meta,
|
||||
|
@ -317,7 +318,7 @@ func (suite *CollectionObserverSuite) loadAll() {
|
|||
|
||||
func (suite *CollectionObserverSuite) load(collection int64) {
|
||||
// Mock meta data
|
||||
replicas, err := suite.meta.ReplicaManager.Spawn(collection, suite.replicaNumber[collection])
|
||||
replicas, err := suite.meta.ReplicaManager.Spawn(collection, suite.replicaNumber[collection], meta.DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
for _, replica := range replicas {
|
||||
replica.AddNode(suite.nodes...)
|
||||
|
|
|
@ -67,7 +67,7 @@ func (suite *LeaderObserverTestSuite) SetupTest() {
|
|||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store)
|
||||
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
|
||||
suite.mockCluster = session.NewMockCluster(suite.T())
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package observers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
)
|
||||
|
||||
// check replicas: find outbound nodes and remove a node from its replica once all of its segments/channels have been moved
|
||||
type ReplicaObserver struct {
|
||||
c chan struct{}
|
||||
wg sync.WaitGroup
|
||||
meta *meta.Meta
|
||||
distMgr *meta.DistributionManager
|
||||
|
||||
stopOnce sync.Once
|
||||
}
|
||||
|
||||
func NewReplicaObserver(meta *meta.Meta, distMgr *meta.DistributionManager) *ReplicaObserver {
|
||||
return &ReplicaObserver{
|
||||
c: make(chan struct{}),
|
||||
meta: meta,
|
||||
distMgr: distMgr,
|
||||
}
|
||||
}
|
||||
|
||||
func (ob *ReplicaObserver) Start(ctx context.Context) {
|
||||
ob.wg.Add(1)
|
||||
go ob.schedule(ctx)
|
||||
}
|
||||
|
||||
func (ob *ReplicaObserver) Stop() {
|
||||
ob.stopOnce.Do(func() {
|
||||
close(ob.c)
|
||||
ob.wg.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
func (ob *ReplicaObserver) schedule(ctx context.Context) {
|
||||
defer ob.wg.Done()
|
||||
log.Info("Start check replica loop")
|
||||
|
||||
ticker := time.NewTicker(params.Params.QueryCoordCfg.CheckNodeInReplicaInterval)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info("Close replica observer due to context canceled")
|
||||
return
|
||||
case <-ob.c:
|
||||
log.Info("Close replica observer")
|
||||
return
|
||||
|
||||
case <-ticker.C:
|
||||
ob.checkNodesInReplica()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ob *ReplicaObserver) checkNodesInReplica() {
|
||||
collections := ob.meta.GetAll()
|
||||
for _, collectionID := range collections {
|
||||
replicas := ob.meta.ReplicaManager.GetByCollection(collectionID)
|
||||
|
||||
for _, replica := range replicas {
|
||||
outboundNodes := ob.meta.ResourceManager.CheckOutboundNodes(replica)
|
||||
if len(outboundNodes) > 0 {
|
||||
log.RatedInfo(10, "found outbound nodes in replica",
|
||||
zap.Int64("collectionID", replica.GetCollectionID()),
|
||||
zap.Int64("replicaID", replica.GetCollectionID()),
|
||||
zap.Int64s("allOutboundNodes", outboundNodes.Collect()),
|
||||
)
|
||||
|
||||
for node := range outboundNodes {
|
||||
channels := ob.distMgr.ChannelDistManager.GetByCollectionAndNode(collectionID, node)
|
||||
segments := ob.distMgr.SegmentDistManager.GetByCollectionAndNode(collectionID, node)
|
||||
|
||||
if len(channels) == 0 && len(segments) == 0 {
|
||||
replica.RemoveNode(node)
|
||||
log.Info("all segment/channel has been removed from outbound node, remove it from replica",
|
||||
zap.Int64("collectionID", replica.GetCollectionID()),
|
||||
zap.Int64("replicaID", replica.GetCollectionID()),
|
||||
zap.Int64("removedNodes", node),
|
||||
zap.Int64s("availableNodes", replica.GetNodes()),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
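The observer above follows the usual start/stop pattern in this package: Start launches a ticker-driven goroutine, and Stop closes an internal channel exactly once and waits for the goroutine to exit. A self-contained sketch of that pattern (illustrative names, not the real ReplicaObserver):

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

type tickerLoop struct {
	c        chan struct{}
	wg       sync.WaitGroup
	stopOnce sync.Once
}

// start runs check on every tick until the context is canceled or stop is called.
func (l *tickerLoop) start(ctx context.Context, interval time.Duration, check func()) {
	l.wg.Add(1)
	go func() {
		defer l.wg.Done()
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return // context canceled
			case <-l.c:
				return // stop() called
			case <-ticker.C:
				check()
			}
		}
	}()
}

// stop is safe to call more than once thanks to sync.Once.
func (l *tickerLoop) stop() {
	l.stopOnce.Do(func() {
		close(l.c)
		l.wg.Wait()
	})
}

func main() {
	l := &tickerLoop{c: make(chan struct{})}
	l.start(context.Background(), 10*time.Millisecond, func() { fmt.Println("check replicas") })
	time.Sleep(35 * time.Millisecond)
	l.stop()
}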
|
|
@ -0,0 +1,133 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package observers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
type ReplicaObserverSuite struct {
|
||||
suite.Suite
|
||||
|
||||
kv *etcdkv.EtcdKV
|
||||
// dependencies
|
||||
meta *meta.Meta
|
||||
distMgr *meta.DistributionManager
|
||||
|
||||
observer *ReplicaObserver
|
||||
|
||||
collectionID int64
|
||||
partitionID int64
|
||||
}
|
||||
|
||||
func (suite *ReplicaObserverSuite) SetupSuite() {
|
||||
Params.Init()
|
||||
Params.QueryCoordCfg.CheckNodeInReplicaInterval = 1 * time.Second
|
||||
}
|
||||
|
||||
func (suite *ReplicaObserverSuite) SetupTest() {
|
||||
var err error
|
||||
config := GenerateEtcdConfig()
|
||||
cli, err := etcd.GetEtcdClient(
|
||||
config.UseEmbedEtcd,
|
||||
config.EtcdUseSSL,
|
||||
config.Endpoints,
|
||||
config.EtcdTLSCert,
|
||||
config.EtcdTLSKey,
|
||||
config.EtcdTLSCACert,
|
||||
config.EtcdTLSMinVersion)
|
||||
suite.Require().NoError(err)
|
||||
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath)
|
||||
|
||||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
|
||||
|
||||
suite.distMgr = meta.NewDistributionManager()
|
||||
suite.observer = NewReplicaObserver(suite.meta, suite.distMgr)
|
||||
suite.observer.Start(context.TODO())
|
||||
suite.collectionID = int64(1000)
|
||||
suite.partitionID = int64(100)
|
||||
|
||||
suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
|
||||
err = suite.meta.CollectionManager.PutCollection(utils.CreateTestCollection(suite.collectionID, 1))
|
||||
suite.NoError(err)
|
||||
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1, meta.DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
err = suite.meta.ReplicaManager.Put(replicas...)
|
||||
suite.NoError(err)
|
||||
}
|
||||
|
||||
func (suite *ReplicaObserverSuite) TestCheckNodesInReplica() {
|
||||
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
|
||||
|
||||
suite.distMgr.ChannelDistManager.Update(1, utils.CreateTestChannel(suite.collectionID, 2, 1, "test-insert-channel1"))
|
||||
suite.distMgr.SegmentDistManager.Update(1, utils.CreateTestSegment(suite.collectionID, suite.partitionID, 1, 100, 1, "test-insert-channel1"))
|
||||
replicas[0].AddNode(1)
|
||||
suite.distMgr.ChannelDistManager.Update(100, utils.CreateTestChannel(suite.collectionID, 100, 1, "test-insert-channel2"))
|
||||
suite.distMgr.SegmentDistManager.Update(100, utils.CreateTestSegment(suite.collectionID, suite.partitionID, 2, 100, 1, "test-insert-channel2"))
|
||||
replicas[0].AddNode(100)
|
||||
|
||||
suite.Eventually(func() bool {
|
||||
// node 100 should be kept
|
||||
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
|
||||
|
||||
for _, node := range replicas[0].GetNodes() {
|
||||
if node == 100 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}, 6*time.Second, 2*time.Second)
|
||||
suite.Len(replicas[0].GetNodes(), 2)
|
||||
|
||||
suite.distMgr.ChannelDistManager.Update(100)
|
||||
suite.distMgr.SegmentDistManager.Update(100)
|
||||
|
||||
suite.Eventually(func() bool {
|
||||
// node 100 should be removed
|
||||
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
|
||||
|
||||
for _, node := range replicas[0].GetNodes() {
|
||||
if node == 100 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}, 5*time.Second, 1*time.Second)
|
||||
suite.Len(replicas[0].GetNodes(), 1)
|
||||
suite.Equal([]int64{1}, replicas[0].GetNodes())
|
||||
}
|
||||
|
||||
func (suite *ReplicaObserverSuite) TearDownSuite() {
|
||||
suite.kv.Close()
|
||||
suite.observer.Stop()
|
||||
}
|
||||
|
||||
func TestReplicaObserver(t *testing.T) {
|
||||
suite.Run(t, new(ReplicaObserverSuite))
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package observers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// check whether a resource group lacks nodes; if so, try to transfer nodes from the default resource group
|
||||
type ResourceObserver struct {
|
||||
c chan struct{}
|
||||
wg sync.WaitGroup
|
||||
meta *meta.Meta
|
||||
|
||||
stopOnce sync.Once
|
||||
}
|
||||
|
||||
func NewResourceObserver(meta *meta.Meta) *ResourceObserver {
|
||||
return &ResourceObserver{
|
||||
c: make(chan struct{}),
|
||||
meta: meta,
|
||||
}
|
||||
}
|
||||
|
||||
func (ob *ResourceObserver) Start(ctx context.Context) {
|
||||
ob.wg.Add(1)
|
||||
go ob.schedule(ctx)
|
||||
}
|
||||
|
||||
func (ob *ResourceObserver) Stop() {
|
||||
ob.stopOnce.Do(func() {
|
||||
close(ob.c)
|
||||
ob.wg.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
func (ob *ResourceObserver) schedule(ctx context.Context) {
|
||||
defer ob.wg.Done()
|
||||
log.Info("Start check resource group loop")
|
||||
|
||||
ticker := time.NewTicker(params.Params.QueryCoordCfg.CheckResourceGroupInterval)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info("Close resource group observer due to context canceled")
|
||||
return
|
||||
case <-ob.c:
|
||||
log.Info("Close resource group observer")
|
||||
return
|
||||
|
||||
case <-ticker.C:
|
||||
ob.checkResourceGroup()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ob *ResourceObserver) checkResourceGroup() {
|
||||
manager := ob.meta.ResourceManager
|
||||
rgNames := manager.ListResourceGroups()
|
||||
|
||||
enableRGAutoRecover := params.Params.QueryCoordCfg.EnableRGAutoRecover
|
||||
|
||||
for _, rgName := range rgNames {
|
||||
if rgName == meta.DefaultResourceGroupName {
|
||||
continue
|
||||
}
|
||||
lackNodeNum := manager.CheckLackOfNode(rgName)
|
||||
if lackNodeNum > 0 {
|
||||
log.Info("found resource group lack of nodes",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int("lackNodeNum", lackNodeNum),
|
||||
)
|
||||
|
||||
if enableRGAutoRecover {
|
||||
usedNodeNum, err := manager.AutoRecoverResourceGroup(rgName)
|
||||
if err != nil {
|
||||
log.Warn("failed to recover resource group",
|
||||
zap.String("rgName", rgName),
|
||||
zap.Int("lackNodeNum", lackNodeNum-usedNodeNum),
|
||||
zap.Error(err),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
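A toy version of the per-group decision above, using plain callbacks in place of the ResourceManager and a placeholder string for meta.DefaultResourceGroupName: the default group is skipped because it is the donor, and recovery only runs when auto-recover is enabled.

package main

import "fmt"

// checkResourceGroups mirrors the loop above: checkLack reports how many nodes a
// group is missing, doRecover tries to fill the gap from the default group.
func checkResourceGroups(groups []string, defaultRG string, autoRecoverEnabled bool,
	checkLack func(string) int, doRecover func(string) error) {
	for _, rg := range groups {
		if rg == defaultRG {
			continue // the default group is the donor, never a recovery target
		}
		if lack := checkLack(rg); lack > 0 && autoRecoverEnabled {
			if err := doRecover(rg); err != nil {
				fmt.Println("failed to recover resource group", rg, err)
			}
		}
	}
}

func main() {
	lack := map[string]int{"rg1": 2, "rg2": 0}
	checkResourceGroups([]string{"default", "rg1", "rg2"}, "default", true,
		func(rg string) int { return lack[rg] },
		func(rg string) error { fmt.Println("recovering", rg); return nil })
}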
|
|
@ -0,0 +1,110 @@
|
|||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package observers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
etcdKV "github.com/milvus-io/milvus/internal/kv/etcd"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/stretchr/testify/suite"
|
||||
)
|
||||
|
||||
type ResourceObserverSuite struct {
|
||||
suite.Suite
|
||||
|
||||
kv *etcdKV.EtcdKV
|
||||
// dependencies
|
||||
meta *meta.Meta
|
||||
observer *ResourceObserver
|
||||
nodeMgr *session.NodeManager
|
||||
|
||||
collectionID int64
|
||||
partitionID int64
|
||||
}
|
||||
|
||||
func (suite *ResourceObserverSuite) SetupSuite() {
|
||||
Params.Init()
|
||||
Params.QueryCoordCfg.CheckResourceGroupInterval = 3 * time.Second
|
||||
}
|
||||
|
||||
func (suite *ResourceObserverSuite) SetupTest() {
|
||||
var err error
|
||||
config := GenerateEtcdConfig()
|
||||
cli, err := etcd.GetEtcdClient(
|
||||
config.UseEmbedEtcd,
|
||||
config.EtcdUseSSL,
|
||||
config.Endpoints,
|
||||
config.EtcdTLSCert,
|
||||
config.EtcdTLSKey,
|
||||
config.EtcdTLSCACert,
|
||||
config.EtcdTLSMinVersion)
|
||||
suite.Require().NoError(err)
|
||||
suite.kv = etcdKV.NewEtcdKV(cli, config.MetaRootPath)
|
||||
|
||||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
|
||||
|
||||
suite.observer = NewResourceObserver(suite.meta)
|
||||
suite.observer.Start(context.TODO())
|
||||
|
||||
for i := 1; i < 10; i++ {
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(int64(i), "localhost"))
|
||||
suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, int64(i))
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *ResourceObserverSuite) TestCheckNodesInReplica() {
|
||||
suite.meta.ResourceManager.AddResourceGroup("rg")
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(int64(100), "localhost"))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(int64(101), "localhost"))
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(int64(102), "localhost"))
|
||||
suite.meta.ResourceManager.AssignNode("rg", 100)
|
||||
suite.meta.ResourceManager.AssignNode("rg", 101)
|
||||
suite.meta.ResourceManager.AssignNode("rg", 102)
|
||||
suite.meta.ResourceManager.HandleNodeDown(100)
|
||||
suite.meta.ResourceManager.HandleNodeDown(101)
|
||||
|
||||
// before auto-recover of the rg
|
||||
suite.Eventually(func() bool {
|
||||
lackNodesNum := suite.meta.ResourceManager.CheckLackOfNode("rg")
|
||||
return lackNodesNum == 2
|
||||
}, 5*time.Second, 1*time.Second)
|
||||
|
||||
// after auto-recover of the rg
|
||||
suite.Eventually(func() bool {
|
||||
lackNodesNum := suite.meta.ResourceManager.CheckLackOfNode("rg")
|
||||
return lackNodesNum == 0
|
||||
}, 5*time.Second, 1*time.Second)
|
||||
|
||||
}
|
||||
|
||||
func (suite *ResourceObserverSuite) TearDownSuite() {
|
||||
suite.kv.Close()
|
||||
suite.observer.Stop()
|
||||
}
|
||||
|
||||
func TestResourceObserver(t *testing.T) {
|
||||
suite.Run(t, new(ResourceObserverSuite))
|
||||
}
|
|
@ -29,6 +29,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
|
||||
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
)
|
||||
|
@ -73,7 +74,7 @@ func (suite *TargetObserverSuite) SetupTest() {
|
|||
// meta
|
||||
store := meta.NewMetaStore(suite.kv)
|
||||
idAllocator := RandomIncrementIDAllocator()
|
||||
suite.meta = meta.NewMeta(idAllocator, store)
|
||||
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
|
||||
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
|
||||
|
@ -85,7 +86,7 @@ func (suite *TargetObserverSuite) SetupTest() {
|
|||
|
||||
err = suite.meta.CollectionManager.PutCollection(utils.CreateTestCollection(suite.collectionID, 1))
|
||||
suite.NoError(err)
|
||||
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1)
|
||||
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1, meta.DefaultResourceGroupName)
|
||||
suite.NoError(err)
|
||||
replicas[0].AddNode(2)
|
||||
err = suite.meta.ReplicaManager.Put(replicas...)
|
||||
|
@ -211,6 +212,6 @@ func (suite *TargetObserverSuite) TearDownSuite() {
|
|||
suite.observer.Stop()
|
||||
}
|
||||
|
||||
func TestTargetManager(t *testing.T) {
|
||||
func TestTargetObserver(t *testing.T) {
|
||||
suite.Run(t, new(TargetObserverSuite))
|
||||
}
|
||||
|
|
|
@ -103,6 +103,8 @@ type Server struct {
|
|||
collectionObserver *observers.CollectionObserver
|
||||
leaderObserver *observers.LeaderObserver
|
||||
targetObserver *observers.TargetObserver
|
||||
replicaObserver *observers.ReplicaObserver
|
||||
resourceObserver *observers.ResourceObserver
|
||||
|
||||
balancer balance.Balance
|
||||
|
||||
|
@ -208,13 +210,13 @@ func (s *Server) initQueryCoord() error {
|
|||
s.metricsCacheManager = metricsinfo.NewMetricsCacheManager()
|
||||
|
||||
// Init meta
|
||||
s.nodeMgr = session.NewNodeManager()
|
||||
err = s.initMeta()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Init session
|
||||
log.Info("init session")
|
||||
s.nodeMgr = session.NewNodeManager()
|
||||
s.cluster = session.NewCluster(s.nodeMgr)
|
||||
|
||||
// Init schedulers
|
||||
|
@ -273,7 +275,7 @@ func (s *Server) initQueryCoord() error {
|
|||
func (s *Server) initMeta() error {
|
||||
log.Info("init meta")
|
||||
s.store = meta.NewMetaStore(s.kv)
|
||||
s.meta = meta.NewMeta(s.idAllocator, s.store)
|
||||
s.meta = meta.NewMeta(s.idAllocator, s.store, s.nodeMgr)
|
||||
|
||||
log.Info("recover meta...")
|
||||
err := s.meta.CollectionManager.Recover()
|
||||
|
@ -289,6 +291,12 @@ func (s *Server) initMeta() error {
|
|||
return err
|
||||
}
|
||||
|
||||
err = s.meta.ResourceManager.Recover()
|
||||
if err != nil {
|
||||
log.Error("failed to recover resource groups")
|
||||
return err
|
||||
}
|
||||
|
||||
s.dist = &meta.DistributionManager{
|
||||
SegmentDistManager: meta.NewSegmentDistManager(),
|
||||
ChannelDistManager: meta.NewChannelDistManager(),
|
||||
|
@ -324,6 +332,13 @@ func (s *Server) initObserver() {
|
|||
s.targetMgr,
|
||||
s.targetObserver,
|
||||
)
|
||||
|
||||
s.replicaObserver = observers.NewReplicaObserver(
|
||||
s.meta,
|
||||
s.dist,
|
||||
)
|
||||
|
||||
s.resourceObserver = observers.NewResourceObserver(s.meta)
|
||||
}
|
||||
|
||||
func (s *Server) afterStart() {
|
||||
|
@ -384,6 +399,8 @@ func (s *Server) startServerLoop() {
|
|||
s.collectionObserver.Start(s.ctx)
|
||||
s.leaderObserver.Start(s.ctx)
|
||||
s.targetObserver.Start(s.ctx)
|
||||
s.replicaObserver.Start(s.ctx)
|
||||
s.resourceObserver.Start(s.ctx)
|
||||
}
|
||||
|
||||
func (s *Server) Stop() error {
|
||||
|
@ -427,6 +444,12 @@ func (s *Server) Stop() error {
|
|||
if s.targetObserver != nil {
|
||||
s.targetObserver.Stop()
|
||||
}
|
||||
if s.replicaObserver != nil {
|
||||
s.replicaObserver.Stop()
|
||||
}
|
||||
if s.resourceObserver != nil {
|
||||
s.resourceObserver.Stop()
|
||||
}
|
||||
|
||||
s.wg.Wait()
|
||||
log.Info("QueryCoord stop successfully")
|
||||
|
@ -603,17 +626,33 @@ func (s *Server) handleNodeUp(node int64) {
|
|||
s.taskScheduler.AddExecutor(node)
|
||||
s.distController.StartDistInstance(s.ctx, node)
|
||||
|
||||
// the new node needs to be assigned to a resource group and a replica
|
||||
rgName, err := s.meta.ResourceManager.HandleNodeUp(node)
|
||||
if err != nil {
|
||||
log.Warn("HandleNodeUp: failed to assign node to resource group",
|
||||
zap.Error(err),
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
log.Info("HandleNodeUp: assign node to resource group",
|
||||
zap.String("resourceGroup", rgName),
|
||||
)
|
||||
|
||||
for _, collection := range s.meta.CollectionManager.GetAll() {
|
||||
log := log.With(zap.Int64("collectionID", collection))
|
||||
replica := s.meta.ReplicaManager.GetByCollectionAndNode(collection, node)
|
||||
if replica == nil {
|
||||
replicas := s.meta.ReplicaManager.GetByCollection(collection)
|
||||
replicas := s.meta.ReplicaManager.GetByCollectionAndRG(collection, rgName)
|
||||
if len(replicas) == 0 {
|
||||
continue
|
||||
}
|
||||
sort.Slice(replicas, func(i, j int) bool {
|
||||
return replicas[i].Nodes.Len() < replicas[j].Nodes.Len()
|
||||
return replicas[i].Len() < replicas[j].Len()
|
||||
})
|
||||
replica := replicas[0]
|
||||
// TODO(yah01): this may fail, need a component to check whether a node is assigned
|
||||
err := s.meta.ReplicaManager.AddNode(replica.GetID(), node)
|
||||
err = s.meta.ReplicaManager.AddNode(replica.GetID(), node)
|
||||
if err != nil {
|
||||
log.Warn("failed to assign node to replicas",
|
||||
zap.Int64("replicaID", replica.GetID()),
|
||||
|
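A simplified sketch of the placement rule handleNodeUp applies after the resource group assignment: among the collection's replicas in that resource group, the freshly started node is added to the replica that currently has the fewest nodes. The replica type below is an illustrative stand-in.

package main

import (
	"fmt"
	"sort"
)

type replica struct {
	id    int64
	nodes []int64
}

// assignNode adds node to the smallest replica and returns the one it picked.
func assignNode(replicas []*replica, node int64) *replica {
	if len(replicas) == 0 {
		return nil
	}
	sort.Slice(replicas, func(i, j int) bool {
		return len(replicas[i].nodes) < len(replicas[j].nodes)
	})
	replicas[0].nodes = append(replicas[0].nodes, node)
	return replicas[0]
}

func main() {
	rs := []*replica{
		{id: 1, nodes: []int64{1, 2}},
		{id: 2, nodes: []int64{3}},
	}
	picked := assignNode(rs, 4)
	fmt.Println(picked.id, picked.nodes) // 2 [3 4]
}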
@ -631,20 +670,6 @@ func (s *Server) handleNodeDown(node int64) {
|
|||
s.taskScheduler.RemoveExecutor(node)
|
||||
s.distController.Remove(node)
|
||||
|
||||
// Refresh the targets, to avoid consuming messages too early from channel
|
||||
// FIXME(yah01): the leads to miss data, the segments flushed between the two check points
|
||||
// are missed, it will recover for a while.
|
||||
channels := s.dist.ChannelDistManager.GetByNode(node)
|
||||
for _, channel := range channels {
|
||||
_, err := s.targetObserver.UpdateNextTarget(channel.GetCollectionID())
|
||||
if err != nil {
|
||||
msg := "failed to update next targets for collection"
|
||||
log.Error(msg,
|
||||
zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Clear dist
|
||||
s.dist.LeaderViewManager.Update(node)
|
||||
s.dist.ChannelDistManager.Update(node)
|
||||
|
@ -670,6 +695,19 @@ func (s *Server) handleNodeDown(node int64) {
|
|||
|
||||
// Clear tasks
|
||||
s.taskScheduler.RemoveByNode(node)
|
||||
|
||||
rgName, err := s.meta.ResourceManager.HandleNodeDown(node)
|
||||
if err != nil {
|
||||
log.Warn("HandleNodeDown: failed to remove node from resource group",
|
||||
zap.String("resourceGroup", rgName),
|
||||
zap.Error(err),
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
log.Info("HandleNodeDown: remove node from resource group",
|
||||
zap.String("resourceGroup", rgName),
|
||||
)
|
||||
}
|
||||
|
||||
// checkReplicas checks whether replica contains offline node, and remove those nodes
|
||||
|
@ -680,7 +718,7 @@ func (s *Server) checkReplicas() {
|
|||
for _, replica := range replicas {
|
||||
replica := replica.Clone()
|
||||
toRemove := make([]int64, 0)
|
||||
for node := range replica.Nodes {
|
||||
for _, node := range replica.GetNodes() {
|
||||
if s.nodeMgr.Get(node) == nil {
|
||||
toRemove = append(toRemove, node)
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/mocks"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/observers"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/util/dependency"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
|
@ -95,7 +96,7 @@ func (suite *ServerSuite) SetupSuite() {
|
|||
1000: 1,
|
||||
1001: 3,
|
||||
}
|
||||
suite.nodes = make([]*mocks.MockQueryNode, 3)
|
||||
suite.nodes = make([]*mocks.MockQueryNode, 6)
|
||||
}
|
||||
|
||||
func (suite *ServerSuite) SetupTest() {
|
||||
|
@ -113,6 +114,8 @@ func (suite *ServerSuite) SetupTest() {
|
|||
suite.Require().NoError(err)
|
||||
ok := suite.waitNodeUp(suite.nodes[i], 5*time.Second)
|
||||
suite.Require().True(ok)
|
||||
suite.server.nodeMgr.Add(session.NewNodeInfo(suite.nodes[i].ID, "localhost"))
|
||||
suite.server.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, suite.nodes[i].ID)
|
||||
}
|
||||
|
||||
suite.loadAll()
|
||||
|
@ -167,7 +170,6 @@ func (suite *ServerSuite) TestNodeUp() {
|
|||
}
|
||||
return true
|
||||
}, 5*time.Second, time.Second)
|
||||
|
||||
}
|
||||
|
||||
func (suite *ServerSuite) TestNodeUpdate() {
|
||||
|
|
|
@ -43,6 +43,16 @@ import (
|
|||
|
||||
var (
|
||||
successStatus = utils.WrapStatus(commonpb.ErrorCode_Success, "")
|
||||
|
||||
ErrCreateResourceGroupFailed = errors.New("failed to create resource group")
|
||||
ErrDropResourceGroupFailed = errors.New("failed to drop resource group")
|
||||
ErrAddNodeToRGFailed = errors.New("failed to add node to resource group")
|
||||
ErrRemoveNodeFromRGFailed = errors.New("failed to remove node from resource group")
|
||||
ErrTransferNodeFailed = errors.New("failed to transfer node between resource group")
|
||||
ErrTransferReplicaFailed = errors.New("failed to transfer replica between resource group")
|
||||
ErrListResourceGroupsFailed = errors.New("failed to list resource group")
|
||||
ErrDescribeResourceGroupFailed = errors.New("failed to describe resource group")
|
||||
ErrLoadUseWrongRG = errors.New("load operation should use collection's resource group")
|
||||
)
|
||||
|
||||
func (s *Server) ShowCollections(ctx context.Context, req *querypb.ShowCollectionsRequest) (*querypb.ShowCollectionsResponse, error) {
|
||||
|
@ -219,6 +229,12 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollection
|
|||
if req.GetRefresh() {
|
||||
return s.refreshCollection(ctx, req.GetCollectionID())
|
||||
}
|
||||
if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil {
|
||||
msg := "failed to load collection"
|
||||
log.Warn(msg, zap.Error(err))
|
||||
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, msg, err), nil
|
||||
}
|
||||
|
||||
loadJob := job.NewLoadCollectionJob(ctx,
|
||||
req,
|
||||
|
@ -286,6 +302,8 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
|
|||
log := log.With(
|
||||
zap.Int64("msgID", req.GetBase().GetMsgID()),
|
||||
zap.Int64("collectionID", req.GetCollectionID()),
|
||||
zap.Int32("replicaNumber", req.GetReplicaNumber()),
|
||||
zap.Strings("resourceGroups", req.GetResourceGroups()),
|
||||
)
|
||||
|
||||
log.Info("received load partitions request",
|
||||
|
@ -305,6 +323,12 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
|
|||
if req.GetRefresh() {
|
||||
return s.refreshPartitions(ctx, req.GetCollectionID(), req.GetPartitionIDs())
|
||||
}
|
||||
if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil {
|
||||
msg := "failed to load partitions"
|
||||
log.Warn(msg, zap.Error(ErrLoadUseWrongRG))
|
||||
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, msg, ErrLoadUseWrongRG), nil
|
||||
}
|
||||
|
||||
loadJob := job.NewLoadPartitionJob(ctx,
|
||||
req,
|
||||
|
@ -327,6 +351,19 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
|
|||
return successStatus, nil
|
||||
}
|
||||
|
||||
func (s *Server) checkResourceGroup(collectionID int64, resourceGroups []string) error {
|
||||
if len(resourceGroups) != 0 {
|
||||
collectionUsedRG := s.meta.ReplicaManager.GetResourceGroupByCollection(collectionID)
|
||||
for _, rgName := range resourceGroups {
|
||||
if !collectionUsedRG.Contain(rgName) {
|
||||
return ErrLoadUseWrongRG
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
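A toy version of the validation above, using a plain map in place of the set returned by GetResourceGroupByCollection: every requested resource group must already be one of the groups the collection's replicas use, otherwise the load request is rejected with ErrLoadUseWrongRG.

package main

import (
	"errors"
	"fmt"
)

var errLoadUseWrongRG = errors.New("load operation should use collection's resource group")

// checkResourceGroup rejects the request if any requested group is not already
// used by the collection; an empty request list is always accepted.
func checkResourceGroup(usedByCollection map[string]bool, requested []string) error {
	for _, rg := range requested {
		if !usedByCollection[rg] {
			return errLoadUseWrongRG
		}
	}
	return nil
}

func main() {
	used := map[string]bool{"rg1": true}
	fmt.Println(checkResourceGroup(used, []string{"rg1"})) // <nil>
	fmt.Println(checkResourceGroup(used, []string{"rg2"})) // error
}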
|
||||
|
||||
func (s *Server) ReleasePartitions(ctx context.Context, req *querypb.ReleasePartitionsRequest) (*commonpb.Status, error) {
|
||||
log := log.With(
|
||||
zap.Int64("msgID", req.GetBase().GetMsgID()),
|
||||
|
@ -645,7 +682,7 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
|
|||
fmt.Sprintf("can't balance, because the source node[%d] is invalid", srcNode), err), nil
|
||||
}
|
||||
for _, dstNode := range req.GetDstNodeIDs() {
|
||||
if !replica.Nodes.Contain(dstNode) {
|
||||
if !replica.Contains(dstNode) {
|
||||
msg := "destination nodes have to be in the same replica of source node"
|
||||
log.Warn(msg)
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
|
||||
|
@ -934,3 +971,204 @@ func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthReque
|
|||
|
||||
return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
|
||||
}
|
||||
|
||||
func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("rgName", req.GetResourceGroup()),
|
||||
)
|
||||
|
||||
log.Info("create resource group request received")
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrCreateResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrCreateResourceGroupFailed.Error(), ErrNotHealthy), nil
|
||||
}
|
||||
|
||||
err := s.meta.ResourceManager.AddResourceGroup(req.GetResourceGroup())
|
||||
if err != nil {
|
||||
log.Warn(ErrCreateResourceGroupFailed.Error(), zap.Error(err))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrCreateResourceGroupFailed.Error(), err), nil
|
||||
}
|
||||
return successStatus, nil
|
||||
}
|
||||
|
||||
func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("rgName", req.GetResourceGroup()),
|
||||
)
|
||||
|
||||
log.Info("drop resource group request received")
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrDropResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDropResourceGroupFailed.Error(), ErrNotHealthy), nil
|
||||
}
|
||||
|
||||
err := s.meta.ResourceManager.RemoveResourceGroup(req.GetResourceGroup())
|
||||
if err != nil {
|
||||
log.Warn(ErrDropResourceGroupFailed.Error(), zap.Error(err))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDropResourceGroupFailed.Error(), err), nil
|
||||
}
|
||||
return successStatus, nil
|
||||
}
|
||||
|
||||
func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("source", req.GetSourceResourceGroup()),
|
||||
zap.String("target", req.GetTargetResourceGroup()),
|
||||
)
|
||||
|
||||
log.Info("transfer node between resource group request received")
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrTransferNodeFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferNodeFailed.Error(), ErrNotHealthy), nil
|
||||
}
|
||||
|
||||
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetSourceResourceGroup()); !ok {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
|
||||
fmt.Sprintf("the source resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
|
||||
}
|
||||
|
||||
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetTargetResourceGroup()); !ok {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
|
||||
fmt.Sprintf("the target resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
|
||||
}
|
||||
|
||||
err := s.meta.ResourceManager.TransferNode(req.GetSourceResourceGroup(), req.GetTargetResourceGroup())
|
||||
if err != nil {
|
||||
log.Warn(ErrTransferNodeFailed.Error(), zap.Error(err))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferNodeFailed.Error(), err), nil
|
||||
}
|
||||
|
||||
return successStatus, nil
|
||||
}
|
||||
|
||||
func (s *Server) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("source", req.GetSourceResourceGroup()),
|
||||
zap.String("target", req.GetTargetResourceGroup()),
|
||||
zap.Int64("collectionID", req.GetCollectionID()),
|
||||
)
|
||||
|
||||
log.Info("transfer replica request received")
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrTransferReplicaFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferReplicaFailed.Error(), ErrNotHealthy), nil
|
||||
}
|
||||
|
||||
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetSourceResourceGroup()); !ok {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
|
||||
fmt.Sprintf("the source resource group[%s] doesn't exist", req.GetSourceResourceGroup()), meta.ErrRGNotExist), nil
|
||||
}
|
||||
|
||||
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetTargetResourceGroup()); !ok {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
|
||||
fmt.Sprintf("the target resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
|
||||
}
|
||||
|
||||
// for now, transferring a replica of the same collection to the same resource group is not supported
|
||||
replicas := s.meta.ReplicaManager.GetByCollectionAndRG(req.GetCollectionID(), req.GetSourceResourceGroup())
|
||||
if len(replicas) < int(req.GetNumReplica()) {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
|
||||
fmt.Sprintf("found [%d] replicas of collection[%d] in source resource group[%s]",
|
||||
len(replicas), req.GetCollectionID(), req.GetSourceResourceGroup())), nil
|
||||
}
|
||||
|
||||
err := s.transferReplica(req.GetTargetResourceGroup(), replicas[:req.GetNumReplica()])
|
||||
if err != nil {
|
||||
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, ErrTransferReplicaFailed.Error(), err), nil
|
||||
}
|
||||
|
||||
return successStatus, nil
|
||||
}
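A simplified sketch of the selection rule in TransferReplica: the handler takes the first NumReplica replicas of the collection found in the source group and rejects the request when there are not enough of them. Types here are illustrative stand-ins for meta.Replica.

package main

import "fmt"

type replica struct {
	id            int64
	resourceGroup string
}

// pickReplicasToTransfer filters replicas by source group and returns the first
// num of them, or an error when fewer than num are available.
func pickReplicasToTransfer(all []replica, sourceRG string, num int) ([]replica, error) {
	inSource := make([]replica, 0)
	for _, r := range all {
		if r.resourceGroup == sourceRG {
			inSource = append(inSource, r)
		}
	}
	if len(inSource) < num {
		return nil, fmt.Errorf("found [%d] replicas in source resource group[%s]", len(inSource), sourceRG)
	}
	return inSource[:num], nil
}

func main() {
	all := []replica{{1, "default"}, {2, "default"}, {3, "rg1"}}
	picked, err := pickReplicasToTransfer(all, "default", 2)
	fmt.Println(picked, err) // [{1 default} {2 default}] <nil>
}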
|
||||
|
||||
func (s *Server) transferReplica(targetRG string, replicas []*meta.Replica) error {
|
||||
ret := make([]*meta.Replica, 0)
|
||||
for _, replica := range replicas {
|
||||
newReplica := replica.Clone()
|
||||
newReplica.ResourceGroup = targetRG
|
||||
|
||||
ret = append(ret, newReplica)
|
||||
}
|
||||
err := utils.AssignNodesToReplicas(s.meta, targetRG, ret...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.meta.ReplicaManager.Put(ret...)
|
||||
}
|
||||
|
||||
func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
|
||||
log := log.Ctx(ctx)
|
||||
|
||||
log.Info("list resource group request received")
|
||||
resp := &milvuspb.ListResourceGroupsResponse{
|
||||
Status: successStatus,
|
||||
}
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrListResourceGroupsFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrListResourceGroupsFailed.Error(), ErrNotHealthy)
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
resp.ResourceGroups = s.meta.ResourceManager.ListResourceGroups()
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
|
||||
log := log.Ctx(ctx).With(
|
||||
zap.String("rgName", req.GetResourceGroup()),
|
||||
)
|
||||
|
||||
log.Info("describe resource group request received")
|
||||
resp := &querypb.DescribeResourceGroupResponse{
|
||||
Status: successStatus,
|
||||
}
|
||||
if s.status.Load() != commonpb.StateCode_Healthy {
|
||||
log.Warn(ErrDescribeResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
|
||||
resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDescribeResourceGroupFailed.Error(), ErrNotHealthy)
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
rg, err := s.meta.ResourceManager.GetResourceGroup(req.GetResourceGroup())
|
||||
if err != nil {
|
||||
resp.Status = utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, ErrDescribeResourceGroupFailed.Error(), err)
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
loadedReplicas := make(map[int64]int32)
|
||||
outgoingNodes := make(map[int64]int32)
|
||||
replicasInRG := s.meta.GetByResourceGroup(req.GetResourceGroup())
|
||||
for _, replica := range replicasInRG {
|
||||
loadedReplicas[replica.GetCollectionID()]++
|
||||
for _, node := range replica.GetNodes() {
|
||||
if !s.meta.ContainsNode(replica.GetResourceGroup(), node) {
|
||||
outgoingNodes[replica.GetCollectionID()]++
|
||||
}
|
||||
}
|
||||
}
|
||||
incomingNodes := make(map[int64]int32)
|
||||
collections := s.meta.GetAll()
|
||||
for _, collection := range collections {
|
||||
replicas := s.meta.GetByCollection(collection)
|
||||
|
||||
for _, replica := range replicas {
|
||||
if replica.GetResourceGroup() == req.GetResourceGroup() {
|
||||
continue
|
||||
}
|
||||
for _, node := range replica.GetNodes() {
|
||||
if s.meta.ContainsNode(req.GetResourceGroup(), node) {
|
||||
incomingNodes[collection]++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resp.ResourceGroup = &querypb.ResourceGroupInfo{
|
||||
Name: req.GetResourceGroup(),
|
||||
Capacity: int32(rg.GetCapacity()),
|
||||
NumAvailableNode: int32(len(rg.GetNodes())),
|
||||
NumLoadedReplica: loadedReplicas,
|
||||
NumOutgoingNode: outgoingNodes,
|
||||
NumIncomingNode: incomingNodes,
|
||||
}
|
||||
return resp, nil
|
||||
}
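A self-contained sketch (plain maps instead of the real meta types) of how DescribeResourceGroup counts node traffic per collection: a node referenced by one of this group's replicas but no longer belonging to the group counts as outgoing, while a node of this group referenced by a replica that belongs to another group counts as incoming. The example mirrors the rg11/rg12 setup used in the service test below.

package main

import "fmt"

type replica struct {
	collection    int64
	resourceGroup string
	nodes         []int64
}

// countTraffic returns per-collection outgoing and incoming node counts for rgName.
func countTraffic(rgName string, rgNodes map[int64]bool, replicas []replica) (outgoing, incoming map[int64]int32) {
	outgoing = make(map[int64]int32)
	incoming = make(map[int64]int32)
	for _, r := range replicas {
		for _, n := range r.nodes {
			switch {
			case r.resourceGroup == rgName && !rgNodes[n]:
				outgoing[r.collection]++ // our replica leans on a foreign node
			case r.resourceGroup != rgName && rgNodes[n]:
				incoming[r.collection]++ // a foreign replica leans on our node
			}
		}
	}
	return outgoing, incoming
}

func main() {
	rgNodes := map[int64]bool{1011: true, 1012: true} // members of "rg11"
	replicas := []replica{
		{collection: 1, resourceGroup: "rg11", nodes: []int64{1011, 1013}},
		{collection: 2, resourceGroup: "rg12", nodes: []int64{1012, 1014}},
	}
	out, in := countTraffic("rg11", rgNodes, replicas)
	fmt.Println(out, in) // map[1:1] map[2:1]
}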
|
||||
|
|
|
@ -128,7 +128,8 @@ func (suite *ServiceSuite) SetupTest() {
|
|||
|
||||
suite.store = meta.NewMetaStore(suite.kv)
|
||||
suite.dist = meta.NewDistributionManager()
|
||||
suite.meta = meta.NewMeta(params.RandomIncrementIDAllocator(), suite.store)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
suite.meta = meta.NewMeta(params.RandomIncrementIDAllocator(), suite.store, suite.nodeMgr)
|
||||
suite.broker = meta.NewMockBroker(suite.T())
|
||||
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
|
||||
suite.targetObserver = observers.NewTargetObserver(
|
||||
|
@ -137,9 +138,10 @@ func (suite *ServiceSuite) SetupTest() {
|
|||
suite.dist,
|
||||
suite.broker,
|
||||
)
|
||||
suite.nodeMgr = session.NewNodeManager()
|
||||
for _, node := range suite.nodes {
|
||||
suite.nodeMgr.Add(session.NewNodeInfo(node, "localhost"))
|
||||
err := suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, node)
|
||||
suite.NoError(err)
|
||||
}
|
||||
suite.cluster = session.NewMockCluster(suite.T())
|
||||
suite.jobScheduler = job.NewScheduler()
|
||||
|
@ -334,6 +336,260 @@ func (suite *ServiceSuite) TestLoadCollection() {
|
|||
suite.Contains(resp.Reason, ErrNotHealthy.Error())
|
||||
}
|
||||
|
||||
func (suite *ServiceSuite) TestResourceGroup() {
|
||||
ctx := context.Background()
|
||||
server := suite.server
|
||||
|
||||
createRG := &milvuspb.CreateResourceGroupRequest{
|
||||
ResourceGroup: "rg1",
|
||||
}
|
||||
|
||||
resp, err := server.CreateResourceGroup(ctx, createRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode)
|
||||
|
||||
resp, err = server.CreateResourceGroup(ctx, createRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
|
||||
suite.Contains(resp.Reason, ErrCreateResourceGroupFailed.Error())
|
||||
suite.Contains(resp.Reason, meta.ErrRGAlreadyExist.Error())
|
||||
|
||||
listRG := &milvuspb.ListResourceGroupsRequest{}
|
||||
resp1, err := server.ListResourceGroups(ctx, listRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_Success, resp1.Status.ErrorCode)
|
||||
suite.Len(resp1.ResourceGroups, 2)
|
||||
|
||||
server.nodeMgr.Add(session.NewNodeInfo(1011, "localhost"))
|
||||
server.nodeMgr.Add(session.NewNodeInfo(1012, "localhost"))
|
||||
server.nodeMgr.Add(session.NewNodeInfo(1013, "localhost"))
|
||||
server.nodeMgr.Add(session.NewNodeInfo(1014, "localhost"))
|
||||
server.meta.ResourceManager.AddResourceGroup("rg11")
|
||||
server.meta.ResourceManager.AssignNode("rg11", 1011)
|
||||
server.meta.ResourceManager.AssignNode("rg11", 1012)
|
||||
server.meta.ResourceManager.AddResourceGroup("rg12")
|
||||
server.meta.ResourceManager.AssignNode("rg12", 1013)
|
||||
server.meta.ResourceManager.AssignNode("rg12", 1014)
|
||||
server.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
|
||||
server.meta.CollectionManager.PutCollection(utils.CreateTestCollection(2, 1))
|
||||
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
|
||||
ID: 1,
|
||||
CollectionID: 1,
|
||||
Nodes: []int64{1011, 1013},
|
||||
ResourceGroup: "rg11"},
|
||||
typeutil.NewUniqueSet(1011, 1013)),
|
||||
)
|
||||
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
|
||||
ID: 2,
|
||||
CollectionID: 2,
|
||||
Nodes: []int64{1012, 1014},
|
||||
ResourceGroup: "rg12"},
|
||||
typeutil.NewUniqueSet(1012, 1014)),
|
||||
)
|
||||
|
||||
describeRG := &querypb.DescribeResourceGroupRequest{
|
||||
ResourceGroup: "rg11",
|
||||
}
|
||||
resp2, err := server.DescribeResourceGroup(ctx, describeRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_Success, resp2.Status.ErrorCode)
|
||||
suite.Equal("rg11", resp2.GetResourceGroup().GetName())
|
||||
suite.Equal(int32(2), resp2.GetResourceGroup().GetCapacity())
|
||||
suite.Equal(int32(2), resp2.GetResourceGroup().GetNumAvailableNode())
|
||||
suite.Equal(map[int64]int32{1: 1}, resp2.GetResourceGroup().GetNumLoadedReplica())
|
||||
suite.Equal(map[int64]int32{2: 1}, resp2.GetResourceGroup().GetNumIncomingNode())
|
||||
suite.Equal(map[int64]int32{1: 1}, resp2.GetResourceGroup().GetNumOutgoingNode())
|
||||
|
||||
dropRG := &milvuspb.DropResourceGroupRequest{
|
||||
ResourceGroup: "rg1",
|
||||
}
|
||||
|
||||
resp3, err := server.DropResourceGroup(ctx, dropRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_Success, resp3.ErrorCode)
|
||||
|
||||
resp4, err := server.ListResourceGroups(ctx, listRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_Success, resp4.Status.ErrorCode)
|
||||
suite.Len(resp4.GetResourceGroups(), 3)
|
||||
}
|
||||
|
||||
func (suite *ServiceSuite) TestResourceGroupFailed() {
|
||||
ctx := context.Background()
|
||||
server := suite.server
|
||||
|
||||
// illegal argument
|
||||
describeRG := &querypb.DescribeResourceGroupRequest{
|
||||
ResourceGroup: "rfffff",
|
||||
}
|
||||
resp, err := server.DescribeResourceGroup(ctx, describeRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.Status.ErrorCode)
|
||||
|
||||
// server unhealthy
|
||||
server.status.Store(commonpb.StateCode_Abnormal)
|
||||
|
||||
createRG := &milvuspb.CreateResourceGroupRequest{
|
||||
ResourceGroup: "rg1",
|
||||
}
|
||||
|
||||
resp1, err := server.CreateResourceGroup(ctx, createRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp1.ErrorCode)
|
||||
|
||||
listRG := &milvuspb.ListResourceGroupsRequest{}
|
||||
resp2, err := server.ListResourceGroups(ctx, listRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp2.Status.ErrorCode)
|
||||
|
||||
describeRG = &querypb.DescribeResourceGroupRequest{
|
||||
ResourceGroup: "rg1",
|
||||
}
|
||||
resp3, err := server.DescribeResourceGroup(ctx, describeRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp3.Status.ErrorCode)
|
||||
|
||||
dropRG := &milvuspb.DropResourceGroupRequest{
|
||||
ResourceGroup: "rg1",
|
||||
}
|
||||
resp4, err := server.DropResourceGroup(ctx, dropRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp4.ErrorCode)
|
||||
|
||||
resp5, err := server.ListResourceGroups(ctx, listRG)
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp5.Status.ErrorCode)
|
||||
}
|
||||
|
||||
func (suite *ServiceSuite) TestTransferNode() {
	ctx := context.Background()
	server := suite.server

	err := server.meta.ResourceManager.AddResourceGroup("rg1")
	suite.NoError(err)
	err = server.meta.ResourceManager.AddResourceGroup("rg2")
	suite.NoError(err)
	// test transfer node
	resp, err := server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rg1",
	})
	suite.NoError(err)
	suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode)
	nodes, err := server.meta.ResourceManager.GetNodes("rg1")
	suite.NoError(err)
	suite.Len(nodes, 1)

	// test transfer node meet non-exist source rg
	resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
		SourceResourceGroup: "rgggg",
		TargetResourceGroup: meta.DefaultResourceGroupName,
	})
	suite.NoError(err)
	suite.Contains(resp.Reason, meta.ErrRGNotExist.Error())
	suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)

	// test transfer node meet non-exist target rg
	resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rgggg",
	})
	suite.NoError(err)
	suite.Contains(resp.Reason, meta.ErrRGNotExist.Error())
	suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)

	// server unhealthy
	server.status.Store(commonpb.StateCode_Abnormal)
	resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rg1",
	})
	suite.NoError(err)
	suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
}

func (suite *ServiceSuite) TestTransferReplica() {
	ctx := context.Background()
	server := suite.server

	err := server.meta.ResourceManager.AddResourceGroup("rg1")
	suite.NoError(err)
	err = server.meta.ResourceManager.AddResourceGroup("rg2")
	suite.NoError(err)
	err = server.meta.ResourceManager.AddResourceGroup("rg3")
	suite.NoError(err)

	resp, err := suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rg1",
		CollectionID: 1,
		NumReplica: 2,
	})
	suite.NoError(err)
	suite.Contains(resp.Reason, "found [0] replicas of collection[1] in source resource group")

	resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: "rgg",
		TargetResourceGroup: meta.DefaultResourceGroupName,
		CollectionID: 1,
		NumReplica: 2,
	})
	suite.NoError(err)
	suite.Equal(resp.ErrorCode, commonpb.ErrorCode_IllegalArgument)

	resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rgg",
		CollectionID: 1,
		NumReplica: 2,
	})
	suite.NoError(err)
	suite.Equal(resp.ErrorCode, commonpb.ErrorCode_IllegalArgument)

	suite.server.meta.Put(meta.NewReplica(&querypb.Replica{
		CollectionID: 1,
		ID: 111,
		ResourceGroup: meta.DefaultResourceGroupName,
	}, typeutil.NewUniqueSet(1)))
	suite.server.meta.Put(meta.NewReplica(&querypb.Replica{
		CollectionID: 1,
		ID: 222,
		ResourceGroup: meta.DefaultResourceGroupName,
	}, typeutil.NewUniqueSet(2)))

	suite.server.nodeMgr.Add(session.NewNodeInfo(1001, "localhost"))
	suite.server.nodeMgr.Add(session.NewNodeInfo(1002, "localhost"))
	suite.server.nodeMgr.Add(session.NewNodeInfo(1003, "localhost"))
	suite.server.nodeMgr.Add(session.NewNodeInfo(1004, "localhost"))
	suite.server.meta.AssignNode("rg1", 1001)
	suite.server.meta.AssignNode("rg2", 1002)
	suite.server.meta.AssignNode("rg3", 1003)
	suite.server.meta.AssignNode("rg3", 1004)

	resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rg3",
		CollectionID: 1,
		NumReplica: 2,
	})

	suite.NoError(err)
	suite.Equal(resp.ErrorCode, commonpb.ErrorCode_Success)
	suite.Len(suite.server.meta.GetByResourceGroup("rg3"), 2)

	// server unhealthy
	server.status.Store(commonpb.StateCode_Abnormal)
	resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: meta.DefaultResourceGroupName,
		TargetResourceGroup: "rg3",
		CollectionID: 1,
		NumReplica: 2,
	})

	suite.NoError(err)
	suite.Equal(resp.ErrorCode, commonpb.ErrorCode_UnexpectedError)
}

func (suite *ServiceSuite) TestLoadCollectionFailed() {
	suite.loadAll()
	ctx := context.Background()
@@ -365,6 +621,19 @@ func (suite *ServiceSuite) TestLoadCollectionFailed() {
		suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
		suite.Contains(resp.Reason, job.ErrLoadParameterMismatched.Error())
	}

	// Test load with wrong rg num
	for _, collection := range suite.collections {
		req := &querypb.LoadCollectionRequest{
			CollectionID: collection,
			ReplicaNumber: suite.replicaNumber[collection] + 1,
			ResourceGroups: []string{"rg1", "rg2"},
		}
		resp, err := server.LoadCollection(ctx, req)
		suite.NoError(err)
		suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
		suite.Contains(resp.Reason, ErrLoadUseWrongRG.Error())
	}
}

func (suite *ServiceSuite) TestLoadPartition() {
@@ -756,8 +1025,9 @@ func (suite *ServiceSuite) TestLoadBalance() {
	// Test get balance first segment
	for _, collection := range suite.collections {
		replicas := suite.meta.ReplicaManager.GetByCollection(collection)
		srcNode := replicas[0].GetNodes()[0]
		dstNode := replicas[0].GetNodes()[1]
		nodes := replicas[0].GetNodes()
		srcNode := nodes[0]
		dstNode := nodes[1]
		suite.updateCollectionStatus(collection, querypb.LoadStatus_Loaded)
		suite.updateSegmentDist(collection, srcNode)
		segments := suite.getAllSegments(collection)
@@ -883,8 +1153,9 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
	// Test load balance with not fully loaded
	for _, collection := range suite.collections {
		replicas := suite.meta.ReplicaManager.GetByCollection(collection)
		srcNode := replicas[0].GetNodes()[0]
		dstNode := replicas[0].GetNodes()[1]
		nodes := replicas[0].GetNodes()
		srcNode := nodes[0]
		dstNode := nodes[1]
		suite.updateCollectionStatus(collection, querypb.LoadStatus_Loading)
		segments := suite.getAllSegments(collection)
		req := &querypb.LoadBalanceRequest{
@@ -926,8 +1197,9 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
	// Test balance task failed
	for _, collection := range suite.collections {
		replicas := suite.meta.ReplicaManager.GetByCollection(collection)
		srcNode := replicas[0].GetNodes()[0]
		dstNode := replicas[0].GetNodes()[1]
		nodes := replicas[0].GetNodes()
		srcNode := nodes[0]
		dstNode := nodes[1]
		suite.updateCollectionStatus(collection, querypb.LoadStatus_Loaded)
		suite.updateSegmentDist(collection, srcNode)
		segments := suite.getAllSegments(collection)
@@ -1163,6 +1435,11 @@ func (suite *ServiceSuite) TestGetShardLeadersFailed() {
	}

	// Segment not fully loaded
	for _, node := range suite.nodes {
		suite.dist.SegmentDistManager.Update(node)
		suite.dist.ChannelDistManager.Update(node)
		suite.dist.LeaderViewManager.Update(node)
	}
	suite.updateChannelDistWithoutSegment(collection)
	resp, err = server.GetShardLeaders(ctx, req)
	suite.NoError(err)

@@ -130,7 +130,7 @@ func (suite *TaskSuite) SetupTest() {

	suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath)
	suite.store = meta.NewMetaStore(suite.kv)
	suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store)
	suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store, session.NewNodeManager())
	suite.dist = meta.NewDistributionManager()
	suite.broker = meta.NewMockBroker(suite.T())
	suite.target = meta.NewTargetManager(suite.broker, suite.meta)
@@ -1258,14 +1258,14 @@ func (suite *TaskSuite) newScheduler() *taskScheduler {
}

func createReplica(collection int64, nodes ...int64) *meta.Replica {
	return &meta.Replica{
		Replica: &querypb.Replica{
	return meta.NewReplica(
		&querypb.Replica{
			ID: rand.Int63()/2 + 1,
			CollectionID: collection,
			Nodes: nodes,
		},
		Nodes: typeutil.NewUniqueSet(nodes...),
	}
		typeutil.NewUniqueSet(nodes...),
	)
}

func TestTask(t *testing.T) {

@@ -18,12 +18,22 @@ package utils

import (
	"context"
	"errors"
	"fmt"
	"math/rand"

	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/internal/querycoordv2/session"
	"github.com/samber/lo"
	"go.uber.org/zap"
)

var (
	ErrGetNodesFromRG = errors.New("failed to get node from rg")
	ErrNoReplicaFound = errors.New("no replica found during assign nodes")
	ErrReplicasInconsistent = errors.New("all replicas should belong to same collection during assign nodes")
	ErrUseWrongNumRG = errors.New("resource num can only be 0, 1 or same as replica number")
)

func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, replicaID int64) []*session.NodeInfo {
@@ -32,8 +42,8 @@ func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeM
		return nil
	}

	nodes := make([]*session.NodeInfo, 0, len(replica.Nodes))
	for node := range replica.Nodes {
	nodes := make([]*session.NodeInfo, 0, len(replica.GetNodes()))
	for _, node := range replica.GetNodes() {
		nodes = append(nodes, nodeMgr.Get(node))
	}
	return nodes
@@ -64,7 +74,7 @@ func GroupNodesByReplica(replicaMgr *meta.ReplicaManager, collectionID int64, no
	replicas := replicaMgr.GetByCollection(collectionID)
	for _, replica := range replicas {
		for _, node := range nodes {
			if replica.Nodes.Contain(node) {
			if replica.Contains(node) {
				ret[replica.ID] = append(ret[replica.ID], node)
			}
		}
@@ -90,7 +100,7 @@ func GroupSegmentsByReplica(replicaMgr *meta.ReplicaManager, collectionID int64,
	replicas := replicaMgr.GetByCollection(collectionID)
	for _, replica := range replicas {
		for _, segment := range segments {
			if replica.Nodes.Contain(segment.Node) {
			if replica.Contains(segment.Node) {
				ret[replica.ID] = append(ret[replica.ID], segment)
			}
		}
@@ -101,24 +111,92 @@ func GroupSegmentsByReplica(replicaMgr *meta.ReplicaManager, collectionID int64,
// AssignNodesToReplicas assigns nodes to the given replicas,
// all given replicas must be the same collection,
// the given replicas have to be not in ReplicaManager
func AssignNodesToReplicas(nodeMgr *session.NodeManager, replicas ...*meta.Replica) {
	replicaNumber := len(replicas)
	nodes := nodeMgr.GetAll()
	rand.Shuffle(len(nodes), func(i, j int) {
		nodes[i], nodes[j] = nodes[j], nodes[i]
func AssignNodesToReplicas(m *meta.Meta, rgName string, replicas ...*meta.Replica) error {
	replicaIDs := lo.Map(replicas, func(r *meta.Replica, _ int) int64 { return r.GetID() })
	log := log.With(zap.Int64("collectionID", replicas[0].GetCollectionID()),
		zap.Int64s("replicas", replicaIDs),
		zap.String("rgName", rgName),
	)
	if len(replicaIDs) == 0 {
		return nil
	}

	nodeGroup, err := m.ResourceManager.GetNodes(rgName)
	if err != nil {
		log.Error("failed to get nodes", zap.Error(err))
		return err
	}

	if len(nodeGroup) < len(replicaIDs) {
		log.Error(meta.ErrNodeNotEnough.Error())
		return meta.ErrNodeNotEnough
	}

	rand.Shuffle(len(nodeGroup), func(i, j int) {
		nodeGroup[i], nodeGroup[j] = nodeGroup[j], nodeGroup[i]
	})

	for i, node := range nodes {
		replicas[i%replicaNumber].AddNode(node.ID())
	log.Info("assign nodes to replicas",
		zap.Int64s("nodes", nodeGroup),
	)
	for i, node := range nodeGroup {
		replicas[i%len(replicas)].AddNode(node)
	}

	return nil
}

// SpawnReplicas spawns replicas for given collection, assign nodes to them, and save them
func SpawnReplicas(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, collection int64, replicaNumber int32) ([]*meta.Replica, error) {
	replicas, err := replicaMgr.Spawn(collection, replicaNumber)
func SpawnAllReplicasInRG(m *meta.Meta, collection int64, replicaNumber int32, rgName string) ([]*meta.Replica, error) {
	replicas, err := m.ReplicaManager.Spawn(collection, replicaNumber, rgName)
	if err != nil {
		return nil, err
	}
	AssignNodesToReplicas(nodeMgr, replicas...)
	return replicas, replicaMgr.Put(replicas...)
	err = AssignNodesToReplicas(m, rgName, replicas...)
	if err != nil {
		return nil, err
	}
	return replicas, m.ReplicaManager.Put(replicas...)
}

func checkResourceGroup(collectionID int64, replicaNumber int32, resourceGroups []string) error {
	if len(resourceGroups) != 0 && len(resourceGroups) != 1 && len(resourceGroups) != int(replicaNumber) {
		return ErrUseWrongNumRG
	}

	return nil
}

func SpawnReplicasWithRG(m *meta.Meta, collection int64, resourceGroups []string, replicaNumber int32) ([]*meta.Replica, error) {
	if err := checkResourceGroup(collection, replicaNumber, resourceGroups); err != nil {
		return nil, err
	}

	if len(resourceGroups) == 0 {
		return SpawnAllReplicasInRG(m, collection, replicaNumber, meta.DefaultResourceGroupName)
	}

	if len(resourceGroups) == 1 {
		return SpawnAllReplicasInRG(m, collection, replicaNumber, resourceGroups[0])
	}

	replicaSet := make([]*meta.Replica, 0)
	for _, rgName := range resourceGroups {
		if !m.ResourceManager.ContainResourceGroup(rgName) {
			return nil, meta.ErrRGNotExist
		}

		replicas, err := m.ReplicaManager.Spawn(collection, 1, rgName)
		if err != nil {
			return nil, err
		}

		err = AssignNodesToReplicas(m, rgName, replicas...)
		if err != nil {
			return nil, err
		}
		replicaSet = append(replicaSet, replicas...)
	}

	return replicaSet, m.ReplicaManager.Put(replicaSet...)
}

@@ -0,0 +1,112 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"testing"

	etcdKV "github.com/milvus-io/milvus/internal/kv/etcd"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	. "github.com/milvus-io/milvus/internal/querycoordv2/params"
	"github.com/milvus-io/milvus/internal/querycoordv2/session"
	"github.com/milvus-io/milvus/internal/util/etcd"
	"github.com/stretchr/testify/assert"
)

func TestSpawnReplicasWithRG(t *testing.T) {
	Params.Init()
	config := GenerateEtcdConfig()
	cli, err := etcd.GetEtcdClient(
		config.UseEmbedEtcd,
		config.EtcdUseSSL,
		config.Endpoints,
		config.EtcdTLSCert,
		config.EtcdTLSKey,
		config.EtcdTLSCACert,
		config.EtcdTLSMinVersion)
	assert.NoError(t, err)
	kv := etcdKV.NewEtcdKV(cli, config.MetaRootPath)

	store := meta.NewMetaStore(kv)
	nodeMgr := session.NewNodeManager()
	m := meta.NewMeta(RandomIncrementIDAllocator(), store, nodeMgr)
	m.ResourceManager.AddResourceGroup("rg1")
	m.ResourceManager.AddResourceGroup("rg2")
	m.ResourceManager.AddResourceGroup("rg3")

	for i := 1; i < 10; i++ {
		nodeMgr.Add(session.NewNodeInfo(int64(i), "localhost"))

		if i%3 == 0 {
			m.ResourceManager.AssignNode("rg1", int64(i))
		}
		if i%3 == 1 {
			m.ResourceManager.AssignNode("rg2", int64(i))
		}
		if i%3 == 2 {
			m.ResourceManager.AssignNode("rg3", int64(i))
		}
	}

	type args struct {
		m *meta.Meta
		collection int64
		resourceGroups []string
		replicaNumber int32
	}

	tests := []struct {
		name string
		args args
		wantReplicaNum int
		wantErr bool
	}{
		{
			name: "test 3 replica on 1 rg",
			args: args{m, 1000, []string{"rg1"}, 3},
			wantReplicaNum: 3,
			wantErr: false,
		},

		{
			name: "test 3 replica on 2 rg",
			args: args{m, 1000, []string{"rg1", "rg2"}, 3},
			wantReplicaNum: 0,
			wantErr: true,
		},

		{
			name: "test 3 replica on 3 rg",
			args: args{m, 1000, []string{"rg1", "rg2", "rg3"}, 3},
			wantReplicaNum: 3,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := SpawnReplicasWithRG(tt.args.m, tt.args.collection, tt.args.resourceGroups, tt.args.replicaNumber)
			if (err != nil) != tt.wantErr {
				t.Errorf("SpawnReplicasWithRG() error = %v, wantErr %v", err, tt.wantErr)
				return
			}

			if len(got) != tt.wantReplicaNum {
				t.Errorf("SpawnReplicasWithRG() = %v, want %d replicas", got, tt.args.replicaNumber)
			}
		})
	}
}

@@ -52,14 +52,15 @@ func CreateTestChannel(collection, node, version int64, channel string) *meta.Dm
}

func CreateTestReplica(id, collectionID int64, nodes []int64) *meta.Replica {
	return &meta.Replica{
		Replica: &querypb.Replica{
			ID: id,
			CollectionID: collectionID,
			Nodes: nodes,
	return meta.NewReplica(
		&querypb.Replica{
			ID: id,
			CollectionID: collectionID,
			Nodes: nodes,
			ResourceGroup: meta.DefaultResourceGroupName,
		},
		Nodes: typeutil.NewUniqueSet(nodes...),
	}
		typeutil.NewUniqueSet(nodes...),
	)
}

func CreateTestCollection(collection int64, replica int32) *meta.Collection {

@@ -20,7 +20,6 @@ import (
	"fmt"

	"github.com/milvus-io/milvus-proto/go-api/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/milvuspb"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
@@ -148,11 +147,3 @@ func MergeDmChannelInfo(infos []*datapb.VchannelInfo) *meta.DmChannel {

	return dmChannel
}

func Replica2ReplicaInfo(replica *querypb.Replica) *milvuspb.ReplicaInfo {
	return &milvuspb.ReplicaInfo{
		ReplicaID: replica.GetID(),
		CollectionID: replica.GetCollectionID(),
		NodeIds: replica.GetNodes(),
	}
}

@@ -1308,6 +1308,13 @@ type ProxyComponent interface {
	SelectGrant(ctx context.Context, req *milvuspb.SelectGrantRequest) (*milvuspb.SelectGrantResponse, error)

	CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error)

	CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error)
	DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error)
	TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error)
	TransferReplica(ctx context.Context, req *milvuspb.TransferReplicaRequest) (*commonpb.Status, error)
	ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error)
	DescribeResourceGroup(ctx context.Context, req *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error)
}

// QueryNode is the interface `querynode` package implements
@@ -1377,6 +1384,13 @@ type QueryCoord interface {
	GetShardLeaders(ctx context.Context, req *querypb.GetShardLeadersRequest) (*querypb.GetShardLeadersResponse, error)

	CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error)

	CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error)
	DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error)
	TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error)
	TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error)
	ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error)
	DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error)
}

// QueryCoordComponent is used by grpc server of QueryCoord

@@ -101,3 +101,27 @@ func (m *GrpcQueryCoordClient) GetReplicas(ctx context.Context, in *milvuspb.Get
func (m *GrpcQueryCoordClient) GetShardLeaders(ctx context.Context, in *querypb.GetShardLeadersRequest, opts ...grpc.CallOption) (*querypb.GetShardLeadersResponse, error) {
	return &querypb.GetShardLeadersResponse{}, m.Err
}

func (m *GrpcQueryCoordClient) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
	return &commonpb.Status{}, m.Err
}

func (m *GrpcQueryCoordClient) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
	return &commonpb.Status{}, m.Err
}

func (m *GrpcQueryCoordClient) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
	return &commonpb.Status{}, m.Err
}

func (m *GrpcQueryCoordClient) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
	return &commonpb.Status{}, m.Err
}

func (m *GrpcQueryCoordClient) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest, opts ...grpc.CallOption) (*milvuspb.ListResourceGroupsResponse, error) {
	return &milvuspb.ListResourceGroupsResponse{}, m.Err
}

func (m *GrpcQueryCoordClient) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest, opts ...grpc.CallOption) (*querypb.DescribeResourceGroupResponse, error) {
	return &querypb.DescribeResourceGroupResponse{}, m.Err
}

@@ -753,8 +753,11 @@ type queryCoordConfig struct {
	CheckHandoffInterval time.Duration
	EnableActiveStandby bool

	NextTargetSurviveTime time.Duration
	UpdateNextTargetInterval time.Duration
	NextTargetSurviveTime time.Duration
	UpdateNextTargetInterval time.Duration
	CheckNodeInReplicaInterval time.Duration
	CheckResourceGroupInterval time.Duration
	EnableRGAutoRecover bool
}

func (p *queryCoordConfig) init(base *BaseTable) {
@@ -784,6 +787,9 @@ func (p *queryCoordConfig) init(base *BaseTable) {
	p.initEnableActiveStandby()
	p.initNextTargetSurviveTime()
	p.initUpdateNextTargetInterval()
	p.initCheckNodeInReplicaInterval()
	p.initCheckResourceGroupInterval()
	p.initEnableRGAutoRecover()
}

func (p *queryCoordConfig) initTaskRetryNum() {
@@ -937,6 +943,28 @@ func (p *queryCoordConfig) initUpdateNextTargetInterval() {
	p.UpdateNextTargetInterval = time.Duration(updateNextTargetInterval) * time.Second
}

func (p *queryCoordConfig) initCheckNodeInReplicaInterval() {
	interval := p.Base.LoadWithDefault("queryCoord.checkNodeInReplicaInterval", "60")
	checkNodeInReplicaInterval, err := strconv.ParseInt(interval, 10, 64)
	if err != nil {
		panic(err)
	}
	p.CheckNodeInReplicaInterval = time.Duration(checkNodeInReplicaInterval) * time.Second
}

func (p *queryCoordConfig) initCheckResourceGroupInterval() {
	interval := p.Base.LoadWithDefault("queryCoord.checkResourceGroupInterval", "60")
	checkResourceGroupInterval, err := strconv.ParseInt(interval, 10, 64)
	if err != nil {
		panic(err)
	}
	p.CheckResourceGroupInterval = time.Duration(checkResourceGroupInterval) * time.Second
}

func (p *queryCoordConfig) initEnableRGAutoRecover() {
	p.EnableRGAutoRecover = p.Base.ParseBool("queryCoord.enableRGAutoRecover", true)
}

// /////////////////////////////////////////////////////////////////////////////
// --- querynode ---
type queryNodeConfig struct {

@@ -250,6 +250,12 @@ func TestComponentParam(t *testing.T) {
		Params := CParams.QueryCoordCfg
		assert.Equal(t, Params.EnableActiveStandby, false)
		t.Logf("queryCoord EnableActiveStandby = %t", Params.EnableActiveStandby)

		assert.Equal(t, float64(300), Params.NextTargetSurviveTime.Seconds())
		assert.Equal(t, float64(30), Params.UpdateNextTargetInterval.Seconds())
		assert.Equal(t, float64(60), Params.CheckNodeInReplicaInterval.Seconds())
		assert.Equal(t, float64(60), Params.CheckResourceGroupInterval.Seconds())
		assert.Equal(t, true, Params.EnableRGAutoRecover)
	})

	t.Run("test queryNodeConfig", func(t *testing.T) {