resource group impl (#21609)

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/21863/head
wei liu 2023-01-30 10:19:48 +08:00 committed by GitHub
parent 66027790a2
commit 73c44d4b29
63 changed files with 5547 additions and 514 deletions

2
go.mod
View File

@ -27,7 +27,7 @@ require (
github.com/klauspost/compress v1.14.4
github.com/lingdor/stackerror v0.0.0-20191119040541-976d8885ed76
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230112125535-5f87a812202c
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230129073344-87a125853a0b
github.com/minio/minio-go/v7 v7.0.17
github.com/panjf2000/ants/v2 v2.4.8
github.com/pkg/errors v0.9.1

4
go.sum
View File

@ -491,8 +491,8 @@ github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyex
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230112125535-5f87a812202c h1:74uRPm5WWagMe8bItOQ8QFuXcrUIWuWGAQ1GrwVM4J4=
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230112125535-5f87a812202c/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk=
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230129073344-87a125853a0b h1:HoJ3J70COnaR3WQTA4gN70DkiaMRPkyLI6yXrPqpFiU=
github.com/milvus-io/milvus-proto/go-api v0.0.0-20230129073344-87a125853a0b/go.mod h1:148qnlmZ0Fdm1Fq+Mj/OW2uDoEP25g3mjh0vMGtkgmk=
github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A=
github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=

View File

@ -372,7 +372,7 @@ const char descriptor_table_protodef_common_2eproto[] PROTOBUF_SECTION_VARIABLE(
"\n\n\006Sealed\020\003\022\013\n\007Flushed\020\004\022\014\n\010Flushing\020\005\022\013"
"\n\007Dropped\020\006\022\r\n\tImporting\020\007*>\n\017Placeholde"
"rType\022\010\n\004None\020\000\022\020\n\014BinaryVector\020d\022\017\n\013Flo"
"atVector\020e*\277\016\n\007MsgType\022\r\n\tUndefined\020\000\022\024\n"
"atVector\020e*\300\016\n\007MsgType\022\r\n\tUndefined\020\000\022\024\n"
"\020CreateCollection\020d\022\022\n\016DropCollection\020e\022"
"\021\n\rHasCollection\020f\022\026\n\022DescribeCollection"
"\020g\022\023\n\017ShowCollections\020h\022\024\n\020GetSystemConf"
@ -416,53 +416,53 @@ const char descriptor_table_protodef_common_2eproto[] PROTOBUF_SECTION_VARIABLE(
"\n\020OperatePrivilege\020\306\014\022\020\n\013SelectGrant\020\307\014\022"
"\033\n\026RefreshPolicyInfoCache\020\310\014\022\017\n\nListPoli"
"cy\020\311\014\022\030\n\023CreateResourceGroup\020\244\r\022\026\n\021DropR"
"esourceGroup\020\245\r\022\026\n\021ListResourceGroup\020\246\r\022"
"\032\n\025DescribeResourceGroup\020\247\r\022\021\n\014TransferN"
"ode\020\250\r\022\024\n\017TransferReplica\020\251\r*\"\n\007DslType\022"
"\007\n\003Dsl\020\000\022\016\n\nBoolExprV1\020\001*B\n\017CompactionSt"
"ate\022\021\n\rUndefiedState\020\000\022\r\n\tExecuting\020\001\022\r\n"
"\tCompleted\020\002*X\n\020ConsistencyLevel\022\n\n\006Stro"
"ng\020\000\022\013\n\007Session\020\001\022\013\n\007Bounded\020\002\022\016\n\nEventu"
"ally\020\003\022\016\n\nCustomized\020\004*\236\001\n\013ImportState\022\021"
"\n\rImportPending\020\000\022\020\n\014ImportFailed\020\001\022\021\n\rI"
"mportStarted\020\002\022\023\n\017ImportPersisted\020\005\022\021\n\rI"
"mportFlushed\020\010\022\023\n\017ImportCompleted\020\006\022\032\n\026I"
"mportFailedAndCleaned\020\007*2\n\nObjectType\022\016\n"
"\nCollection\020\000\022\n\n\006Global\020\001\022\010\n\004User\020\002*\233\005\n\017"
"ObjectPrivilege\022\020\n\014PrivilegeAll\020\000\022\035\n\031Pri"
"vilegeCreateCollection\020\001\022\033\n\027PrivilegeDro"
"pCollection\020\002\022\037\n\033PrivilegeDescribeCollec"
"tion\020\003\022\034\n\030PrivilegeShowCollections\020\004\022\021\n\r"
"PrivilegeLoad\020\005\022\024\n\020PrivilegeRelease\020\006\022\027\n"
"\023PrivilegeCompaction\020\007\022\023\n\017PrivilegeInser"
"t\020\010\022\023\n\017PrivilegeDelete\020\t\022\032\n\026PrivilegeGet"
"Statistics\020\n\022\030\n\024PrivilegeCreateIndex\020\013\022\030"
"\n\024PrivilegeIndexDetail\020\014\022\026\n\022PrivilegeDro"
"pIndex\020\r\022\023\n\017PrivilegeSearch\020\016\022\022\n\016Privile"
"geFlush\020\017\022\022\n\016PrivilegeQuery\020\020\022\030\n\024Privile"
"geLoadBalance\020\021\022\023\n\017PrivilegeImport\020\022\022\034\n\030"
"PrivilegeCreateOwnership\020\023\022\027\n\023PrivilegeU"
"pdateUser\020\024\022\032\n\026PrivilegeDropOwnership\020\025\022"
"\034\n\030PrivilegeSelectOwnership\020\026\022\034\n\030Privile"
"geManageOwnership\020\027\022\027\n\023PrivilegeSelectUs"
"er\020\030\022\023\n\017PrivilegeUpsert\020\031*S\n\tStateCode\022\020"
"\n\014Initializing\020\000\022\013\n\007Healthy\020\001\022\014\n\010Abnorma"
"l\020\002\022\013\n\007StandBy\020\003\022\014\n\010Stopping\020\004*c\n\tLoadSt"
"ate\022\025\n\021LoadStateNotExist\020\000\022\024\n\020LoadStateN"
"otLoad\020\001\022\024\n\020LoadStateLoading\020\002\022\023\n\017LoadSt"
"ateLoaded\020\003:^\n\021privilege_ext_obj\022\037.googl"
"e.protobuf.MessageOptions\030\351\007 \001(\0132!.milvu"
"s.proto.common.PrivilegeExtBf\n\016io.milvus"
".grpcB\013CommonProtoP\001Z1github.com/milvus-"
"io/milvus-proto/go-api/commonpb\240\001\001\252\002\016IO."
"Milvus.Grpcb\006proto3"
"esourceGroup\020\245\r\022\027\n\022ListResourceGroups\020\246\r"
"\022\032\n\025DescribeResourceGroup\020\247\r\022\021\n\014Transfer"
"Node\020\250\r\022\024\n\017TransferReplica\020\251\r*\"\n\007DslType"
"\022\007\n\003Dsl\020\000\022\016\n\nBoolExprV1\020\001*B\n\017CompactionS"
"tate\022\021\n\rUndefiedState\020\000\022\r\n\tExecuting\020\001\022\r"
"\n\tCompleted\020\002*X\n\020ConsistencyLevel\022\n\n\006Str"
"ong\020\000\022\013\n\007Session\020\001\022\013\n\007Bounded\020\002\022\016\n\nEvent"
"ually\020\003\022\016\n\nCustomized\020\004*\236\001\n\013ImportState\022"
"\021\n\rImportPending\020\000\022\020\n\014ImportFailed\020\001\022\021\n\r"
"ImportStarted\020\002\022\023\n\017ImportPersisted\020\005\022\021\n\r"
"ImportFlushed\020\010\022\023\n\017ImportCompleted\020\006\022\032\n\026"
"ImportFailedAndCleaned\020\007*2\n\nObjectType\022\016"
"\n\nCollection\020\000\022\n\n\006Global\020\001\022\010\n\004User\020\002*\233\005\n"
"\017ObjectPrivilege\022\020\n\014PrivilegeAll\020\000\022\035\n\031Pr"
"ivilegeCreateCollection\020\001\022\033\n\027PrivilegeDr"
"opCollection\020\002\022\037\n\033PrivilegeDescribeColle"
"ction\020\003\022\034\n\030PrivilegeShowCollections\020\004\022\021\n"
"\rPrivilegeLoad\020\005\022\024\n\020PrivilegeRelease\020\006\022\027"
"\n\023PrivilegeCompaction\020\007\022\023\n\017PrivilegeInse"
"rt\020\010\022\023\n\017PrivilegeDelete\020\t\022\032\n\026PrivilegeGe"
"tStatistics\020\n\022\030\n\024PrivilegeCreateIndex\020\013\022"
"\030\n\024PrivilegeIndexDetail\020\014\022\026\n\022PrivilegeDr"
"opIndex\020\r\022\023\n\017PrivilegeSearch\020\016\022\022\n\016Privil"
"egeFlush\020\017\022\022\n\016PrivilegeQuery\020\020\022\030\n\024Privil"
"egeLoadBalance\020\021\022\023\n\017PrivilegeImport\020\022\022\034\n"
"\030PrivilegeCreateOwnership\020\023\022\027\n\023Privilege"
"UpdateUser\020\024\022\032\n\026PrivilegeDropOwnership\020\025"
"\022\034\n\030PrivilegeSelectOwnership\020\026\022\034\n\030Privil"
"egeManageOwnership\020\027\022\027\n\023PrivilegeSelectU"
"ser\020\030\022\023\n\017PrivilegeUpsert\020\031*S\n\tStateCode\022"
"\020\n\014Initializing\020\000\022\013\n\007Healthy\020\001\022\014\n\010Abnorm"
"al\020\002\022\013\n\007StandBy\020\003\022\014\n\010Stopping\020\004*c\n\tLoadS"
"tate\022\025\n\021LoadStateNotExist\020\000\022\024\n\020LoadState"
"NotLoad\020\001\022\024\n\020LoadStateLoading\020\002\022\023\n\017LoadS"
"tateLoaded\020\003:^\n\021privilege_ext_obj\022\037.goog"
"le.protobuf.MessageOptions\030\351\007 \001(\0132!.milv"
"us.proto.common.PrivilegeExtBf\n\016io.milvu"
"s.grpcB\013CommonProtoP\001Z1github.com/milvus"
"-io/milvus-proto/go-api/commonpb\240\001\001\252\002\016IO"
".Milvus.Grpcb\006proto3"
;
static const ::_pbi::DescriptorTable* const descriptor_table_common_2eproto_deps[1] = {
&::descriptor_table_google_2fprotobuf_2fdescriptor_2eproto,
};
static ::_pbi::once_flag descriptor_table_common_2eproto_once;
const ::_pbi::DescriptorTable descriptor_table_common_2eproto = {
false, false, 5859, descriptor_table_protodef_common_2eproto,
false, false, 5860, descriptor_table_protodef_common_2eproto,
"common.proto",
&descriptor_table_common_2eproto_once, descriptor_table_common_2eproto_deps, 1, 11,
schemas, file_default_instances, TableStruct_common_2eproto::offsets,

View File

@ -354,7 +354,7 @@ enum MsgType : int {
ListPolicy = 1609,
CreateResourceGroup = 1700,
DropResourceGroup = 1701,
ListResourceGroup = 1702,
ListResourceGroups = 1702,
DescribeResourceGroup = 1703,
TransferNode = 1704,
TransferReplica = 1705,

View File

@ -869,25 +869,25 @@ func (s *Server) RenameCollection(ctx context.Context, req *milvuspb.RenameColle
}
func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return nil, nil
return s.proxy.CreateResourceGroup(ctx, req)
}
func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return nil, nil
return s.proxy.DropResourceGroup(ctx, req)
}
func (s *Server) DescribeResourceGroup(ctx context.Context, req *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error) {
return nil, nil
return s.proxy.DescribeResourceGroup(ctx, req)
}
func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return nil, nil
return s.proxy.TransferNode(ctx, req)
}
func (s *Server) TransferReplica(ctx context.Context, req *milvuspb.TransferReplicaRequest) (*commonpb.Status, error) {
return nil, nil
return s.proxy.TransferReplica(ctx, req)
}
func (s *Server) ListResourceGroup(ctx context.Context, req *milvuspb.ListResourceGroupRequest) (*milvuspb.ListResourceGroupResponse, error) {
return nil, nil
func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return s.proxy.ListResourceGroups(ctx, req)
}

View File

@ -293,7 +293,7 @@ func (m *MockRootCoord) RenameCollection(ctx context.Context, req *milvuspb.Rena
return nil, nil
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
type MockQueryCoord struct {
MockBase
initErr error
@ -408,6 +408,30 @@ func (m *MockQueryCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHea
}, nil
}
func (m *MockQueryCoord) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return nil, nil
}
func (m *MockQueryCoord) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return nil, nil
}
func (m *MockQueryCoord) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
return nil, nil
}
func (m *MockQueryCoord) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return nil, nil
}
func (m *MockQueryCoord) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
return nil, nil
}
func (m *MockQueryCoord) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return nil, nil
}
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
type MockDataCoord struct {
MockBase
@ -935,7 +959,7 @@ func (m *MockProxy) TransferReplica(ctx context.Context, req *milvuspb.TransferR
return nil, nil
}
func (m *MockProxy) ListResourceGroup(ctx context.Context, req *milvuspb.ListResourceGroupRequest) (*milvuspb.ListResourceGroupResponse, error) {
func (m *MockProxy) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return nil, nil
}
@ -1380,6 +1404,36 @@ func Test_NewServer(t *testing.T) {
assert.Nil(t, err)
})
t.Run("CreateResourceGroup", func(t *testing.T) {
_, err := server.CreateResourceGroup(ctx, nil)
assert.Nil(t, err)
})
t.Run("DropResourceGroup", func(t *testing.T) {
_, err := server.DropResourceGroup(ctx, nil)
assert.Nil(t, err)
})
t.Run("TransferNode", func(t *testing.T) {
_, err := server.TransferNode(ctx, nil)
assert.Nil(t, err)
})
t.Run("TransferReplica", func(t *testing.T) {
_, err := server.TransferReplica(ctx, nil)
assert.Nil(t, err)
})
t.Run("ListResourceGroups", func(t *testing.T) {
_, err := server.ListResourceGroups(ctx, nil)
assert.Nil(t, err)
})
t.Run("DescribeResourceGroup", func(t *testing.T) {
_, err := server.DescribeResourceGroup(ctx, nil)
assert.Nil(t, err)
})
err = server.Stop()
assert.Nil(t, err)

View File

@ -418,3 +418,111 @@ func (c *Client) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthReque
}
return ret.(*milvuspb.CheckHealthResponse), err
}
func (c *Client) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.CreateResourceGroup(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*commonpb.Status), err
}
func (c *Client) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.DropResourceGroup(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*commonpb.Status), err
}
func (c *Client) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.DescribeResourceGroup(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*querypb.DescribeResourceGroupResponse), err
}
func (c *Client) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.TransferNode(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*commonpb.Status), err
}
func (c *Client) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.TransferReplica(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*commonpb.Status), err
}
func (c *Client) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
req = typeutil.Clone(req)
commonpbutil.UpdateMsgBase(
req.GetBase(),
commonpbutil.FillMsgBaseFromClient(paramtable.GetNodeID(), commonpbutil.WithTargetID(c.sess.ServerID)),
)
ret, err := c.grpcClient.ReCall(ctx, func(client querypb.QueryCoordClient) (any, error) {
if !funcutil.CheckCtxValid(ctx) {
return nil, ctx.Err()
}
return client.ListResourceGroups(ctx, req)
})
if err != nil || ret == nil {
return nil, err
}
return ret.(*milvuspb.ListResourceGroupsResponse), err
}

View File

@ -124,6 +124,24 @@ func Test_NewClient(t *testing.T) {
r20, err := client.CheckHealth(ctx, nil)
retCheck(retNotNil, r20, err)
r21, err := client.CreateResourceGroup(ctx, nil)
retCheck(retNotNil, r21, err)
r22, err := client.DropResourceGroup(ctx, nil)
retCheck(retNotNil, r22, err)
r23, err := client.TransferNode(ctx, nil)
retCheck(retNotNil, r23, err)
r24, err := client.TransferReplica(ctx, nil)
retCheck(retNotNil, r24, err)
r26, err := client.ListResourceGroups(ctx, nil)
retCheck(retNotNil, r26, err)
r27, err := client.DescribeResourceGroup(ctx, nil)
retCheck(retNotNil, r27, err)
}
client.grpcClient = &mock.GRPCClientBase[querypb.QueryCoordClient]{

View File

@ -364,3 +364,27 @@ func (s *Server) GetShardLeaders(ctx context.Context, req *querypb.GetShardLeade
func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
return s.queryCoord.CheckHealth(ctx, req)
}
func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return s.queryCoord.CreateResourceGroup(ctx, req)
}
func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return s.queryCoord.DropResourceGroup(ctx, req)
}
func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return s.queryCoord.TransferNode(ctx, req)
}
func (s *Server) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
return s.queryCoord.TransferReplica(ctx, req)
}
func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return s.queryCoord.ListResourceGroups(ctx, req)
}
func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
return s.queryCoord.DescribeResourceGroup(ctx, req)
}

View File

@ -162,6 +162,34 @@ func (m *MockQueryCoord) CheckHealth(ctx context.Context, req *milvuspb.CheckHea
}, m.err
}
func (m *MockQueryCoord) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return m.status, nil
}
func (m *MockQueryCoord) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return m.status, nil
}
func (m *MockQueryCoord) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return m.status, nil
}
func (m *MockQueryCoord) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
return m.status, nil
}
func (m *MockQueryCoord) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return &milvuspb.ListResourceGroupsResponse{
Status: m.status,
}, nil
}
func (m *MockQueryCoord) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
return &querypb.DescribeResourceGroupResponse{
Status: m.status,
}, nil
}
// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
type MockRootCoord struct {
types.RootCoord
@ -371,6 +399,43 @@ func Test_NewServer(t *testing.T) {
assert.Equal(t, true, ret.IsHealthy)
})
t.Run("CreateResourceGroup", func(t *testing.T) {
resp, err := server.CreateResourceGroup(ctx, nil)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
})
t.Run("DropResourceGroup", func(t *testing.T) {
resp, err := server.DropResourceGroup(ctx, nil)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
})
t.Run("TransferNode", func(t *testing.T) {
resp, err := server.TransferNode(ctx, nil)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
})
t.Run("TransferReplica", func(t *testing.T) {
resp, err := server.TransferReplica(ctx, nil)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.ErrorCode)
})
t.Run("ListResourceGroups", func(t *testing.T) {
req := &milvuspb.ListResourceGroupsRequest{}
resp, err := server.ListResourceGroups(ctx, req)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
})
t.Run("DescribeResourceGroup", func(t *testing.T) {
resp, err := server.DescribeResourceGroup(ctx, nil)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, resp.Status.ErrorCode)
})
err = server.Stop()
assert.Nil(t, err)
}

View File

@ -159,4 +159,7 @@ type QueryCoordCatalog interface {
ReleasePartition(collection int64, partitions ...int64) error
ReleaseReplicas(collectionID int64) error
ReleaseReplica(collection, replica int64) error
SaveResourceGroup(rgs ...*querypb.ResourceGroup) error
RemoveResourceGroup(rgName string) error
GetResourceGroups() ([]*querypb.ResourceGroup, error)
}

View File

@ -36,6 +36,13 @@ service QueryCoord {
rpc GetShardLeaders(GetShardLeadersRequest) returns (GetShardLeadersResponse) {}
rpc CheckHealth(milvus.CheckHealthRequest) returns (milvus.CheckHealthResponse) {}
rpc CreateResourceGroup(milvus.CreateResourceGroupRequest) returns (common.Status) {}
rpc DropResourceGroup(milvus.DropResourceGroupRequest) returns (common.Status) {}
rpc TransferNode(milvus.TransferNodeRequest) returns (common.Status) {}
rpc TransferReplica(TransferReplicaRequest) returns (common.Status) {}
rpc ListResourceGroups(milvus.ListResourceGroupsRequest) returns (milvus.ListResourceGroupsResponse) {}
rpc DescribeResourceGroup(DescribeResourceGroupRequest) returns (DescribeResourceGroupResponse) {}
}
service QueryNode {
@ -101,6 +108,8 @@ message LoadCollectionRequest {
// fieldID -> indexID
map<int64, int64> field_indexID = 6;
bool refresh = 7;
// resource group names
repeated string resource_groups = 8;
}
message ReleaseCollectionRequest {
@ -128,6 +137,8 @@ message LoadPartitionsRequest {
// fieldID -> indexID
map<int64, int64> field_indexID = 7;
bool refresh = 8;
// resource group names
repeated string resource_groups = 9;
}
message ReleasePartitionsRequest {
@ -488,6 +499,7 @@ message Replica {
int64 ID = 1;
int64 collectionID = 2;
repeated int64 nodes = 3;
string resource_group = 4;
}
enum SyncType {
@ -510,3 +522,39 @@ message SyncDistributionRequest {
repeated SyncAction actions = 4;
}
message ResourceGroup {
string name = 1;
int32 capacity = 2;
repeated int64 nodes = 3;
}
// transfer `num_replica` replicas of `collectionID` from `source_resource_group` to `target_resource_group`
message TransferReplicaRequest {
common.MsgBase base = 1;
string source_resource_group = 2;
string target_resource_group = 3;
int64 collectionID = 4;
int64 num_replica = 5;
}
message DescribeResourceGroupRequest {
common.MsgBase base = 1;
string resource_group = 2;
}
message DescribeResourceGroupResponse {
common.Status status = 1;
ResourceGroupInfo resource_group = 2;
}
message ResourceGroupInfo {
string name = 1;
int32 capacity = 2;
int32 num_available_node = 3;
// collection id -> number of replicas loaded in this resource group
map<int64, int32> num_loaded_replica = 4;
// collection id -> number of nodes in other resource groups accessed by this rg
map<int64, int32> num_outgoing_node = 5;
// collection id -> number of this rg's nodes accessed by other resource groups
map<int64, int32> num_incoming_node = 6;
}
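For orientation, a hedged sketch of how these new messages surface through the generated Go API: field names follow the usual protoc-gen-go conventions, `coord` stands for any types.QueryCoord implementation (for example the grpc client wrapper added earlier in this PR), MsgBase fields are left for the caller or wrapper to fill, and the package placement is hypothetical.

package example // illustration only

import (
    "context"
    "fmt"

    "github.com/milvus-io/milvus-proto/go-api/commonpb"
    "github.com/milvus-io/milvus/internal/proto/querypb"
    "github.com/milvus-io/milvus/internal/querycoordv2/meta"
    "github.com/milvus-io/milvus/internal/types"
)

// moveOneReplica moves one replica of a collection from the default resource
// group into "rg1", then inspects "rg1" via DescribeResourceGroup.
func moveOneReplica(ctx context.Context, coord types.QueryCoord, collectionID int64) error {
    status, err := coord.TransferReplica(ctx, &querypb.TransferReplicaRequest{
        SourceResourceGroup: meta.DefaultResourceGroupName,
        TargetResourceGroup: "rg1",
        CollectionID:        collectionID,
        NumReplica:          1,
    })
    if err != nil {
        return err
    }
    if status.GetErrorCode() != commonpb.ErrorCode_Success {
        return fmt.Errorf("transfer replica failed: %s", status.GetReason())
    }

    resp, err := coord.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{
        ResourceGroup: "rg1",
    })
    if err != nil {
        return err
    }
    if resp.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
        return fmt.Errorf("describe resource group failed: %s", resp.GetStatus().GetReason())
    }
    // ResourceGroupInfo carries capacity, available node count and the
    // per-collection replica / cross-group node counters defined above.
    _ = resp.GetResourceGroup().GetNumLoadedReplica()[collectionID]
    return nil
}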

File diff suppressed because it is too large

View File

@ -3452,6 +3452,10 @@ func (node *Proxy) GetReplicas(ctx context.Context, req *milvuspb.GetReplicasReq
commonpbutil.WithSourceID(paramtable.GetNodeID()),
)
if req.GetCollectionName() != "" {
req.CollectionID, _ = globalMetaCache.GetCollectionID(ctx, req.GetCollectionName())
}
resp, err := node.queryCoord.GetReplicas(ctx, req)
if err != nil {
log.Error("Failed to get replicas from Query Coordinator",
@ -3758,7 +3762,6 @@ func (node *Proxy) UpdateCredentialCache(ctx context.Context, request *proxypb.U
}, nil
}
//
func (node *Proxy) CreateCredential(ctx context.Context, req *milvuspb.CreateCredentialRequest) (*commonpb.Status, error) {
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-CreateCredential")
defer sp.End()
@ -3823,7 +3826,6 @@ func (node *Proxy) CreateCredential(ctx context.Context, req *milvuspb.CreateCre
return result, err
}
//
func (node *Proxy) UpdateCredential(ctx context.Context, req *milvuspb.UpdateCredentialRequest) (*commonpb.Status, error) {
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-UpdateCredential")
defer sp.End()
@ -3897,7 +3899,6 @@ func (node *Proxy) UpdateCredential(ctx context.Context, req *milvuspb.UpdateCre
return result, err
}
//
func (node *Proxy) DeleteCredential(ctx context.Context, req *milvuspb.DeleteCredentialRequest) (*commonpb.Status, error) {
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-DeleteCredential")
defer sp.End()
@ -4449,42 +4450,391 @@ func (node *Proxy) RenameCollection(ctx context.Context, req *milvuspb.RenameCol
}
func (node *Proxy) CreateResourceGroup(ctx context.Context, request *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
}, nil
if !node.checkHealthy() {
return unhealthyStatus(), nil
}
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-CreateResourceGroup")
defer sp.End()
method := "CreateResourceGroup"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &CreateResourceGroupTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
CreateResourceGroupRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("CreateResourceGroup received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("CreateResourceGroup failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("CreateResourceGroup enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("CreateResourceGroup failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("CreateResourceGroup done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}
func (node *Proxy) DropResourceGroup(ctx context.Context, request *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
}, nil
if !node.checkHealthy() {
return unhealthyStatus(), nil
}
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-DropResourceGroup")
defer sp.End()
method := "DropResourceGroup"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &DropResourceGroupTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
DropResourceGroupRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("DropResourceGroup received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("DropResourceGroup failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("DropResourceGroup enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("DropResourceGroup failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("DropResourceGroup done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}
func (node *Proxy) TransferNode(ctx context.Context, request *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
}, nil
if !node.checkHealthy() {
return unhealthyStatus(), nil
}
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-TransferNode")
defer sp.End()
method := "TransferNode"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &TransferNodeTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
TransferNodeRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("TransferNode received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("TransferNode failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("TransferNode enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("TransferNode failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("TransferNode done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}
func (node *Proxy) TransferReplica(ctx context.Context, request *milvuspb.TransferReplicaRequest) (*commonpb.Status, error) {
if !node.checkHealthy() {
return unhealthyStatus(), nil
}
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
}, nil
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-TransferReplica")
defer sp.End()
method := "TransferReplica"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &TransferReplicaTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
TransferReplicaRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("TransferReplica received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("TransferReplica failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("TransferReplica enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("TransferReplica failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
}, nil
}
log.Debug("TransferReplica done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}
func (node *Proxy) ListResourceGroup(ctx context.Context, request *milvuspb.ListResourceGroupRequest) (*milvuspb.ListResourceGroupResponse, error) {
return &milvuspb.ListResourceGroupResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
},
}, nil
func (node *Proxy) ListResourceGroups(ctx context.Context, request *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
if !node.checkHealthy() {
return &milvuspb.ListResourceGroupsResponse{
Status: unhealthyStatus(),
}, nil
}
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-ListResourceGroups")
defer sp.End()
method := "ListResourceGroups"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &ListResourceGroupsTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
ListResourceGroupsRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("ListResourceGroups received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("ListResourceGroups failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &milvuspb.ListResourceGroupsResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
},
}, nil
}
log.Debug("ListResourceGroups enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("ListResourceGroups failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &milvuspb.ListResourceGroupsResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
},
}, nil
}
log.Debug("ListResourceGroups done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}
func (node *Proxy) DescribeResourceGroup(ctx context.Context, request *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error) {
return &milvuspb.DescribeResourceGroupResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
},
}, nil
if !node.checkHealthy() {
return &milvuspb.DescribeResourceGroupResponse{
Status: unhealthyStatus(),
}, nil
}
ctx, sp := otel.Tracer(typeutil.ProxyRole).Start(ctx, "Proxy-DescribeResourceGroup")
defer sp.End()
method := "DescribeResourceGroup"
tr := timerecord.NewTimeRecorder(method)
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.TotalLabel).Inc()
t := &DescribeResourceGroupTask{
ctx: ctx,
Condition: NewTaskCondition(ctx),
DescribeResourceGroupRequest: request,
queryCoord: node.queryCoord,
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.ProxyRole),
)
log.Debug("DescribeResourceGroup received")
if err := node.sched.ddQueue.Enqueue(t); err != nil {
log.Warn("DescribeResourceGroup failed to enqueue",
zap.Error(err))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.AbandonLabel).Inc()
return &milvuspb.DescribeResourceGroupResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
},
}, nil
}
log.Debug("DescribeResourceGroup enqueued",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
if err := t.WaitToFinish(); err != nil {
log.Warn("DescribeResourceGroup failed to WaitToFinish",
zap.Error(err),
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.FailLabel).Inc()
return &milvuspb.DescribeResourceGroupResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
Reason: err.Error(),
},
}, nil
}
log.Debug("DescribeResourceGroup done",
zap.Uint64("BeginTS", t.BeginTs()),
zap.Uint64("EndTS", t.EndTs()))
metrics.ProxyFunctionCall.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method,
metrics.SuccessLabel).Inc()
metrics.ProxyReqLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return t.result, nil
}

View File

@ -29,6 +29,7 @@ import (
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/proxypb"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/paramtable"
"github.com/milvus-io/milvus/internal/util/sessionutil"
)
@ -198,3 +199,75 @@ func TestProxyRenameCollection(t *testing.T) {
assert.Equal(t, commonpb.ErrorCode_Success, resp.GetErrorCode())
})
}
func TestProxy_ResourceGroup(t *testing.T) {
factory := dependency.NewDefaultFactory(true)
ctx := context.Background()
node, err := NewProxy(ctx, factory)
assert.NoError(t, err)
node.multiRateLimiter = NewMultiRateLimiter()
node.stateCode.Store(commonpb.StateCode_Healthy)
qc := NewQueryCoordMock()
node.SetQueryCoordClient(qc)
tsoAllocatorIns := newMockTsoAllocator()
node.sched, err = newTaskScheduler(node.ctx, tsoAllocatorIns, node.factory)
assert.NoError(t, err)
node.sched.Start()
defer node.sched.Close()
rc := &MockRootCoordClientInterface{}
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
t.Run("create resource group", func(t *testing.T) {
resp, err := node.CreateResourceGroup(ctx, &milvuspb.CreateResourceGroupRequest{
ResourceGroup: "rg",
})
assert.NoError(t, err)
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
})
t.Run("drop resource group", func(t *testing.T) {
resp, err := node.DropResourceGroup(ctx, &milvuspb.DropResourceGroupRequest{
ResourceGroup: "rg",
})
assert.NoError(t, err)
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
})
t.Run("transfer node", func(t *testing.T) {
resp, err := node.TransferNode(ctx, &milvuspb.TransferNodeRequest{
SourceResourceGroup: "rg1",
TargetResourceGroup: "rg2",
NumNode: 1,
})
assert.NoError(t, err)
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
})
t.Run("transfer replica", func(t *testing.T) {
resp, err := node.TransferReplica(ctx, &milvuspb.TransferReplicaRequest{
SourceResourceGroup: "rg1",
TargetResourceGroup: "rg2",
NumReplica: 1,
CollectionName: "collection1",
})
assert.NoError(t, err)
assert.Equal(t, resp.ErrorCode, commonpb.ErrorCode_Success)
})
t.Run("list resource group", func(t *testing.T) {
resp, err := node.ListResourceGroups(ctx, &milvuspb.ListResourceGroupsRequest{})
assert.NoError(t, err)
assert.Equal(t, resp.Status.ErrorCode, commonpb.ErrorCode_Success)
})
t.Run("describe resource group", func(t *testing.T) {
resp, err := node.DescribeResourceGroup(ctx, &milvuspb.DescribeResourceGroupRequest{})
assert.NoError(t, err)
assert.Equal(t, resp.Status.ErrorCode, commonpb.ErrorCode_Success)
})
}

View File

@ -51,6 +51,8 @@ import (
type Cache interface {
// GetCollectionID get collection's id by name.
GetCollectionID(ctx context.Context, collectionName string) (typeutil.UniqueID, error)
// GetCollectionName get collection's name by id
GetCollectionName(ctx context.Context, collectionID int64) (string, error)
// GetCollectionInfo get collection's information by name, such as collection id, schema, and etc.
GetCollectionInfo(ctx context.Context, collectionName string) (*collectionInfo, error)
// GetPartitionID get partition's identifier of specific collection.
@ -196,7 +198,7 @@ func (m *MetaCache) GetCollectionID(ctx context.Context, collectionName string)
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), "GeCollectionID", metrics.CacheMissLabel).Inc()
tr := timerecord.NewTimeRecorder("UpdateCache")
m.mu.RUnlock()
coll, err := m.describeCollection(ctx, collectionName)
coll, err := m.describeCollection(ctx, collectionName, 0)
if err != nil {
return 0, err
}
@ -213,6 +215,37 @@ func (m *MetaCache) GetCollectionID(ctx context.Context, collectionName string)
return collInfo.collID, nil
}
// GetCollectionName returns the corresponding collection name for provided collection id
func (m *MetaCache) GetCollectionName(ctx context.Context, collectionID int64) (string, error) {
m.mu.RLock()
var collInfo *collectionInfo
for _, coll := range m.collInfo {
if coll.collID == collectionID {
collInfo = coll
break
}
}
if collInfo == nil || !collInfo.isCollectionCached() {
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), "GeCollectionName", metrics.CacheMissLabel).Inc()
tr := timerecord.NewTimeRecorder("UpdateCache")
m.mu.RUnlock()
coll, err := m.describeCollection(ctx, "", collectionID)
if err != nil {
return "", err
}
m.mu.Lock()
defer m.mu.Unlock()
m.updateCollection(coll, coll.Schema.Name)
metrics.ProxyUpdateCacheLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(tr.ElapseSpan().Milliseconds()))
return coll.Schema.Name, nil
}
defer m.mu.RUnlock()
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), "GeCollectionName", metrics.CacheHitLabel).Inc()
return collInfo.schema.Name, nil
}
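One hedged use for the new ID-to-name lookup is shaping resource-group responses on the proxy side, for example re-keying the collection-ID counters from DescribeResourceGroup by collection name. The helper below is hypothetical; it is sketched as if it lived in package proxy next to the Cache interface extended above.

// Hypothetical helper, sketched as if it lived in package proxy.
package proxy

import (
    "context"
    "strconv"
)

// collectionCountsByName re-keys a collection-ID keyed counter map (such as
// ResourceGroupInfo.num_loaded_replica) by collection name using the meta
// cache, falling back to the numeric ID when the lookup fails.
func collectionCountsByName(ctx context.Context, cache Cache, counts map[int64]int32) map[string]int32 {
    out := make(map[string]int32, len(counts))
    for collectionID, n := range counts {
        name, err := cache.GetCollectionName(ctx, collectionID)
        if err != nil {
            name = strconv.FormatInt(collectionID, 10) // keep the raw ID on a cache/rootcoord miss
        }
        out[name] += n
    }
    return out
}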
// GetCollectionInfo returns the collection information related to provided collection name
// If the information is not found, proxy will try to fetch information for other source (RootCoord for now)
func (m *MetaCache) GetCollectionInfo(ctx context.Context, collectionName string) (*collectionInfo, error) {
@ -224,7 +257,7 @@ func (m *MetaCache) GetCollectionInfo(ctx context.Context, collectionName string
if !ok || !collInfo.isCollectionCached() {
tr := timerecord.NewTimeRecorder("UpdateCache")
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), "GetCollectionInfo", metrics.CacheMissLabel).Inc()
coll, err := m.describeCollection(ctx, collectionName)
coll, err := m.describeCollection(ctx, collectionName, 0)
if err != nil {
return nil, err
}
@ -281,7 +314,7 @@ func (m *MetaCache) GetCollectionSchema(ctx context.Context, collectionName stri
metrics.ProxyCacheStatsCounter.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10), "GetCollectionSchema", metrics.CacheMissLabel).Inc()
tr := timerecord.NewTimeRecorder("UpdateCache")
m.mu.RUnlock()
coll, err := m.describeCollection(ctx, collectionName)
coll, err := m.describeCollection(ctx, collectionName, 0)
if err != nil {
log.Warn("Failed to load collection from rootcoord ",
zap.String("collection name ", collectionName),
@ -294,7 +327,7 @@ func (m *MetaCache) GetCollectionSchema(ctx context.Context, collectionName stri
collInfo = m.collInfo[collectionName]
metrics.ProxyUpdateCacheLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Debug("Reload collection from root coordinator ",
zap.String("collection name ", collectionName),
zap.String("collection name", collectionName),
zap.Any("time (milliseconds) take ", tr.ElapseSpan().Milliseconds()))
return collInfo.schema, nil
}
@ -424,12 +457,13 @@ func (m *MetaCache) GetPartitionInfo(ctx context.Context, collectionName string,
}
// Get the collection information from rootcoord.
func (m *MetaCache) describeCollection(ctx context.Context, collectionName string) (*milvuspb.DescribeCollectionResponse, error) {
func (m *MetaCache) describeCollection(ctx context.Context, collectionName string, collectionID int64) (*milvuspb.DescribeCollectionResponse, error) {
req := &milvuspb.DescribeCollectionRequest{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_DescribeCollection),
),
CollectionName: collectionName,
CollectionID: collectionID,
}
coll, err := m.rootCoord.DescribeCollection(ctx, req)
if err != nil {

View File

@ -127,7 +127,7 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
return nil, errors.New("mocked error")
}
m.IncAccessCount()
if in.CollectionName == "collection1" {
if in.CollectionName == "collection1" || in.CollectionID == 1 {
return &milvuspb.DescribeCollectionResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
@ -135,10 +135,11 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
CollectionID: typeutil.UniqueID(1),
Schema: &schemapb.CollectionSchema{
AutoID: true,
Name: "collection1",
},
}, nil
}
if in.CollectionName == "collection2" {
if in.CollectionName == "collection2" || in.CollectionID == 2 {
return &milvuspb.DescribeCollectionResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
@ -146,6 +147,7 @@ func (m *MockRootCoordClientInterface) DescribeCollection(ctx context.Context, i
CollectionID: typeutil.UniqueID(2),
Schema: &schemapb.CollectionSchema{
AutoID: true,
Name: "collection2",
},
}, nil
}
@ -230,7 +232,7 @@ func (m *MockQueryCoordClientInterface) ShowCollections(ctx context.Context, req
return rsp, nil
}
//Simulate the cache path and the
// Simulate the cache path and the
func TestMetaCache_GetCollection(t *testing.T) {
ctx := context.Background()
rootCoord := &MockRootCoordClientInterface{}
@ -251,6 +253,7 @@ func TestMetaCache_GetCollection(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
id, err = globalMetaCache.GetCollectionID(ctx, "collection2")
assert.Equal(t, rootCoord.GetAccessCount(), 2)
@ -262,6 +265,7 @@ func TestMetaCache_GetCollection(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection2",
})
// test to get from cache, this should trigger root request
@ -275,10 +279,61 @@ func TestMetaCache_GetCollection(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
}
func TestMetaCache_GetCollectionName(t *testing.T) {
ctx := context.Background()
rootCoord := &MockRootCoordClientInterface{}
queryCoord := &MockQueryCoordClientInterface{}
mgr := newShardClientMgr()
err := InitMetaCache(ctx, rootCoord, queryCoord, mgr)
assert.Nil(t, err)
collection, err := globalMetaCache.GetCollectionName(ctx, 1)
assert.Nil(t, err)
assert.Equal(t, collection, "collection1")
assert.Equal(t, rootCoord.GetAccessCount(), 1)
// shouldn't hit the remote root coord.
schema, err := globalMetaCache.GetCollectionSchema(ctx, "collection1")
assert.Equal(t, rootCoord.GetAccessCount(), 1)
assert.Nil(t, err)
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
collection, err = globalMetaCache.GetCollectionName(ctx, 1)
assert.Equal(t, rootCoord.GetAccessCount(), 1)
assert.Nil(t, err)
assert.Equal(t, collection, "collection1")
schema, err = globalMetaCache.GetCollectionSchema(ctx, "collection2")
assert.Equal(t, rootCoord.GetAccessCount(), 2)
assert.Nil(t, err)
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection2",
})
// test to get from cache, this should not trigger another root coord request
collection, err = globalMetaCache.GetCollectionName(ctx, 1)
assert.Equal(t, rootCoord.GetAccessCount(), 2)
assert.Nil(t, err)
assert.Equal(t, collection, "collection1")
schema, err = globalMetaCache.GetCollectionSchema(ctx, "collection1")
assert.Equal(t, rootCoord.GetAccessCount(), 2)
assert.Nil(t, err)
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
}
func TestMetaCache_GetCollectionFailure(t *testing.T) {
ctx := context.Background()
rootCoord := &MockRootCoordClientInterface{}
@ -299,6 +354,7 @@ func TestMetaCache_GetCollectionFailure(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
rootCoord.Error = true
@ -307,6 +363,7 @@ func TestMetaCache_GetCollectionFailure(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
}
@ -367,6 +424,7 @@ func TestMetaCache_ConcurrentTest1(t *testing.T) {
assert.Equal(t, schema, &schemapb.CollectionSchema{
AutoID: true,
Fields: []*schemapb.FieldSchema{},
Name: "collection1",
})
time.Sleep(10 * time.Millisecond)
}

View File

@ -8,6 +8,7 @@ import (
)
type getCollectionIDFunc func(ctx context.Context, collectionName string) (typeutil.UniqueID, error)
type getCollectionNameFunc func(ctx context.Context, collectionID int64) (string, error)
type getCollectionSchemaFunc func(ctx context.Context, collectionName string) (*schemapb.CollectionSchema, error)
type getCollectionInfoFunc func(ctx context.Context, collectionName string) (*collectionInfo, error)
type getUserRoleFunc func(username string) []string
@ -16,6 +17,7 @@ type getPartitionIDFunc func(ctx context.Context, collectionName string, partiti
type mockCache struct {
Cache
getIDFunc getCollectionIDFunc
getNameFunc getCollectionNameFunc
getSchemaFunc getCollectionSchemaFunc
getInfoFunc getCollectionInfoFunc
getUserRoleFunc getUserRoleFunc
@ -29,6 +31,13 @@ func (m *mockCache) GetCollectionID(ctx context.Context, collectionName string)
return 0, nil
}
func (m *mockCache) GetCollectionName(ctx context.Context, collectionID int64) (string, error) {
if m.getNameFunc != nil {
return m.getNameFunc(ctx, collectionID)
}
return "", nil
}
func (m *mockCache) GetCollectionSchema(ctx context.Context, collectionName string) (*schemapb.CollectionSchema, error) {
if m.getSchemaFunc != nil {
return m.getSchemaFunc(ctx, collectionName)

View File

@ -22,6 +22,7 @@ import (
"sync"
"sync/atomic"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/uniquegenerator"
@ -423,6 +424,60 @@ func (coord *QueryCoordMock) GetShardLeaders(ctx context.Context, req *querypb.G
}, nil
}
func (coord *QueryCoordMock) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil
}
func (coord *QueryCoordMock) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil
}
func (coord *QueryCoordMock) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil
}
func (coord *QueryCoordMock) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
}, nil
}
func (coord *QueryCoordMock) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
return &milvuspb.ListResourceGroupsResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
},
ResourceGroups: []string{meta.DefaultResourceGroupName, "rg"},
}, nil
}
func (coord *QueryCoordMock) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
return &querypb.DescribeResourceGroupResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
},
ResourceGroup: &querypb.ResourceGroupInfo{
Name: "rg",
Capacity: 2,
NumAvailableNode: 1,
NumOutgoingNode: map[int64]int32{1: 1},
NumIncomingNode: map[int64]int32{2: 2},
},
}, nil
}
func NewQueryCoordMock(opts ...QueryCoordMockOption) *QueryCoordMock {
coord := &QueryCoordMock{
nodeID: UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt()),

View File

@ -39,6 +39,7 @@ import (
"github.com/milvus-io/milvus/internal/util/commonpbutil"
"github.com/milvus-io/milvus/internal/util/paramtable"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/samber/lo"
)
const (
@ -51,27 +52,33 @@ const (
OffsetKey = "offset"
LimitKey = "limit"
InsertTaskName = "InsertTask"
CreateCollectionTaskName = "CreateCollectionTask"
DropCollectionTaskName = "DropCollectionTask"
HasCollectionTaskName = "HasCollectionTask"
DescribeCollectionTaskName = "DescribeCollectionTask"
ShowCollectionTaskName = "ShowCollectionTask"
CreatePartitionTaskName = "CreatePartitionTask"
DropPartitionTaskName = "DropPartitionTask"
HasPartitionTaskName = "HasPartitionTask"
ShowPartitionTaskName = "ShowPartitionTask"
FlushTaskName = "FlushTask"
LoadCollectionTaskName = "LoadCollectionTask"
ReleaseCollectionTaskName = "ReleaseCollectionTask"
LoadPartitionTaskName = "LoadPartitionsTask"
ReleasePartitionTaskName = "ReleasePartitionsTask"
DeleteTaskName = "DeleteTask"
CreateAliasTaskName = "CreateAliasTask"
DropAliasTaskName = "DropAliasTask"
AlterAliasTaskName = "AlterAliasTask"
AlterCollectionTaskName = "AlterCollectionTask"
UpsertTaskName = "UpsertTask"
InsertTaskName = "InsertTask"
CreateCollectionTaskName = "CreateCollectionTask"
DropCollectionTaskName = "DropCollectionTask"
HasCollectionTaskName = "HasCollectionTask"
DescribeCollectionTaskName = "DescribeCollectionTask"
ShowCollectionTaskName = "ShowCollectionTask"
CreatePartitionTaskName = "CreatePartitionTask"
DropPartitionTaskName = "DropPartitionTask"
HasPartitionTaskName = "HasPartitionTask"
ShowPartitionTaskName = "ShowPartitionTask"
FlushTaskName = "FlushTask"
LoadCollectionTaskName = "LoadCollectionTask"
ReleaseCollectionTaskName = "ReleaseCollectionTask"
LoadPartitionTaskName = "LoadPartitionsTask"
ReleasePartitionTaskName = "ReleasePartitionsTask"
DeleteTaskName = "DeleteTask"
CreateAliasTaskName = "CreateAliasTask"
DropAliasTaskName = "DropAliasTask"
AlterAliasTaskName = "AlterAliasTask"
AlterCollectionTaskName = "AlterCollectionTask"
UpsertTaskName = "UpsertTask"
CreateResourceGroupTaskName = "CreateResourceGroupTask"
DropResourceGroupTaskName = "DropResourceGroupTask"
TransferNodeTaskName = "TransferNodeTask"
TransferReplicaTaskName = "TransferReplicaTask"
ListResourceGroupsTaskName = "ListResourceGroupsTask"
DescribeResourceGroupTaskName = "DescribeResourceGroupTask"
// minFloat32 minimum float.
minFloat32 = -1 * float32(math.MaxFloat32)
@ -1916,3 +1923,412 @@ func (a *AlterAliasTask) Execute(ctx context.Context) error {
func (a *AlterAliasTask) PostExecute(ctx context.Context) error {
return nil
}
type CreateResourceGroupTask struct {
Condition
*milvuspb.CreateResourceGroupRequest
ctx context.Context
queryCoord types.QueryCoord
result *commonpb.Status
}
func (t *CreateResourceGroupTask) TraceCtx() context.Context {
return t.ctx
}
func (t *CreateResourceGroupTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *CreateResourceGroupTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *CreateResourceGroupTask) Name() string {
return CreateResourceGroupTaskName
}
func (t *CreateResourceGroupTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *CreateResourceGroupTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *CreateResourceGroupTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *CreateResourceGroupTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *CreateResourceGroupTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *CreateResourceGroupTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_CreateResourceGroup
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *CreateResourceGroupTask) Execute(ctx context.Context) error {
var err error
t.result, err = t.queryCoord.CreateResourceGroup(ctx, t.CreateResourceGroupRequest)
return err
}
func (t *CreateResourceGroupTask) PostExecute(ctx context.Context) error {
return nil
}
type DropResourceGroupTask struct {
Condition
*milvuspb.DropResourceGroupRequest
ctx context.Context
queryCoord types.QueryCoord
result *commonpb.Status
}
func (t *DropResourceGroupTask) TraceCtx() context.Context {
return t.ctx
}
func (t *DropResourceGroupTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *DropResourceGroupTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *DropResourceGroupTask) Name() string {
return DropResourceGroupTaskName
}
func (t *DropResourceGroupTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *DropResourceGroupTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *DropResourceGroupTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *DropResourceGroupTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *DropResourceGroupTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *DropResourceGroupTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_DropResourceGroup
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *DropResourceGroupTask) Execute(ctx context.Context) error {
var err error
t.result, err = t.queryCoord.DropResourceGroup(ctx, t.DropResourceGroupRequest)
return err
}
func (t *DropResourceGroupTask) PostExecute(ctx context.Context) error {
return nil
}
type DescribeResourceGroupTask struct {
Condition
*milvuspb.DescribeResourceGroupRequest
ctx context.Context
queryCoord types.QueryCoord
result *milvuspb.DescribeResourceGroupResponse
}
func (t *DescribeResourceGroupTask) TraceCtx() context.Context {
return t.ctx
}
func (t *DescribeResourceGroupTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *DescribeResourceGroupTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *DescribeResourceGroupTask) Name() string {
return DescribeResourceGroupTaskName
}
func (t *DescribeResourceGroupTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *DescribeResourceGroupTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *DescribeResourceGroupTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *DescribeResourceGroupTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *DescribeResourceGroupTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *DescribeResourceGroupTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_DescribeResourceGroup
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *DescribeResourceGroupTask) Execute(ctx context.Context) error {
	resp, err := t.queryCoord.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{
		ResourceGroup: t.ResourceGroup,
	})
	if err != nil {
		return err
	}
	rgInfo := resp.GetResourceGroup()
	getCollectionNameFunc := func(value int32, key int64) string {
		name, err := globalMetaCache.GetCollectionName(ctx, key)
		if err != nil {
			// should be unreachable: the collection is already loaded in this resource group
			return "unavailable_collection"
		}
		return name
	}
loadReplicas := lo.MapKeys(rgInfo.NumLoadedReplica, getCollectionNameFunc)
outgoingNodes := lo.MapKeys(rgInfo.NumOutgoingNode, getCollectionNameFunc)
incomingNodes := lo.MapKeys(rgInfo.NumIncomingNode, getCollectionNameFunc)
t.result = &milvuspb.DescribeResourceGroupResponse{
Status: resp.Status,
ResourceGroup: &milvuspb.ResourceGroup{
Name: rgInfo.GetName(),
Capacity: rgInfo.GetCapacity(),
NumAvailableNode: rgInfo.NumAvailableNode,
NumLoadedReplica: loadReplicas,
NumOutgoingNode: outgoingNodes,
NumIncomingNode: incomingNodes,
},
}
	return nil
}
func (t *DescribeResourceGroupTask) PostExecute(ctx context.Context) error {
return nil
}
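The key remapping above is the only non-trivial step in DescribeResourceGroupTask.Execute: query coord reports per-collection node counts keyed by collection ID, and the proxy rewrites the keys into collection names for the user-facing response. Below is a minimal, self-contained sketch of that transformation; the inline name table stands in for globalMetaCache and is purely illustrative.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	// per-collection node counts keyed by collection ID, as returned by query coord
	numOutgoingNode := map[int64]int32{100: 1, 101: 2}
	// hypothetical ID-to-name table; the real task resolves names via globalMetaCache.GetCollectionName
	names := map[int64]string{100: "collection1", 101: "collection2"}
	byName := lo.MapKeys(numOutgoingNode, func(value int32, key int64) string {
		if name, ok := names[key]; ok {
			return name
		}
		return "unavailable_collection"
	})
	fmt.Println(byName) // map[collection1:1 collection2:2]
}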
type TransferNodeTask struct {
Condition
*milvuspb.TransferNodeRequest
ctx context.Context
queryCoord types.QueryCoord
result *commonpb.Status
}
func (t *TransferNodeTask) TraceCtx() context.Context {
return t.ctx
}
func (t *TransferNodeTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *TransferNodeTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *TransferNodeTask) Name() string {
return TransferNodeTaskName
}
func (t *TransferNodeTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *TransferNodeTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *TransferNodeTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *TransferNodeTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *TransferNodeTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *TransferNodeTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_TransferNode
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *TransferNodeTask) Execute(ctx context.Context) error {
var err error
t.result, err = t.queryCoord.TransferNode(ctx, t.TransferNodeRequest)
return err
}
func (t *TransferNodeTask) PostExecute(ctx context.Context) error {
return nil
}
type TransferReplicaTask struct {
Condition
*milvuspb.TransferReplicaRequest
ctx context.Context
queryCoord types.QueryCoord
result *commonpb.Status
}
func (t *TransferReplicaTask) TraceCtx() context.Context {
return t.ctx
}
func (t *TransferReplicaTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *TransferReplicaTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *TransferReplicaTask) Name() string {
return TransferReplicaTaskName
}
func (t *TransferReplicaTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *TransferReplicaTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *TransferReplicaTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *TransferReplicaTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *TransferReplicaTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *TransferReplicaTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_TransferReplica
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *TransferReplicaTask) Execute(ctx context.Context) error {
	collID, err := globalMetaCache.GetCollectionID(ctx, t.CollectionName)
if err != nil {
return err
}
t.result, err = t.queryCoord.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: t.SourceResourceGroup,
TargetResourceGroup: t.TargetResourceGroup,
CollectionID: collID,
NumReplica: t.NumReplica,
})
return err
}
func (t *TransferReplicaTask) PostExecute(ctx context.Context) error {
return nil
}
type ListResourceGroupsTask struct {
Condition
*milvuspb.ListResourceGroupsRequest
ctx context.Context
queryCoord types.QueryCoord
result *milvuspb.ListResourceGroupsResponse
}
func (t *ListResourceGroupsTask) TraceCtx() context.Context {
return t.ctx
}
func (t *ListResourceGroupsTask) ID() UniqueID {
return t.Base.MsgID
}
func (t *ListResourceGroupsTask) SetID(uid UniqueID) {
t.Base.MsgID = uid
}
func (t *ListResourceGroupsTask) Name() string {
return ListResourceGroupsTaskName
}
func (t *ListResourceGroupsTask) Type() commonpb.MsgType {
return t.Base.MsgType
}
func (t *ListResourceGroupsTask) BeginTs() Timestamp {
return t.Base.Timestamp
}
func (t *ListResourceGroupsTask) EndTs() Timestamp {
return t.Base.Timestamp
}
func (t *ListResourceGroupsTask) SetTs(ts Timestamp) {
t.Base.Timestamp = ts
}
func (t *ListResourceGroupsTask) OnEnqueue() error {
t.Base = commonpbutil.NewMsgBase()
return nil
}
func (t *ListResourceGroupsTask) PreExecute(ctx context.Context) error {
t.Base.MsgType = commonpb.MsgType_ListResourceGroups
t.Base.SourceID = paramtable.GetNodeID()
return nil
}
func (t *ListResourceGroupsTask) Execute(ctx context.Context) error {
var err error
t.result, err = t.queryCoord.ListResourceGroups(ctx, t.ListResourceGroupsRequest)
return err
}
func (t *ListResourceGroupsTask) PostExecute(ctx context.Context) error {
return nil
}
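The six resource-group task types above share the same lifecycle, and the proxy's task scheduler is expected to drive them in a fixed order: OnEnqueue when the request is queued, then PreExecute, Execute and PostExecute, with any error short-circuiting the remaining steps. The sketch below is an illustrative, self-contained driver under that assumption, not the proxy's real scheduler; rgTask, noopTask and runTask are hypothetical names used only here.

package main

import (
	"context"
	"fmt"
)

// rgTask mirrors the lifecycle subset shared by the tasks defined above.
type rgTask interface {
	OnEnqueue() error
	PreExecute(ctx context.Context) error
	Execute(ctx context.Context) error
	PostExecute(ctx context.Context) error
}

// noopTask is a stand-in implementation that only records that Execute ran.
type noopTask struct{ executed bool }

func (t *noopTask) OnEnqueue() error                      { return nil }
func (t *noopTask) PreExecute(ctx context.Context) error  { return nil }
func (t *noopTask) Execute(ctx context.Context) error     { t.executed = true; return nil }
func (t *noopTask) PostExecute(ctx context.Context) error { return nil }

// runTask calls the lifecycle methods in order and stops on the first error.
func runTask(ctx context.Context, t rgTask) error {
	if err := t.OnEnqueue(); err != nil {
		return fmt.Errorf("enqueue: %w", err)
	}
	if err := t.PreExecute(ctx); err != nil {
		return fmt.Errorf("pre-execute: %w", err)
	}
	if err := t.Execute(ctx); err != nil {
		return fmt.Errorf("execute: %w", err)
	}
	return t.PostExecute(ctx)
}

func main() {
	task := &noopTask{}
	err := runTask(context.Background(), task)
	fmt.Println(err, task.executed) // <nil> true
}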


@ -28,6 +28,7 @@ import (
"time"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/golang/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
@ -2538,3 +2539,248 @@ func Test_loadPartitionTask_Execute(t *testing.T) {
assert.Error(t, err)
})
}
func TestCreateResourceGroupTask(t *testing.T) {
rc := NewRootCoordMock()
rc.Start()
defer rc.Stop()
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
createRGReq := &milvuspb.CreateResourceGroupRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
ResourceGroup: "rg",
}
task := &CreateResourceGroupTask{
CreateResourceGroupRequest: createRGReq,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_CreateResourceGroup, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
}
func TestDropResourceGroupTask(t *testing.T) {
rc := NewRootCoordMock()
rc.Start()
defer rc.Stop()
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
dropRGReq := &milvuspb.DropResourceGroupRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
ResourceGroup: "rg",
}
task := &DropResourceGroupTask{
DropResourceGroupRequest: dropRGReq,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_DropResourceGroup, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
}
func TestTransferNodeTask(t *testing.T) {
rc := NewRootCoordMock()
rc.Start()
defer rc.Stop()
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
req := &milvuspb.TransferNodeRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
SourceResourceGroup: "rg1",
TargetResourceGroup: "rg2",
NumNode: 1,
}
task := &TransferNodeTask{
TransferNodeRequest: req,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_TransferNode, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
}
func TestTransferReplicaTask(t *testing.T) {
rc := &MockRootCoordClientInterface{}
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
	// warm up the meta cache so the task does not trigger a remote call to rc
globalMetaCache.GetCollectionSchema(context.Background(), "collection1")
req := &milvuspb.TransferReplicaRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
CollectionName: "collection1",
SourceResourceGroup: "rg1",
TargetResourceGroup: "rg2",
NumReplica: 1,
}
task := &TransferReplicaTask{
TransferReplicaRequest: req,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_TransferReplica, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.ErrorCode)
}
func TestListResourceGroupsTask(t *testing.T) {
rc := &MockRootCoordClientInterface{}
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
req := &milvuspb.ListResourceGroupsRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
}
task := &ListResourceGroupsTask{
ListResourceGroupsRequest: req,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_ListResourceGroups, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.Status.ErrorCode)
groups := task.result.GetResourceGroups()
assert.Contains(t, groups, meta.DefaultResourceGroupName)
assert.Contains(t, groups, "rg")
}
func TestDescribeResourceGroupTask(t *testing.T) {
rc := &MockRootCoordClientInterface{}
qc := NewQueryCoordMock()
qc.Start()
defer qc.Stop()
ctx := context.Background()
mgr := newShardClientMgr()
InitMetaCache(ctx, rc, qc, mgr)
	// warm up the meta cache so the task does not trigger a remote call to rc
globalMetaCache.GetCollectionSchema(context.Background(), "collection1")
globalMetaCache.GetCollectionSchema(context.Background(), "collection2")
req := &milvuspb.DescribeResourceGroupRequest{
Base: &commonpb.MsgBase{
MsgID: 1,
Timestamp: 2,
TargetID: 3,
},
ResourceGroup: "rg",
}
task := &DescribeResourceGroupTask{
DescribeResourceGroupRequest: req,
ctx: ctx,
queryCoord: qc,
}
task.PreExecute(ctx)
assert.Equal(t, commonpb.MsgType_DescribeResourceGroup, task.Type())
assert.Equal(t, UniqueID(1), task.ID())
assert.Equal(t, Timestamp(2), task.BeginTs())
assert.Equal(t, Timestamp(2), task.EndTs())
assert.Equal(t, paramtable.GetNodeID(), task.Base.GetSourceID())
assert.Equal(t, UniqueID(3), task.Base.GetTargetID())
err := task.Execute(ctx)
assert.Nil(t, err)
assert.Equal(t, commonpb.ErrorCode_Success, task.result.Status.ErrorCode)
groupInfo := task.result.GetResourceGroup()
outgoingNodeNum := groupInfo.GetNumOutgoingNode()
incomingNodeNum := groupInfo.GetNumIncomingNode()
	assert.Contains(t, outgoingNodeNum, "collection1")
	assert.Contains(t, incomingNodeNum, "collection2")
}


@ -104,7 +104,7 @@ func (b *RowCountBasedBalancer) Balance() ([]SegmentAssignPlan, []ChannelAssignP
}
func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]SegmentAssignPlan, []ChannelAssignPlan) {
nodes := replica.Nodes.Collect()
nodes := replica.GetNodes()
if len(nodes) == 0 {
return nil, nil
}
@ -112,6 +112,8 @@ func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]Segment
nodesSegments := make(map[int64][]*meta.Segment)
stoppingNodesSegments := make(map[int64][]*meta.Segment)
outboundNodes := b.meta.ResourceManager.CheckOutboundNodes(replica)
totalCnt := 0
for _, nid := range nodes {
segments := b.dist.SegmentDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nid)
@ -125,6 +127,14 @@ func (b *RowCountBasedBalancer) balanceReplica(replica *meta.Replica) ([]Segment
continue
} else if isStopping {
stoppingNodesSegments[nid] = segments
} else if outboundNodes.Contain(nid) {
				// the node is stopping or has been moved to another resource group,
				// so treat it as stopping and move all of its segments/channels out
				log.RatedInfo(10, "meet outbound node, try to move out all segment/channel",
					zap.Int64("collectionID", replica.GetCollectionID()),
					zap.Int64("replicaID", replica.GetID()),
zap.Int64("node", nid),
)
stoppingNodesSegments[nid] = segments
} else {
nodesSegments[nid] = segments
}
@ -224,7 +234,7 @@ outer:
node.setPriority(node.getPriority() + int(s.GetNumOfRows()))
queue.push(node)
}
return plans, b.getChannelPlan(replica, stoppingNodesSegments)
return plans, b.getChannelPlan(replica, lo.Keys(nodesSegments), lo.Keys(stoppingNodesSegments))
}
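balanceReplica now buckets each node's segments into an online map and a stopping/outbound map, and getChannelPlan receives only the two key sets. Below is a self-contained sketch of that bookkeeping, with a simplified segment type standing in for meta.Segment.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	type segment struct{ id int64 }
	// segments bucketed per node, as balanceReplica does above
	nodesSegments := map[int64][]segment{1: {{id: 1}}, 2: {{id: 2}, {id: 3}}}
	stoppingNodesSegments := map[int64][]segment{3: {{id: 4}, {id: 5}}}
	onlineNodes := lo.Keys(nodesSegments)          // e.g. [1 2] (order not guaranteed)
	offlineNodes := lo.Keys(stoppingNodesSegments) // e.g. [3]
	// getChannelPlan(replica, onlineNodes, offlineNodes) then moves every
	// channel of an offline node onto one of the online nodes.
	fmt.Println(onlineNodes, offlineNodes)
}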
func (b *RowCountBasedBalancer) handleStoppingNodes(replica *meta.Replica, nodeSegments map[int64][]*meta.Segment) ([]SegmentAssignPlan, []ChannelAssignPlan) {
@ -271,17 +281,11 @@ func (b *RowCountBasedBalancer) collectionStoppingSegments(stoppingNodesSegments
return segments, removeRowCnt
}
func (b *RowCountBasedBalancer) getChannelPlan(replica *meta.Replica, stoppingNodesSegments map[int64][]*meta.Segment) []ChannelAssignPlan {
// maybe it will have some strategies to balance the channel in the future
// but now, only balance the channel for the stopping nodes.
return b.getChannelPlanForStoppingNodes(replica, stoppingNodesSegments)
}
func (b *RowCountBasedBalancer) getChannelPlanForStoppingNodes(replica *meta.Replica, stoppingNodesSegments map[int64][]*meta.Segment) []ChannelAssignPlan {
func (b *RowCountBasedBalancer) getChannelPlan(replica *meta.Replica, onlineNodes []int64, offlineNodes []int64) []ChannelAssignPlan {
channelPlans := make([]ChannelAssignPlan, 0)
for nodeID := range stoppingNodesSegments {
for _, nodeID := range offlineNodes {
dmChannels := b.dist.ChannelDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nodeID)
plans := b.AssignChannel(dmChannels, replica.Replica.GetNodes())
plans := b.AssignChannel(dmChannels, onlineNodes)
for i := range plans {
plans[i].From = nodeID
plans[i].ReplicaID = replica.ID


@ -62,11 +62,11 @@ func (suite *RowCountBasedBalancerTestSuite) SetupTest() {
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
testMeta := meta.NewMeta(idAllocator, store)
nodeManager := session.NewNodeManager()
testMeta := meta.NewMeta(idAllocator, store, nodeManager)
testTarget := meta.NewTargetManager(suite.broker, testMeta)
distManager := meta.NewDistributionManager()
nodeManager := session.NewNodeManager()
suite.mockScheduler = task.NewMockScheduler(suite.T())
suite.balancer = NewRowCountBasedBalancer(suite.mockScheduler, nodeManager, distManager, testMeta, testTarget)
}
@ -272,8 +272,10 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
for i := range c.nodes {
nodeInfo := session.NewNodeInfo(c.nodes[i], "127.0.0.1:0")
nodeInfo.UpdateStats(session.WithSegmentCnt(c.segmentCnts[i]))
nodeInfo.UpdateStats(session.WithChannelCnt(len(c.distributionChannels[c.nodes[i]])))
nodeInfo.SetState(c.states[i])
suite.balancer.nodeManager.Add(nodeInfo)
suite.balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, c.nodes[i])
}
segmentPlans, channelPlans := balancer.Balance()
suite.ElementsMatch(c.expectChannelPlans, channelPlans)
@ -283,6 +285,111 @@ func (suite *RowCountBasedBalancerTestSuite) TestBalance() {
}
func (suite *RowCountBasedBalancerTestSuite) TestBalanceOutboundNodes() {
cases := []struct {
name string
nodes []int64
notExistedNodes []int64
segmentCnts []int
states []session.State
shouldMock bool
distributions map[int64][]*meta.Segment
distributionChannels map[int64][]*meta.DmChannel
expectPlans []SegmentAssignPlan
expectChannelPlans []ChannelAssignPlan
}{
{
name: "balance outbound nodes",
nodes: []int64{1, 2, 3},
segmentCnts: []int{1, 2, 2},
states: []session.State{session.NodeStateNormal, session.NodeStateNormal, session.NodeStateNormal},
shouldMock: true,
distributions: map[int64][]*meta.Segment{
1: {{SegmentInfo: &datapb.SegmentInfo{ID: 1, CollectionID: 1, NumOfRows: 10}, Node: 1}},
2: {
{SegmentInfo: &datapb.SegmentInfo{ID: 2, CollectionID: 1, NumOfRows: 20}, Node: 2},
{SegmentInfo: &datapb.SegmentInfo{ID: 3, CollectionID: 1, NumOfRows: 30}, Node: 2},
},
3: {
{SegmentInfo: &datapb.SegmentInfo{ID: 4, CollectionID: 1, NumOfRows: 10}, Node: 3},
{SegmentInfo: &datapb.SegmentInfo{ID: 5, CollectionID: 1, NumOfRows: 10}, Node: 3},
},
},
distributionChannels: map[int64][]*meta.DmChannel{
2: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v2"}, Node: 2},
},
3: {
{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3},
},
},
expectPlans: []SegmentAssignPlan{
{Segment: &meta.Segment{SegmentInfo: &datapb.SegmentInfo{ID: 4, CollectionID: 1, NumOfRows: 10}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
{Segment: &meta.Segment{SegmentInfo: &datapb.SegmentInfo{ID: 5, CollectionID: 1, NumOfRows: 10}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
},
expectChannelPlans: []ChannelAssignPlan{
{Channel: &meta.DmChannel{VchannelInfo: &datapb.VchannelInfo{CollectionID: 1, ChannelName: "v3"}, Node: 3}, From: 3, To: 1, ReplicaID: 1, Weight: weightHigh},
},
},
}
suite.mockScheduler.Mock.On("GetNodeChannelDelta", mock.Anything).Return(0)
for _, c := range cases {
suite.Run(c.name, func() {
suite.SetupSuite()
defer suite.TearDownTest()
balancer := suite.balancer
collection := utils.CreateTestCollection(1, 1)
segments := []*datapb.SegmentBinlogs{
{
SegmentID: 1,
},
{
SegmentID: 2,
},
{
SegmentID: 3,
},
{
SegmentID: 4,
},
{
SegmentID: 5,
},
}
suite.broker.EXPECT().GetRecoveryInfo(mock.Anything, int64(1), int64(1)).Return(
nil, segments, nil)
balancer.targetMgr.UpdateCollectionNextTargetWithPartitions(int64(1), int64(1))
balancer.targetMgr.UpdateCollectionCurrentTarget(1, 1)
collection.LoadPercentage = 100
collection.Status = querypb.LoadStatus_Loaded
balancer.meta.CollectionManager.PutCollection(collection)
balancer.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, append(c.nodes, c.notExistedNodes...)))
for node, s := range c.distributions {
balancer.dist.SegmentDistManager.Update(node, s...)
}
for node, v := range c.distributionChannels {
balancer.dist.ChannelDistManager.Update(node, v...)
}
for i := range c.nodes {
nodeInfo := session.NewNodeInfo(c.nodes[i], "127.0.0.1:0")
nodeInfo.UpdateStats(session.WithSegmentCnt(c.segmentCnts[i]))
nodeInfo.UpdateStats(session.WithChannelCnt(len(c.distributionChannels[c.nodes[i]])))
nodeInfo.SetState(c.states[i])
suite.balancer.nodeManager.Add(nodeInfo)
}
			// assign only nodes 1 and 2 to the default rg, which leaves node 3 outbound
err := balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
suite.NoError(err)
err = balancer.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 2)
suite.NoError(err)
segmentPlans, channelPlans := balancer.Balance()
suite.ElementsMatch(c.expectChannelPlans, channelPlans)
suite.ElementsMatch(c.expectPlans, segmentPlans)
})
}
}
func (suite *RowCountBasedBalancerTestSuite) TestBalanceOnLoadingCollection() {
cases := []struct {
name string


@ -27,6 +27,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/samber/lo"
"go.uber.org/zap"
)
@ -134,7 +135,7 @@ func (c *ChannelChecker) getDmChannelDiff(targetMgr *meta.TargetManager,
func (c *ChannelChecker) getChannelDist(distMgr *meta.DistributionManager, replica *meta.Replica) []*meta.DmChannel {
dist := make([]*meta.DmChannel, 0)
for _, nodeID := range replica.Nodes.Collect() {
for _, nodeID := range replica.GetNodes() {
dist = append(dist, distMgr.ChannelDistManager.GetByCollectionAndNode(replica.GetCollectionID(), nodeID)...)
}
return dist
@ -170,7 +171,11 @@ func (c *ChannelChecker) findRepeatedChannels(distMgr *meta.DistributionManager,
}
func (c *ChannelChecker) createChannelLoadTask(ctx context.Context, channels []*meta.DmChannel, replica *meta.Replica) []task.Task {
plans := c.balancer.AssignChannel(channels, replica.Replica.GetNodes())
outboundNodes := c.meta.ResourceManager.CheckOutboundNodes(replica)
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
return !outboundNodes.Contain(node)
})
plans := c.balancer.AssignChannel(channels, availableNodes)
for i := range plans {
plans[i].ReplicaID = replica.GetID()
}
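The channel checker above and the segment checker later in this change apply the same filter before assignment: nodes that CheckOutboundNodes reports for the replica are dropped, so load tasks never target a node that is leaving the replica's resource group. Below is a self-contained sketch of the filtering, with a plain map standing in for the UniqueSet returned by CheckOutboundNodes.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	replicaNodes := []int64{1, 2, 3}
	outbound := map[int64]struct{}{3: {}} // stand-in for CheckOutboundNodes(replica)
	// lo.Filter passes (item, index) to the predicate; the index is unused here
	availableNodes := lo.Filter(replicaNodes, func(node int64, _ int) bool {
		_, isOutbound := outbound[node]
		return !isOutbound
	})
	fmt.Println(availableNodes) // [1 2]
}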


@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/etcd"
@ -39,6 +40,8 @@ type ChannelCheckerTestSuite struct {
checker *ChannelChecker
meta *meta.Meta
broker *meta.MockBroker
nodeMgr *session.NodeManager
}
func (suite *ChannelCheckerTestSuite) SetupSuite() {
@ -62,7 +65,8 @@ func (suite *ChannelCheckerTestSuite) SetupTest() {
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store)
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
suite.broker = meta.NewMockBroker(suite.T())
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
@ -98,6 +102,8 @@ func (suite *ChannelCheckerTestSuite) TestLoadChannel() {
checker := suite.checker
checker.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
checker.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1}))
suite.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
channels := []*datapb.VchannelInfo{
{


@ -29,6 +29,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/samber/lo"
"go.uber.org/zap"
)
@ -144,7 +145,7 @@ func (c *SegmentChecker) getStreamingSegmentDiff(targetMgr *meta.TargetManager,
func (c *SegmentChecker) getStreamingSegmentsDist(distMgr *meta.DistributionManager, replica *meta.Replica) map[int64]*meta.Segment {
segments := make(map[int64]*meta.Segment, 0)
for _, node := range replica.Nodes.Collect() {
for _, node := range replica.GetNodes() {
segmentsOnNodes := distMgr.LeaderViewManager.GetGrowingSegmentDistByCollectionAndNode(replica.CollectionID, node)
for k, v := range segmentsOnNodes {
segments[k] = v
@ -196,7 +197,7 @@ func (c *SegmentChecker) getHistoricalSegmentDiff(targetMgr *meta.TargetManager,
func (c *SegmentChecker) getHistoricalSegmentsDist(distMgr *meta.DistributionManager, replica *meta.Replica) []*meta.Segment {
ret := make([]*meta.Segment, 0)
for _, node := range replica.Nodes.Collect() {
for _, node := range replica.GetNodes() {
ret = append(ret, distMgr.SegmentDistManager.GetByCollectionAndNode(replica.CollectionID, node)...)
}
return ret
@ -266,7 +267,11 @@ func (c *SegmentChecker) createSegmentLoadTasks(ctx context.Context, segments []
}
packedSegments = append(packedSegments, &meta.Segment{SegmentInfo: s})
}
plans := c.balancer.AssignSegment(packedSegments, replica.Replica.GetNodes())
outboundNodes := c.meta.ResourceManager.CheckOutboundNodes(replica)
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
return !outboundNodes.Contain(node)
})
plans := c.balancer.AssignSegment(packedSegments, availableNodes)
for i := range plans {
plans[i].ReplicaID = replica.GetID()
}


@ -30,6 +30,7 @@ import (
"github.com/milvus-io/milvus/internal/querycoordv2/balance"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/task"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/etcd"
@ -41,6 +42,7 @@ type SegmentCheckerTestSuite struct {
checker *SegmentChecker
meta *meta.Meta
broker *meta.MockBroker
nodeMgr *session.NodeManager
}
func (suite *SegmentCheckerTestSuite) SetupSuite() {
@ -64,7 +66,8 @@ func (suite *SegmentCheckerTestSuite) SetupTest() {
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store)
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
distManager := meta.NewDistributionManager()
suite.broker = meta.NewMockBroker(suite.T())
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
@ -100,6 +103,10 @@ func (suite *SegmentCheckerTestSuite) TestLoadSegments() {
// set meta
checker.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
checker.meta.ReplicaManager.Put(utils.CreateTestReplica(1, 1, []int64{1, 2}))
suite.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
checker.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 2)
// set target
segments := []*datapb.SegmentBinlogs{


@ -65,7 +65,7 @@ func (suite *DistControllerTestSuite) SetupTest() {
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store)
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
suite.mockCluster = session.NewMockCluster(suite.T())
nodeManager := session.NewNodeManager()


@ -48,7 +48,7 @@ import (
func (s *Server) checkAnyReplicaAvailable(collectionID int64) bool {
for _, replica := range s.meta.ReplicaManager.GetByCollection(collectionID) {
isAvailable := true
for node := range replica.Nodes {
for _, node := range replica.GetNodes() {
if s.nodeMgr.Get(node) == nil {
isAvailable = false
break
@ -94,7 +94,11 @@ func (s *Server) balanceSegments(ctx context.Context, req *querypb.LoadBalanceRe
srcNode := req.GetSourceNodeIDs()[0]
dstNodeSet := typeutil.NewUniqueSet(req.GetDstNodeIDs()...)
if dstNodeSet.Len() == 0 {
dstNodeSet.Insert(replica.GetNodes()...)
outboundNodes := s.meta.ResourceManager.CheckOutboundNodes(replica)
availableNodes := lo.Filter(replica.Replica.GetNodes(), func(node int64, _ int) bool {
return !outboundNodes.Contain(node)
})
dstNodeSet.Insert(availableNodes...)
}
dstNodeSet.Remove(srcNode)
@ -302,7 +306,13 @@ func (s *Server) tryGetNodesMetrics(ctx context.Context, req *milvuspb.GetMetric
}
func (s *Server) fillReplicaInfo(replica *meta.Replica, withShardNodes bool) (*milvuspb.ReplicaInfo, error) {
info := utils.Replica2ReplicaInfo(replica.Replica)
info := &milvuspb.ReplicaInfo{
ReplicaID: replica.GetID(),
CollectionID: replica.GetCollectionID(),
NodeIds: replica.GetNodes(),
ResourceGroupName: replica.GetResourceGroup(),
NumOutboundNode: s.meta.GetOutgoingNodeNumByReplica(replica),
}
channels := s.targetMgr.GetDmChannelsByCollection(replica.GetCollectionID(), meta.CurrentTarget)
if len(channels) == 0 {
@ -335,7 +345,7 @@ func (s *Server) fillReplicaInfo(replica *meta.Replica, withShardNodes bool) (*m
}
if withShardNodes {
shardNodes := lo.FilterMap(segments, func(segment *meta.Segment, _ int) (int64, bool) {
if replica.Nodes.Contain(segment.Node) {
if replica.Contains(segment.Node) {
return segment.Node, true
}
return 0, false


@ -197,10 +197,11 @@ func (job *LoadCollectionJob) Execute() error {
}
// Create replicas
replicas, err := utils.SpawnReplicas(job.meta.ReplicaManager,
job.nodeMgr,
replicas, err := utils.SpawnReplicasWithRG(job.meta,
req.GetCollectionID(),
req.GetReplicaNumber())
req.GetResourceGroups(),
req.GetReplicaNumber(),
)
if err != nil {
msg := "failed to spawn replica for collection"
log.Error(msg, zap.Error(err))
@ -209,7 +210,8 @@ func (job *LoadCollectionJob) Execute() error {
for _, replica := range replicas {
log.Info("replica created",
zap.Int64("replicaID", replica.GetID()),
zap.Int64s("nodes", replica.GetNodes()))
zap.Int64s("nodes", replica.GetNodes()),
zap.String("resourceGroup", replica.GetResourceGroup()))
}
// Fetch channels and segments from DataCoord
@ -411,10 +413,11 @@ func (job *LoadPartitionJob) Execute() error {
}
// Create replicas
replicas, err := utils.SpawnReplicas(job.meta.ReplicaManager,
job.nodeMgr,
replicas, err := utils.SpawnReplicasWithRG(job.meta,
req.GetCollectionID(),
req.GetReplicaNumber())
req.GetResourceGroups(),
req.GetReplicaNumber(),
)
if err != nil {
msg := "failed to spawn replica for collection"
log.Error(msg, zap.Error(err))
@ -423,7 +426,8 @@ func (job *LoadPartitionJob) Execute() error {
for _, replica := range replicas {
log.Info("replica created",
zap.Int64("replicaID", replica.GetID()),
zap.Int64s("nodes", replica.GetNodes()))
zap.Int64s("nodes", replica.GetNodes()),
zap.String("resourceGroup", replica.GetResourceGroup()))
}
// It's safe here to call UpdateCollectionNextTargetWithPartitions, as the collection not existing


@ -131,19 +131,29 @@ func (suite *JobSuite) SetupTest() {
suite.store = meta.NewMetaStore(suite.kv)
suite.dist = meta.NewDistributionManager()
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store)
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store, suite.nodeMgr)
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
suite.targetObserver = observers.NewTargetObserver(suite.meta,
suite.targetMgr,
suite.dist,
suite.broker,
)
suite.nodeMgr = session.NewNodeManager()
suite.nodeMgr.Add(&session.NodeInfo{})
suite.scheduler = NewScheduler()
suite.scheduler.Start(context.Background())
meta.GlobalFailedLoadCache = meta.NewFailedLoadCache()
suite.nodeMgr.Add(session.NewNodeInfo(1000, "localhost"))
suite.nodeMgr.Add(session.NewNodeInfo(2000, "localhost"))
suite.nodeMgr.Add(session.NewNodeInfo(3000, "localhost"))
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
suite.NoError(err)
}
func (suite *JobSuite) TearDownTest() {
@ -265,6 +275,48 @@ func (suite *JobSuite) TestLoadCollection() {
err := job.Wait()
suite.ErrorIs(err, ErrLoadParameterMismatched)
}
suite.meta.ResourceManager.AddResourceGroup("rg1")
suite.meta.ResourceManager.AddResourceGroup("rg2")
suite.meta.ResourceManager.AddResourceGroup("rg3")
	// Load with 3 replicas on 1 resource group
req := &querypb.LoadCollectionRequest{
CollectionID: 1001,
ReplicaNumber: 3,
ResourceGroups: []string{"rg1"},
}
job := NewLoadCollectionJob(
ctx,
req,
suite.dist,
suite.meta,
suite.targetMgr,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
err := job.Wait()
suite.ErrorContains(err, meta.ErrNodeNotEnough.Error())
	// Load with 3 replicas on 3 resource groups
req = &querypb.LoadCollectionRequest{
CollectionID: 1002,
ReplicaNumber: 3,
ResourceGroups: []string{"rg1", "rg2", "rg3"},
}
job = NewLoadCollectionJob(
ctx,
req,
suite.dist,
suite.meta,
suite.targetMgr,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
err = job.Wait()
suite.ErrorContains(err, meta.ErrNodeNotEnough.Error())
}
func (suite *JobSuite) TestLoadCollectionWithReplicas() {
@ -278,7 +330,7 @@ func (suite *JobSuite) TestLoadCollectionWithReplicas() {
// Load with 3 replica
req := &querypb.LoadCollectionRequest{
CollectionID: collection,
ReplicaNumber: 3,
ReplicaNumber: 5,
}
job := NewLoadCollectionJob(
ctx,
@ -482,6 +534,50 @@ func (suite *JobSuite) TestLoadPartition() {
err := job.Wait()
suite.ErrorIs(err, ErrLoadParameterMismatched)
}
suite.meta.ResourceManager.AddResourceGroup("rg1")
suite.meta.ResourceManager.AddResourceGroup("rg2")
suite.meta.ResourceManager.AddResourceGroup("rg3")
	// load 3 replicas into 1 rg: passes the rg count check but fails because the rg has no nodes
req := &querypb.LoadPartitionsRequest{
CollectionID: 100,
PartitionIDs: []int64{1001},
ReplicaNumber: 3,
ResourceGroups: []string{"rg1"},
}
job := NewLoadPartitionJob(
ctx,
req,
suite.dist,
suite.meta,
suite.targetMgr,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
err := job.Wait()
suite.Contains(err.Error(), meta.ErrNodeNotEnough.Error())
	// load 3 replicas into 3 rgs: passes the rg count check but fails because the rgs have no nodes
req = &querypb.LoadPartitionsRequest{
CollectionID: 102,
PartitionIDs: []int64{1001},
ReplicaNumber: 3,
ResourceGroups: []string{"rg1", "rg2", "rg3"},
}
job = NewLoadPartitionJob(
ctx,
req,
suite.dist,
suite.meta,
suite.targetMgr,
suite.broker,
suite.nodeMgr,
)
suite.scheduler.Add(job)
err = job.Wait()
suite.Contains(err.Error(), meta.ErrNodeNotEnough.Error())
}
func (suite *JobSuite) TestLoadPartitionWithReplicas() {
@ -496,7 +592,7 @@ func (suite *JobSuite) TestLoadPartitionWithReplicas() {
req := &querypb.LoadPartitionsRequest{
CollectionID: collection,
PartitionIDs: suite.partitions[collection],
ReplicaNumber: 3,
ReplicaNumber: 5,
}
job := NewLoadPartitionJob(
ctx,
@ -707,7 +803,16 @@ func (suite *JobSuite) TestReleasePartition() {
func (suite *JobSuite) TestLoadCollectionStoreFailed() {
// Store collection failed
store := meta.NewMockStore(suite.T())
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store)
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store, suite.nodeMgr)
store.EXPECT().SaveResourceGroup(mock.Anything, mock.Anything).Return(nil)
err := suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
suite.NoError(err)
for _, collection := range suite.collections {
if suite.loadTypes[collection] != querypb.LoadType_LoadCollection {
continue
@ -743,8 +848,17 @@ func (suite *JobSuite) TestLoadCollectionStoreFailed() {
func (suite *JobSuite) TestLoadPartitionStoreFailed() {
// Store partition failed
store := meta.NewMockStore(suite.T())
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store)
err := errors.New("failed to store collection")
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), store, suite.nodeMgr)
store.EXPECT().SaveResourceGroup(mock.Anything, mock.Anything).Return(nil)
err := suite.meta.AssignNode(meta.DefaultResourceGroupName, 1000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 2000)
suite.NoError(err)
err = suite.meta.AssignNode(meta.DefaultResourceGroupName, 3000)
suite.NoError(err)
err = errors.New("failed to store collection")
for _, collection := range suite.collections {
if suite.loadTypes[collection] != querypb.LoadType_LoadPartition {
continue
@ -775,7 +889,7 @@ func (suite *JobSuite) TestLoadPartitionStoreFailed() {
func (suite *JobSuite) TestLoadCreateReplicaFailed() {
// Store replica failed
suite.meta = meta.NewMeta(ErrorIDAllocator(), suite.store)
suite.meta = meta.NewMeta(ErrorIDAllocator(), suite.store, session.NewNodeManager())
for _, collection := range suite.collections {
req := &querypb.LoadCollectionRequest{
CollectionID: collection,


@ -91,7 +91,7 @@ func (m *ChannelDistManager) GetShardLeader(replica *Replica, shard string) (int
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
for node := range replica.Nodes {
for _, node := range replica.GetNodes() {
channels := m.channels[node]
for _, dmc := range channels {
if dmc.ChannelName == shard {
@ -108,7 +108,7 @@ func (m *ChannelDistManager) GetShardLeadersByReplica(replica *Replica) map[stri
defer m.rwmutex.RUnlock()
ret := make(map[string]int64)
for node := range replica.Nodes {
for _, node := range replica.GetNodes() {
channels := m.channels[node]
for _, dmc := range channels {
if dmc.GetCollectionID() == replica.GetCollectionID() {


@ -100,18 +100,18 @@ func (suite *ChannelDistManagerSuite) TestGetBy() {
func (suite *ChannelDistManagerSuite) TestGetShardLeader() {
replicas := []*Replica{
{
Replica: &querypb.Replica{
NewReplica(
&querypb.Replica{
CollectionID: suite.collection,
},
Nodes: typeutil.NewUniqueSet(suite.nodes[0], suite.nodes[2]),
},
{
Replica: &querypb.Replica{
typeutil.NewUniqueSet(suite.nodes[0], suite.nodes[2]),
),
NewReplica(
&querypb.Replica{
CollectionID: suite.collection,
},
Nodes: typeutil.NewUniqueSet(suite.nodes[1]),
},
typeutil.NewUniqueSet(suite.nodes[1]),
),
}
// Test on replica 0


@ -16,17 +16,22 @@
package meta
import "github.com/milvus-io/milvus/internal/querycoordv2/session"
type Meta struct {
*CollectionManager
*ReplicaManager
*ResourceManager
}
func NewMeta(
idAllocator func() (int64, error),
store Store,
nodeMgr *session.NodeManager,
) *Meta {
return &Meta{
NewCollectionManager(store),
NewReplicaManager(idAllocator, store),
NewResourceManager(store, nodeMgr),
}
}


@ -155,6 +155,51 @@ func (_c *MockStore_GetReplicas_Call) Return(_a0 []*querypb.Replica, _a1 error)
return _c
}
// GetResourceGroups provides a mock function with given fields:
func (_m *MockStore) GetResourceGroups() ([]*querypb.ResourceGroup, error) {
ret := _m.Called()
var r0 []*querypb.ResourceGroup
if rf, ok := ret.Get(0).(func() []*querypb.ResourceGroup); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*querypb.ResourceGroup)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MockStore_GetResourceGroups_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetResourceGroups'
type MockStore_GetResourceGroups_Call struct {
*mock.Call
}
// GetResourceGroups is a helper method to define mock.On call
func (_e *MockStore_Expecter) GetResourceGroups() *MockStore_GetResourceGroups_Call {
return &MockStore_GetResourceGroups_Call{Call: _e.mock.On("GetResourceGroups")}
}
func (_c *MockStore_GetResourceGroups_Call) Run(run func()) *MockStore_GetResourceGroups_Call {
_c.Call.Run(func(args mock.Arguments) {
run()
})
return _c
}
func (_c *MockStore_GetResourceGroups_Call) Return(_a0 []*querypb.ResourceGroup, _a1 error) *MockStore_GetResourceGroups_Call {
_c.Call.Return(_a0, _a1)
return _c
}
// ReleaseCollection provides a mock function with given fields: id
func (_m *MockStore) ReleaseCollection(id int64) error {
ret := _m.Called(id)
@ -319,6 +364,43 @@ func (_c *MockStore_ReleaseReplicas_Call) Return(_a0 error) *MockStore_ReleaseRe
return _c
}
// RemoveResourceGroup provides a mock function with given fields: rgName
func (_m *MockStore) RemoveResourceGroup(rgName string) error {
ret := _m.Called(rgName)
var r0 error
if rf, ok := ret.Get(0).(func(string) error); ok {
r0 = rf(rgName)
} else {
r0 = ret.Error(0)
}
return r0
}
// MockStore_RemoveResourceGroup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RemoveResourceGroup'
type MockStore_RemoveResourceGroup_Call struct {
*mock.Call
}
// RemoveResourceGroup is a helper method to define mock.On call
// - rgName string
func (_e *MockStore_Expecter) RemoveResourceGroup(rgName interface{}) *MockStore_RemoveResourceGroup_Call {
return &MockStore_RemoveResourceGroup_Call{Call: _e.mock.On("RemoveResourceGroup", rgName)}
}
func (_c *MockStore_RemoveResourceGroup_Call) Run(run func(rgName string)) *MockStore_RemoveResourceGroup_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(string))
})
return _c
}
func (_c *MockStore_RemoveResourceGroup_Call) Return(_a0 error) *MockStore_RemoveResourceGroup_Call {
_c.Call.Return(_a0)
return _c
}
// SaveCollection provides a mock function with given fields: info
func (_m *MockStore) SaveCollection(info *querypb.CollectionLoadInfo) error {
ret := _m.Called(info)
@ -443,6 +525,56 @@ func (_c *MockStore_SaveReplica_Call) Return(_a0 error) *MockStore_SaveReplica_C
return _c
}
// SaveResourceGroup provides a mock function with given fields: rgs
func (_m *MockStore) SaveResourceGroup(rgs ...*querypb.ResourceGroup) error {
_va := make([]interface{}, len(rgs))
for _i := range rgs {
_va[_i] = rgs[_i]
}
var _ca []interface{}
_ca = append(_ca, _va...)
ret := _m.Called(_ca...)
var r0 error
if rf, ok := ret.Get(0).(func(...*querypb.ResourceGroup) error); ok {
r0 = rf(rgs...)
} else {
r0 = ret.Error(0)
}
return r0
}
// MockStore_SaveResourceGroup_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SaveResourceGroup'
type MockStore_SaveResourceGroup_Call struct {
*mock.Call
}
// SaveResourceGroup is a helper method to define mock.On call
// - rgs ...*querypb.ResourceGroup
func (_e *MockStore_Expecter) SaveResourceGroup(rgs ...interface{}) *MockStore_SaveResourceGroup_Call {
return &MockStore_SaveResourceGroup_Call{Call: _e.mock.On("SaveResourceGroup",
append([]interface{}{}, rgs...)...)}
}
func (_c *MockStore_SaveResourceGroup_Call) Run(run func(rgs ...*querypb.ResourceGroup)) *MockStore_SaveResourceGroup_Call {
_c.Call.Run(func(args mock.Arguments) {
variadicArgs := make([]*querypb.ResourceGroup, len(args)-0)
for i, a := range args[0:] {
if a != nil {
variadicArgs[i] = a.(*querypb.ResourceGroup)
}
}
run(variadicArgs...)
})
return _c
}
func (_c *MockStore_SaveResourceGroup_Call) Return(_a0 error) *MockStore_SaveResourceGroup_Call {
_c.Call.Return(_a0)
return _c
}
type mockConstructorTestingTNewMockStore interface {
mock.TestingT
Cleanup(func())


@ -30,23 +30,66 @@ import (
type Replica struct {
*querypb.Replica
Nodes UniqueSet // a helper field for manipulating replica's Nodes slice field
nodes UniqueSet // a helper field for manipulating replica's Nodes slice field
rwmutex sync.RWMutex
}
func NewReplica(replica *querypb.Replica, nodes UniqueSet) *Replica {
return &Replica{
Replica: replica,
nodes: nodes,
}
}
func (replica *Replica) AddNode(nodes ...int64) {
replica.Nodes.Insert(nodes...)
replica.Replica.Nodes = replica.Nodes.Collect()
replica.rwmutex.Lock()
defer replica.rwmutex.Unlock()
replica.nodes.Insert(nodes...)
replica.Replica.Nodes = replica.nodes.Collect()
}
func (replica *Replica) GetNodes() []int64 {
	if replica == nil {
		return nil
	}
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	return replica.nodes.Collect()
}
func (replica *Replica) Len() int {
	if replica == nil {
		return 0
	}
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	return replica.nodes.Len()
}
func (replica *Replica) Contains(node int64) bool {
	if replica == nil {
		return false
	}
	replica.rwmutex.RLock()
	defer replica.rwmutex.RUnlock()
	return replica.nodes.Contain(node)
}
func (replica *Replica) RemoveNode(nodes ...int64) {
replica.Nodes.Remove(nodes...)
replica.Replica.Nodes = replica.Nodes.Collect()
replica.rwmutex.Lock()
defer replica.rwmutex.Unlock()
replica.nodes.Remove(nodes...)
replica.Replica.Nodes = replica.nodes.Collect()
}
func (replica *Replica) Clone() *Replica {
replica.rwmutex.RLock()
defer replica.rwmutex.RUnlock()
return &Replica{
Replica: proto.Clone(replica.Replica).(*querypb.Replica),
Nodes: NewUniqueSet(replica.Replica.Nodes...),
nodes: NewUniqueSet(replica.Replica.Nodes...),
}
}
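With the rwmutex in place, a Replica can be read while another goroutine mutates its node set. The sketch below is illustrative only; it assumes the import paths used elsewhere in this change and exercises just the constructor and accessors shown above.

package main

import (
	"fmt"
	"sync"

	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/internal/util/typeutil"
)

func main() {
	replica := meta.NewReplica(
		&querypb.Replica{ID: 1, CollectionID: 1000},
		typeutil.NewUniqueSet(1, 2),
	)
	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); replica.AddNode(3) }()     // writer
	go func() { defer wg.Done(); _ = replica.GetNodes() }() // concurrent reader
	wg.Wait()
	fmt.Println(replica.Len(), replica.Contains(3)) // 3 true
}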
@ -75,10 +118,14 @@ func (m *ReplicaManager) Recover(collections []int64) error {
collectionSet := typeutil.NewUniqueSet(collections...)
for _, replica := range replicas {
if len(replica.GetResourceGroup()) == 0 {
replica.ResourceGroup = DefaultResourceGroupName
}
if collectionSet.Contain(replica.GetCollectionID()) {
m.replicas[replica.GetID()] = &Replica{
Replica: replica,
Nodes: NewUniqueSet(replica.GetNodes()...),
nodes: NewUniqueSet(replica.GetNodes()...),
}
log.Info("recover replica",
zap.Int64("collectionID", replica.GetCollectionID()),
@ -109,13 +156,13 @@ func (m *ReplicaManager) Get(id UniqueID) *Replica {
// Spawn spawns replicas of the given number, for given collection,
// this doesn't store these replicas and assign nodes to them.
func (m *ReplicaManager) Spawn(collection int64, replicaNumber int32) ([]*Replica, error) {
func (m *ReplicaManager) Spawn(collection int64, replicaNumber int32, rgName string) ([]*Replica, error) {
var (
replicas = make([]*Replica, replicaNumber)
err error
)
for i := range replicas {
replicas[i], err = m.spawn(collection)
replicas[i], err = m.spawn(collection, rgName)
if err != nil {
return nil, err
}
@ -130,17 +177,18 @@ func (m *ReplicaManager) Put(replicas ...*Replica) error {
return m.put(replicas...)
}
func (m *ReplicaManager) spawn(collectionID UniqueID) (*Replica, error) {
func (m *ReplicaManager) spawn(collectionID UniqueID, rgName string) (*Replica, error) {
id, err := m.idAllocator()
if err != nil {
return nil, err
}
return &Replica{
Replica: &querypb.Replica{
ID: id,
CollectionID: collectionID,
ID: id,
CollectionID: collectionID,
ResourceGroup: rgName,
},
Nodes: make(UniqueSet),
nodes: make(UniqueSet),
}, nil
}
@ -192,7 +240,7 @@ func (m *ReplicaManager) GetByCollectionAndNode(collectionID, nodeID UniqueID) *
defer m.rwmutex.RUnlock()
for _, replica := range m.replicas {
if replica.CollectionID == collectionID && replica.Nodes.Contain(nodeID) {
if replica.CollectionID == collectionID && replica.nodes.Contain(nodeID) {
return replica
}
}
@ -200,6 +248,34 @@ func (m *ReplicaManager) GetByCollectionAndNode(collectionID, nodeID UniqueID) *
return nil
}
func (m *ReplicaManager) GetByCollectionAndRG(collectionID int64, rgName string) []*Replica {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
ret := make([]*Replica, 0)
for _, replica := range m.replicas {
if replica.GetCollectionID() == collectionID && replica.GetResourceGroup() == rgName {
ret = append(ret, replica)
}
}
return ret
}
func (m *ReplicaManager) GetByResourceGroup(rgName string) []*Replica {
m.rwmutex.RLock()
defer m.rwmutex.RUnlock()
ret := make([]*Replica, 0)
for _, replica := range m.replicas {
if replica.GetResourceGroup() == rgName {
ret = append(ret, replica)
}
}
return ret
}
func (m *ReplicaManager) AddNode(replicaID UniqueID, nodes ...UniqueID) error {
m.rwmutex.Lock()
defer m.rwmutex.Unlock()
@ -227,3 +303,17 @@ func (m *ReplicaManager) RemoveNode(replicaID UniqueID, nodes ...UniqueID) error
replica.RemoveNode(nodes...)
return m.put(replica)
}
func (m *ReplicaManager) GetResourceGroupByCollection(collection UniqueID) typeutil.Set[string] {
	m.rwmutex.RLock()
	defer m.rwmutex.RUnlock()
ret := typeutil.NewSet[string]()
for _, r := range m.replicas {
if r.GetCollectionID() == collection {
ret.Insert(r.GetResourceGroup())
}
}
return ret
}


@ -76,14 +76,14 @@ func (suite *ReplicaManagerSuite) TestSpawn() {
mgr := suite.mgr
for i, collection := range suite.collections {
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i])
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
suite.NoError(err)
suite.Len(replicas, int(suite.replicaNumbers[i]))
}
mgr.idAllocator = ErrorIDAllocator()
for i, collection := range suite.collections {
_, err := mgr.Spawn(collection, suite.replicaNumbers[i])
_, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
suite.Error(err)
}
}
@ -98,8 +98,8 @@ func (suite *ReplicaManagerSuite) TestGet() {
for _, replica := range replicas {
suite.Equal(collection, replica.GetCollectionID())
suite.Equal(replica, mgr.Get(replica.GetID()))
suite.Equal(replica.Replica.Nodes, replica.Nodes.Collect())
replicaNodes[replica.GetID()] = replica.Replica.Nodes
suite.Equal(replica.Replica.GetNodes(), replica.GetNodes())
replicaNodes[replica.GetID()] = replica.Replica.GetNodes()
nodes = append(nodes, replica.Replica.Nodes...)
}
suite.Len(nodes, int(suite.replicaNumbers[i]))
@ -137,9 +137,9 @@ func (suite *ReplicaManagerSuite) TestRecover() {
suite.NotNil(replica)
suite.EqualValues(1000, replica.CollectionID)
suite.EqualValues([]int64{1, 2, 3}, replica.Replica.Nodes)
suite.Len(replica.Nodes, len(replica.Replica.GetNodes()))
suite.Len(replica.GetNodes(), len(replica.Replica.GetNodes()))
for _, node := range replica.Replica.GetNodes() {
suite.True(replica.Nodes.Contain(node))
suite.True(replica.Contains(node))
}
}
@ -175,7 +175,7 @@ func (suite *ReplicaManagerSuite) TestNodeManipulate() {
suite.NoError(err)
replica = mgr.GetByCollectionAndNode(collection, newNode)
suite.Contains(replica.Nodes, newNode)
suite.Contains(replica.GetNodes(), newNode)
suite.Contains(replica.Replica.GetNodes(), newNode)
err = mgr.RemoveNode(replica.GetID(), firstNode)
@ -192,7 +192,7 @@ func (suite *ReplicaManagerSuite) TestNodeManipulate() {
suite.Nil(replica)
replica = mgr.GetByCollectionAndNode(collection, newNode)
suite.Contains(replica.Nodes, newNode)
suite.Contains(replica.GetNodes(), newNode)
suite.Contains(replica.Replica.GetNodes(), newNode)
}
}
@ -201,7 +201,7 @@ func (suite *ReplicaManagerSuite) spawnAndPutAll() {
mgr := suite.mgr
for i, collection := range suite.collections {
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i])
replicas, err := mgr.Spawn(collection, suite.replicaNumbers[i], DefaultResourceGroupName)
suite.NoError(err)
suite.Len(replicas, int(suite.replicaNumbers[i]))
for j, replica := range replicas {
@ -212,6 +212,27 @@ func (suite *ReplicaManagerSuite) spawnAndPutAll() {
}
}
func (suite *ReplicaManagerSuite) TestResourceGroup() {
mgr := NewReplicaManager(suite.idAllocator, suite.store)
replica1, err := mgr.spawn(int64(1000), DefaultResourceGroupName)
replica1.AddNode(1)
suite.NoError(err)
mgr.Put(replica1)
replica2, err := mgr.spawn(int64(2000), DefaultResourceGroupName)
replica2.AddNode(1)
suite.NoError(err)
mgr.Put(replica2)
replicas := mgr.GetByResourceGroup(DefaultResourceGroupName)
suite.Len(replicas, 2)
replicas = mgr.GetByCollectionAndRG(int64(1000), DefaultResourceGroupName)
suite.Len(replicas, 1)
rgNames := mgr.GetResourceGroupByCollection(int64(1000))
suite.Len(rgNames, 1)
suite.True(rgNames.Contain(DefaultResourceGroupName))
}
func (suite *ReplicaManagerSuite) clearMemory() {
suite.mgr.replicas = make(map[int64]*Replica)
}


@ -0,0 +1,632 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package meta
import (
"errors"
"sync"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/typeutil"
. "github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/samber/lo"
"go.uber.org/zap"
)
var (
ErrNodeAlreadyAssign = errors.New("node already assign to other resource group")
ErrRGIsFull = errors.New("resource group is full")
ErrRGIsEmpty = errors.New("resource group is empty")
ErrRGNotExist = errors.New("resource group doesn't exist")
ErrRGAlreadyExist = errors.New("resource group already exist")
ErrRGAssignNodeFailed = errors.New("failed to assign node to resource group")
ErrRGUnAssignNodeFailed = errors.New("failed to unassign node from resource group")
ErrSaveResourceGroupToStore = errors.New("failed to save resource group to store")
ErrRemoveResourceGroupFromStore = errors.New("failed to remove resource group from store")
ErrRecoverResourceGroupToStore = errors.New("failed to recover resource group to store")
ErrNodeNotAssignToRG = errors.New("node hasn't been assign to any resource group")
ErrRGNameIsEmpty = errors.New("resource group name couldn't be empty")
ErrDeleteDefaultRG = errors.New("delete default rg is not permitted")
ErrDeleteNonEmptyRG = errors.New("delete non-empty rg is not permitted")
ErrNodeNotExist = errors.New("node does not exist")
ErrNodeStopped = errors.New("node has been stopped")
ErrRGLimit = errors.New("resource group num has reached the limit of 1024")
ErrNodeNotEnough = errors.New("not enough nodes")
)
var DefaultResourceGroupName = "__default_resource_group"
type ResourceGroup struct {
nodes UniqueSet
capacity int
}
func NewResourceGroup(capacity int) *ResourceGroup {
rg := &ResourceGroup{
nodes: typeutil.NewUniqueSet(),
capacity: capacity,
}
return rg
}
// assign node to resource group
func (rg *ResourceGroup) assignNode(id int64) error {
if rg.containsNode(id) {
return ErrNodeAlreadyAssign
}
rg.nodes.Insert(id)
rg.capacity++
return nil
}
// unassign node from resource group
func (rg *ResourceGroup) unassignNode(id int64) error {
if !rg.containsNode(id) {
// removing a non-existent node should be tolerated
return nil
}
rg.nodes.Remove(id)
rg.capacity--
return nil
}
func (rg *ResourceGroup) handleNodeUp(id int64) error {
if rg.LackOfNodes() == 0 {
return ErrRGIsFull
}
if rg.containsNode(id) {
return ErrNodeAlreadyAssign
}
rg.nodes.Insert(id)
return nil
}
func (rg *ResourceGroup) handleNodeDown(id int64) error {
if !rg.containsNode(id) {
// removing a non-existent node should be tolerated
return nil
}
rg.nodes.Remove(id)
return nil
}
func (rg *ResourceGroup) LackOfNodes() int {
return rg.capacity - len(rg.nodes)
}
func (rg *ResourceGroup) containsNode(id int64) bool {
return rg.nodes.Contain(id)
}
func (rg *ResourceGroup) GetNodes() []int64 {
return rg.nodes.Collect()
}
func (rg *ResourceGroup) GetCapacity() int {
return rg.capacity
}
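A minimal sketch of the capacity semantics above, for orientation only (the helper name and node ids are invented, and it would have to live in this package since assignNode and handleNodeDown are unexported): assignNode/unassignNode also move the declared capacity, while handleNodeUp/handleNodeDown only change membership, so LackOfNodes reports the gap between the two.
func exampleCapacitySemantics() int {
    rg := NewResourceGroup(0)
    _ = rg.assignNode(1)     // capacity 1, nodes {1}
    _ = rg.assignNode(2)     // capacity 2, nodes {1, 2}
    _ = rg.handleNodeDown(2) // capacity stays 2, nodes {1}
    return rg.LackOfNodes()  // 1: the group is one node short of its declared capacity
}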
type ResourceManager struct {
groups map[string]*ResourceGroup
store Store
nodeMgr *session.NodeManager
rwmutex sync.RWMutex
}
func NewResourceManager(store Store, nodeMgr *session.NodeManager) *ResourceManager {
groupMap := make(map[string]*ResourceGroup)
groupMap[DefaultResourceGroupName] = NewResourceGroup(1000000)
return &ResourceManager{
groups: groupMap,
store: store,
nodeMgr: nodeMgr,
}
}
func (rm *ResourceManager) AddResourceGroup(rgName string) error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if len(rgName) == 0 {
return ErrRGNameIsEmpty
}
if rm.groups[rgName] != nil {
return ErrRGAlreadyExist
}
if len(rm.groups) >= 1024 {
return ErrRGLimit
}
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: rgName,
Capacity: 0,
})
if err != nil {
log.Info("failed to add resource group",
zap.String("rgName", rgName),
zap.Error(err),
)
return err
}
rm.groups[rgName] = NewResourceGroup(0)
log.Info("add resource group",
zap.String("rgName", rgName),
)
return nil
}
func (rm *ResourceManager) RemoveResourceGroup(rgName string) error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rgName == DefaultResourceGroupName {
return ErrDeleteDefaultRG
}
if rm.groups[rgName] == nil {
// deleting a non-existent rg should be tolerated
return nil
}
if rm.groups[rgName].GetCapacity() != 0 {
return ErrDeleteNonEmptyRG
}
err := rm.store.RemoveResourceGroup(rgName)
if err != nil {
log.Info("failed to remove resource group",
zap.String("rgName", rgName),
zap.Error(err),
)
return err
}
delete(rm.groups, rgName)
log.Info("remove resource group",
zap.String("rgName", rgName),
)
return nil
}
func (rm *ResourceManager) AssignNode(rgName string, node int64) error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
return rm.assignNode(rgName, node)
}
func (rm *ResourceManager) assignNode(rgName string, node int64) error {
if rm.groups[rgName] == nil {
return ErrRGNotExist
}
if rm.nodeMgr.Get(node) == nil {
return ErrNodeNotExist
}
if ok, _ := rm.nodeMgr.IsStoppingNode(node); ok {
return ErrNodeStopped
}
rm.checkRGNodeStatus(rgName)
if rm.checkNodeAssigned(node) {
return ErrNodeAlreadyAssign
}
newNodes := rm.groups[rgName].GetNodes()
newNodes = append(newNodes, node)
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: rgName,
Capacity: int32(rm.groups[rgName].GetCapacity()) + 1,
Nodes: newNodes,
})
if err != nil {
log.Info("failed to add node to resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
zap.Error(err),
)
return err
}
err = rm.groups[rgName].assignNode(node)
if err != nil {
return err
}
log.Info("add node to resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
)
return nil
}
func (rm *ResourceManager) checkNodeAssigned(node int64) bool {
for _, group := range rm.groups {
if group.containsNode(node) {
return true
}
}
return false
}
func (rm *ResourceManager) UnassignNode(rgName string, node int64) error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
return rm.unassignNode(rgName, node)
}
func (rm *ResourceManager) unassignNode(rgName string, node int64) error {
if rm.groups[rgName] == nil {
return ErrRGNotExist
}
if rm.nodeMgr.Get(node) == nil {
// removing a non-existent node should be tolerated
return nil
}
newNodes := make([]int64, 0)
for nid := range rm.groups[rgName].nodes {
if nid != node {
newNodes = append(newNodes, nid)
}
}
err := rm.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: rgName,
Capacity: int32(rm.groups[rgName].GetCapacity()) - 1,
Nodes: newNodes,
})
if err != nil {
log.Info("remove node from resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
zap.Error(err),
)
return err
}
rm.checkRGNodeStatus(rgName)
err = rm.groups[rgName].unassignNode(node)
if err != nil {
return err
}
log.Info("remove node from resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
)
return nil
}
func (rm *ResourceManager) GetNodes(rgName string) ([]int64, error) {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
if rm.groups[rgName] == nil {
return nil, ErrRGNotExist
}
rm.checkRGNodeStatus(rgName)
return rm.groups[rgName].GetNodes(), nil
}
// returns all outbound nodes of the replica
func (rm *ResourceManager) CheckOutboundNodes(replica *Replica) typeutil.UniqueSet {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
if rm.groups[replica.GetResourceGroup()] == nil {
return typeutil.NewUniqueSet()
}
rg := rm.groups[replica.GetResourceGroup()]
ret := typeutil.NewUniqueSet()
for _, node := range replica.GetNodes() {
if !rg.containsNode(node) {
ret.Insert(node)
}
}
return ret
}
// returns the number of outgoing nodes into each resource group from this replica
func (rm *ResourceManager) GetOutgoingNodeNumByReplica(replica *Replica) map[string]int32 {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
if rm.groups[replica.GetResourceGroup()] == nil {
return nil
}
rg := rm.groups[replica.GetResourceGroup()]
ret := make(map[string]int32)
for _, node := range replica.GetNodes() {
if !rg.containsNode(node) {
rgName, err := rm.findResourceGroupByNode(node)
if err == nil {
ret[rgName]++
}
}
}
return ret
}
func (rm *ResourceManager) ContainsNode(rgName string, node int64) bool {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
if rm.groups[rgName] == nil {
return false
}
rm.checkRGNodeStatus(rgName)
return rm.groups[rgName].containsNode(node)
}
func (rm *ResourceManager) ContainResourceGroup(rgName string) bool {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
return rm.groups[rgName] != nil
}
func (rm *ResourceManager) GetResourceGroup(rgName string) (*ResourceGroup, error) {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
if rm.groups[rgName] == nil {
return nil, ErrRGNotExist
}
rm.checkRGNodeStatus(rgName)
return rm.groups[rgName], nil
}
func (rm *ResourceManager) ListResourceGroups() []string {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
return lo.Keys(rm.groups)
}
func (rm *ResourceManager) FindResourceGroupByNode(node int64) (string, error) {
rm.rwmutex.RLock()
defer rm.rwmutex.RUnlock()
return rm.findResourceGroupByNode(node)
}
func (rm *ResourceManager) findResourceGroupByNode(node int64) (string, error) {
for name, group := range rm.groups {
if group.containsNode(node) {
return name, nil
}
}
return "", ErrNodeNotAssignToRG
}
func (rm *ResourceManager) HandleNodeUp(node int64) (string, error) {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rm.nodeMgr.Get(node) == nil {
return "", ErrNodeNotExist
}
if ok, _ := rm.nodeMgr.IsStoppingNode(node); ok {
return "", ErrNodeStopped
}
// if the node is already assigned to a resource group
rgName, err := rm.findResourceGroupByNode(node)
if err == nil {
log.Info("HandleNodeUp: node already assign to resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
)
return rgName, nil
}
// assign the new node to the default resource group
rm.groups[DefaultResourceGroupName].handleNodeUp(node)
log.Info("HandleNodeUp: assign node to default resource group",
zap.String("rgName", DefaultResourceGroupName),
zap.Int64("node", node),
)
return DefaultResourceGroupName, nil
}
func (rm *ResourceManager) HandleNodeDown(node int64) (string, error) {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rm.nodeMgr.Get(node) == nil {
return "", ErrNodeNotExist
}
rgName, err := rm.findResourceGroupByNode(node)
if err == nil {
log.Info("HandleNodeDown: remove node from resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),
)
return rgName, rm.groups[rgName].handleNodeDown(node)
}
return "", ErrNodeNotAssignToRG
}
func (rm *ResourceManager) TransferNode(from, to string) error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rm.groups[from] == nil || rm.groups[to] == nil {
return ErrRGNotExist
}
if len(rm.groups[from].nodes) == 0 {
return ErrRGIsEmpty
}
rm.checkRGNodeStatus(from)
rm.checkRGNodeStatus(to)
// TODO: choose the node with the least balance cost
node := rm.groups[from].GetNodes()[0]
if err := rm.transferNodeInStore(from, to, node); err != nil {
return err
}
err := rm.groups[from].unassignNode(node)
if err != nil {
// interrupt transfer, unreachable logic path
return err
}
err = rm.groups[to].assignNode(node)
if err != nil {
// interrupt transfer, unreachable logic path
return err
}
return nil
}
func (rm *ResourceManager) transferNodeInStore(from string, to string, node int64) error {
fromNodeList := make([]int64, 0)
for nid := range rm.groups[from].nodes {
if nid != node {
fromNodeList = append(fromNodeList, nid)
}
}
toNodeList := rm.groups[to].GetNodes()
toNodeList = append(toNodeList, node)
fromRG := &querypb.ResourceGroup{
Name: from,
Capacity: int32(rm.groups[from].GetCapacity()) - 1,
Nodes: fromNodeList,
}
toRG := &querypb.ResourceGroup{
Name: to,
Capacity: int32(rm.groups[to].GetCapacity()) + 1,
Nodes: toNodeList,
}
return rm.store.SaveResourceGroup(fromRG, toRG)
}
// automatically recover a resource group; returns the number of nodes used for the recovery
func (rm *ResourceManager) AutoRecoverResourceGroup(rgName string) (int, error) {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rm.groups[rgName] == nil {
return 0, ErrRGNotExist
}
rm.checkRGNodeStatus(rgName)
lackNodesNum := rm.groups[rgName].LackOfNodes()
nodesInDefault := rm.groups[DefaultResourceGroupName].GetNodes()
for i := 0; i < len(nodesInDefault) && i < lackNodesNum; i++ {
// TODO: choose the node with the least balance cost
node := nodesInDefault[i]
err := rm.unassignNode(DefaultResourceGroupName, node)
if err != nil {
// interrupt transfer, unreachable logic path
return i + 1, err
}
err = rm.groups[rgName].handleNodeUp(node)
if err != nil {
// roll back, unreachable logic path
rm.assignNode(DefaultResourceGroupName, node)
}
}
return lackNodesNum, nil
}
func (rm *ResourceManager) Recover() error {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
rgs, err := rm.store.GetResourceGroups()
if err != nil {
return ErrRecoverResourceGroupToStore
}
for _, rg := range rgs {
rm.groups[rg.GetName()] = NewResourceGroup(0)
for _, node := range rg.GetNodes() {
rm.groups[rg.GetName()].assignNode(node)
}
rm.checkRGNodeStatus(rg.GetName())
log.Info("Recover resource group",
zap.String("rgName", rg.GetName()),
zap.Int64s("nodes", rg.GetNodes()),
zap.Int32("capacity", rg.GetCapacity()),
)
}
return nil
}
// every operation that accesses nodes should check node status first
func (rm *ResourceManager) checkRGNodeStatus(rgName string) {
for _, node := range rm.groups[rgName].GetNodes() {
if rm.nodeMgr.Get(node) == nil {
log.Info("found node down, remove it",
zap.String("rgName", rgName),
zap.Int64("nodeID", node),
)
rm.groups[rgName].handleNodeDown(node)
}
}
}
// returns the number of lacking nodes
func (rm *ResourceManager) CheckLackOfNode(rgName string) int {
rm.rwmutex.Lock()
defer rm.rwmutex.Unlock()
if rm.groups[rgName] == nil {
return 0
}
rm.checkRGNodeStatus(rgName)
return rm.groups[rgName].LackOfNodes()
}
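As a quick, non-authoritative usage sketch of the exported surface above (the helper name, group name, and node id are invented; the store and node manager are assumed to be wired up already, and node 101 must be registered and not stopping):
func exampleResourceManagerUsage(store Store, nodeMgr *session.NodeManager) error {
    rm := NewResourceManager(store, nodeMgr)
    if err := rm.AddResourceGroup("rg1"); err != nil {
        return err
    }
    // grow rg1's declared capacity by assigning a registered node to it
    if err := rm.AssignNode("rg1", 101); err != nil {
        return err
    }
    // move one node from the default group into rg1 (the default group must not be empty)
    if err := rm.TransferNode(DefaultResourceGroupName, "rg1"); err != nil {
        return err
    }
    _ = rm.CheckLackOfNode("rg1") // 0 once rg1 holds as many nodes as its declared capacity
    return nil
}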

View File

@ -0,0 +1,294 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package meta
import (
"testing"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/proto/querypb"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/stretchr/testify/suite"
)
type ResourceManagerSuite struct {
suite.Suite
kv *etcdkv.EtcdKV
manager *ResourceManager
}
func (suite *ResourceManagerSuite) SetupSuite() {
Params.Init()
}
func (suite *ResourceManagerSuite) SetupTest() {
config := GenerateEtcdConfig()
cli, err := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
suite.Require().NoError(err)
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath.GetValue())
store := NewMetaStore(suite.kv)
suite.manager = NewResourceManager(store, session.NewNodeManager())
}
func (suite *ResourceManagerSuite) TestManipulateResourceGroup() {
// test add rg
err := suite.manager.AddResourceGroup("rg1")
suite.NoError(err)
suite.True(suite.manager.ContainResourceGroup("rg1"))
suite.Len(suite.manager.ListResourceGroups(), 2)
// test add duplicate rg
err = suite.manager.AddResourceGroup("rg1")
suite.ErrorIs(err, ErrRGAlreadyExist)
// test delete rg
err = suite.manager.RemoveResourceGroup("rg1")
suite.NoError(err)
// test delete rg which doesn't exist
err = suite.manager.RemoveResourceGroup("rg1")
suite.NoError(err)
// test delete default rg
err = suite.manager.RemoveResourceGroup(DefaultResourceGroupName)
suite.ErrorIs(ErrDeleteDefaultRG, err)
}
func (suite *ResourceManagerSuite) TestManipulateNode() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
err := suite.manager.AddResourceGroup("rg1")
suite.NoError(err)
// test add node to rg
err = suite.manager.AssignNode("rg1", 1)
suite.NoError(err)
// test adding a non-existent node to rg
err = suite.manager.AssignNode("rg1", 2)
suite.ErrorIs(err, ErrNodeNotExist)
// test adding a node to a non-existent rg
err = suite.manager.AssignNode("rg2", 1)
suite.ErrorIs(err, ErrRGNotExist)
// test remove node from rg
err = suite.manager.UnassignNode("rg1", 1)
suite.NoError(err)
// test removing a non-existent node from rg
err = suite.manager.UnassignNode("rg1", 2)
suite.NoError(err)
// test removing a node from a non-existent rg
err = suite.manager.UnassignNode("rg2", 1)
suite.ErrorIs(err, ErrRGNotExist)
// add a node that is already assigned to one rg to another rg
err = suite.manager.AddResourceGroup("rg2")
suite.NoError(err)
err = suite.manager.AssignNode("rg1", 1)
suite.NoError(err)
err = suite.manager.AssignNode("rg2", 1)
println(err.Error())
suite.ErrorIs(err, ErrNodeAlreadyAssign)
// transfer node between rgs
err = suite.manager.TransferNode("rg1", "rg2")
suite.NoError(err)
// transfer involving a non-existent rg
err = suite.manager.TransferNode("rgggg", "rg2")
suite.ErrorIs(err, ErrRGNotExist)
}
func (suite *ResourceManagerSuite) TestHandleNodeUp() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(100, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(101, "localhost"))
err := suite.manager.AddResourceGroup("rg1")
suite.NoError(err)
suite.manager.AssignNode("rg1", 1)
suite.manager.AssignNode("rg1", 2)
suite.manager.AssignNode("rg1", 3)
// test that the query node id does not change; expect it to be assigned back to its original rg
rg, err := suite.manager.GetResourceGroup("rg1")
suite.NoError(err)
suite.Equal(rg.GetCapacity(), 3)
suite.Equal(len(rg.GetNodes()), 3)
suite.manager.HandleNodeUp(1)
suite.Equal(rg.GetCapacity(), 3)
suite.Equal(len(rg.GetNodes()), 3)
suite.manager.HandleNodeDown(2)
rg, err = suite.manager.GetResourceGroup("rg1")
suite.NoError(err)
suite.Equal(rg.GetCapacity(), 3)
suite.Equal(len(rg.GetNodes()), 2)
suite.NoError(err)
defaultRG, err := suite.manager.GetResourceGroup(DefaultResourceGroupName)
suite.NoError(err)
oldNodesNum := len(defaultRG.GetNodes())
suite.manager.HandleNodeUp(101)
rg, err = suite.manager.GetResourceGroup("rg1")
suite.NoError(err)
suite.Equal(rg.GetCapacity(), 3)
suite.Equal(len(rg.GetNodes()), 2)
suite.False(suite.manager.ContainsNode("rg1", 101))
nodes, err := suite.manager.GetNodes(DefaultResourceGroupName)
suite.NoError(err)
suite.Equal(len(nodes), oldNodesNum+1)
}
func (suite *ResourceManagerSuite) TestRecover() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
err := suite.manager.AddResourceGroup("rg")
suite.NoError(err)
suite.manager.AssignNode("rg", 1)
suite.manager.AssignNode("rg", 2)
suite.manager.AssignNode("rg", 3)
suite.manager.UnassignNode("rg", 3)
// clear the resource manager in a hacky way
delete(suite.manager.groups, "rg")
delete(suite.manager.groups, DefaultResourceGroupName)
suite.manager.Recover()
rg, err := suite.manager.GetResourceGroup("rg")
suite.NoError(err)
suite.Equal(2, rg.GetCapacity())
suite.True(suite.manager.ContainsNode("rg", 1))
suite.True(suite.manager.ContainsNode("rg", 2))
suite.False(suite.manager.ContainsNode("rg", 3))
}
func (suite *ResourceManagerSuite) TestCheckOutboundNodes() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
err := suite.manager.AddResourceGroup("rg")
suite.NoError(err)
suite.manager.AssignNode("rg", 1)
suite.manager.AssignNode("rg", 2)
suite.manager.AssignNode("rg", 3)
replica := NewReplica(
&querypb.Replica{
ID: 1,
CollectionID: 1,
Nodes: []int64{1, 2, 3, 4},
ResourceGroup: "rg",
},
typeutil.NewUniqueSet(1, 2, 3, 4),
)
outboundNodes := suite.manager.CheckOutboundNodes(replica)
suite.Len(outboundNodes, 1)
suite.True(outboundNodes.Contain(4))
}
func (suite *ResourceManagerSuite) TestCheckResourceGroup() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
err := suite.manager.AddResourceGroup("rg")
suite.NoError(err)
suite.manager.AssignNode("rg", 1)
suite.manager.AssignNode("rg", 2)
suite.manager.AssignNode("rg", 3)
suite.manager.HandleNodeDown(1)
lackNodes := suite.manager.CheckLackOfNode("rg")
suite.Equal(lackNodes, 1)
suite.manager.nodeMgr.Remove(2)
suite.manager.checkRGNodeStatus("rg")
lackNodes = suite.manager.CheckLackOfNode("rg")
suite.Equal(lackNodes, 2)
rg, err := suite.manager.FindResourceGroupByNode(3)
suite.NoError(err)
suite.Equal(rg, "rg")
}
func (suite *ResourceManagerSuite) TestGetOutboundNode() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
suite.manager.AddResourceGroup("rg")
suite.manager.AddResourceGroup("rg1")
suite.manager.AssignNode("rg", 1)
suite.manager.AssignNode("rg", 2)
suite.manager.AssignNode("rg1", 3)
replica := NewReplica(
&querypb.Replica{
ID: 1,
CollectionID: 100,
ResourceGroup: "rg",
Nodes: []int64{1, 2, 3},
},
typeutil.NewUniqueSet(1, 2, 3),
)
outgoingNodes := suite.manager.GetOutgoingNodeNumByReplica(replica)
suite.NotNil(outgoingNodes)
suite.Len(outgoingNodes, 1)
suite.NotNil(outgoingNodes["rg1"])
suite.Equal(outgoingNodes["rg1"], int32(1))
}
func (suite *ResourceManagerSuite) TestAutoRecover() {
suite.manager.nodeMgr.Add(session.NewNodeInfo(1, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(2, "localhost"))
suite.manager.nodeMgr.Add(session.NewNodeInfo(3, "localhost"))
err := suite.manager.AddResourceGroup("rg")
suite.NoError(err)
suite.manager.AssignNode(DefaultResourceGroupName, 1)
suite.manager.AssignNode(DefaultResourceGroupName, 2)
suite.manager.AssignNode("rg", 3)
suite.manager.HandleNodeDown(3)
lackNodes := suite.manager.CheckLackOfNode("rg")
suite.Equal(lackNodes, 1)
suite.manager.AutoRecoverResourceGroup("rg")
lackNodes = suite.manager.CheckLackOfNode("rg")
suite.Equal(lackNodes, 0)
}
func (suite *ResourceManagerSuite) TearDownSuite() {
suite.kv.Close()
}
func TestResourceManager(t *testing.T) {
suite.Run(t, new(ResourceManagerSuite))
}

View File

@ -150,7 +150,7 @@ func (m *SegmentDistManager) GetByShardWithReplica(shard string, replica *Replic
ret := make([]*Segment, 0)
for nodeID, segments := range m.segments {
if !replica.Nodes.Contain(nodeID) {
if !replica.Contains(nodeID) {
continue
}
for _, segment := range segments {

View File

@ -28,7 +28,6 @@ import (
"github.com/milvus-io/milvus/internal/kv"
"github.com/milvus-io/milvus/internal/metastore"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/util"
)
var (
@ -41,6 +40,7 @@ const (
ReplicaPrefix = "querycoord-replica"
CollectionMetaPrefixV1 = "queryCoord-collectionMeta"
ReplicaMetaPrefixV1 = "queryCoord-ReplicaMeta"
ResourceGroupPrefix = "queryCoord-ResourceGroup"
)
type WatchStoreChan = clientv3.WatchChan
@ -91,6 +91,26 @@ func (s metaStore) SaveReplica(replica *querypb.Replica) error {
return s.cli.Save(key, string(value))
}
func (s metaStore) SaveResourceGroup(rgs ...*querypb.ResourceGroup) error {
ret := make(map[string]string)
for _, rg := range rgs {
key := encodeResourceGroupKey(rg.GetName())
value, err := proto.Marshal(rg)
if err != nil {
return err
}
ret[key] = string(value)
}
return s.cli.MultiSave(ret)
}
func (s metaStore) RemoveResourceGroup(rgName string) error {
key := encodeResourceGroupKey(rgName)
return s.cli.Remove(key)
}
func (s metaStore) GetCollections() ([]*querypb.CollectionLoadInfo, error) {
_, values, err := s.cli.LoadWithPrefix(CollectionLoadInfoPrefix)
if err != nil {
@ -171,6 +191,25 @@ func (s metaStore) getReplicasFromV1() ([]*querypb.Replica, error) {
return ret, nil
}
func (s metaStore) GetResourceGroups() ([]*querypb.ResourceGroup, error) {
_, rgs, err := s.cli.LoadWithPrefix(ResourceGroupPrefix)
if err != nil {
return nil, err
}
ret := make([]*querypb.ResourceGroup, 0, len(rgs))
for _, value := range rgs {
rg := &querypb.ResourceGroup{}
err := proto.Unmarshal([]byte(value), rg)
if err != nil {
return nil, err
}
ret = append(ret, rg)
}
return ret, nil
}
func (s metaStore) ReleaseCollection(id int64) error {
k := encodeCollectionLoadInfoKey(id)
return s.cli.Remove(k)
@ -209,6 +248,6 @@ func encodeCollectionReplicaKey(collection int64) string {
return fmt.Sprintf("%s/%d", ReplicaPrefix, collection)
}
func encodeHandoffEventKey(collection, partition, segment int64) string {
return fmt.Sprintf("%s/%d/%d/%d", util.HandoffSegmentPrefix, collection, partition, segment)
func encodeResourceGroupKey(rgName string) string {
return fmt.Sprintf("%s/%s", ResourceGroupPrefix, rgName)
}
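For orientation, each group ends up under a key of the form queryCoord-ResourceGroup/<name> (relative to the meta root path) with the proto-marshalled querypb.ResourceGroup as the value; a hedged round-trip sketch against the store methods above (helper name and values are invented):
func exampleResourceGroupRoundTrip(s metaStore) error {
    if err := s.SaveResourceGroup(&querypb.ResourceGroup{
        Name:     "rg1",
        Capacity: 2,
        Nodes:    []int64{1, 2},
    }); err != nil {
        return err
    }
    groups, err := s.GetResourceGroups() // loads everything under queryCoord-ResourceGroup/
    if err != nil {
        return err
    }
    _ = groups // would include the "rg1" entry saved above
    return s.RemoveResourceGroup("rg1")
}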

View File

@ -17,22 +17,151 @@
package meta
import (
"sort"
"testing"
"github.com/milvus-io/milvus/internal/kv"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/proto/querypb"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/stretchr/testify/suite"
)
type StoreTestSuite struct {
suite.Suite
kv kv.MetaKv
store metaStore
}
func (suite *StoreTestSuite) SetupTest() {
//kv := memkv.NewMemoryKV()
//suite.store = NewMetaStore(kv)
func (suite *StoreTestSuite) SetupSuite() {
Params.Init()
}
func (suite *StoreTestSuite) TearDownTest() {}
func (suite *StoreTestSuite) SetupTest() {
config := GenerateEtcdConfig()
cli, err := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
suite.Require().NoError(err)
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath.GetValue())
suite.store = NewMetaStore(suite.kv)
}
func (suite *StoreTestSuite) TearDownTest() {
if suite.kv != nil {
suite.kv.Close()
}
}
func (suite *StoreTestSuite) TestCollection() {
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
CollectionID: 1,
})
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
CollectionID: 2,
})
suite.store.SaveCollection(&querypb.CollectionLoadInfo{
CollectionID: 3,
})
suite.store.ReleaseCollection(1)
suite.store.ReleaseCollection(2)
collections, err := suite.store.GetCollections()
suite.NoError(err)
suite.Len(collections, 1)
}
func (suite *StoreTestSuite) TestPartition() {
suite.store.SavePartition(&querypb.PartitionLoadInfo{
PartitionID: 1,
})
suite.store.SavePartition(&querypb.PartitionLoadInfo{
PartitionID: 2,
})
suite.store.SavePartition(&querypb.PartitionLoadInfo{
PartitionID: 3,
})
suite.store.ReleasePartition(1)
suite.store.ReleasePartition(2)
partitions, err := suite.store.GetPartitions()
suite.NoError(err)
suite.Len(partitions, 1)
}
func (suite *StoreTestSuite) TestReplica() {
suite.store.SaveReplica(&querypb.Replica{
CollectionID: 1,
ID: 1,
})
suite.store.SaveReplica(&querypb.Replica{
CollectionID: 1,
ID: 2,
})
suite.store.SaveReplica(&querypb.Replica{
CollectionID: 1,
ID: 3,
})
suite.store.ReleaseReplica(1, 1)
suite.store.ReleaseReplica(1, 2)
replicas, err := suite.store.GetReplicas()
suite.NoError(err)
suite.Len(replicas, 1)
}
func (suite *StoreTestSuite) TestResourceGroup() {
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: "rg1",
Capacity: 3,
Nodes: []int64{1, 2, 3},
})
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: "rg2",
Capacity: 3,
Nodes: []int64{4, 5},
})
suite.store.SaveResourceGroup(&querypb.ResourceGroup{
Name: "rg3",
Capacity: 0,
Nodes: []int64{},
})
suite.store.RemoveResourceGroup("rg3")
groups, err := suite.store.GetResourceGroups()
suite.NoError(err)
suite.Len(groups, 2)
sort.Slice(groups, func(i, j int) bool {
return groups[i].GetName() < groups[j].GetName()
})
suite.Equal("rg1", groups[0].GetName())
suite.Equal(int32(3), groups[0].GetCapacity())
suite.Equal([]int64{1, 2, 3}, groups[0].GetNodes())
suite.Equal("rg2", groups[1].GetName())
suite.Equal(int32(3), groups[1].GetCapacity())
suite.Equal([]int64{4, 5}, groups[1].GetNodes())
}
func (suite *StoreTestSuite) TestLoadRelease() {
// TODO(sunby): add ut

View File

@ -27,6 +27,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
@ -101,7 +102,7 @@ func (suite *TargetManagerSuite) SetupTest() {
// meta
store := NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = NewMeta(idAllocator, store)
suite.meta = NewMeta(idAllocator, store, session.NewNodeManager())
suite.broker = NewMockBroker(suite.T())
suite.mgr = NewTargetManager(suite.broker, suite.meta)

View File

@ -32,6 +32,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/paramtable"
)
@ -178,7 +179,7 @@ func (suite *CollectionObserverSuite) SetupTest() {
// Dependencies
suite.dist = meta.NewDistributionManager()
suite.meta = meta.NewMeta(suite.idAllocator, suite.store)
suite.meta = meta.NewMeta(suite.idAllocator, suite.store, session.NewNodeManager())
suite.broker = meta.NewMockBroker(suite.T())
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
suite.targetObserver = NewTargetObserver(suite.meta,
@ -323,7 +324,7 @@ func (suite *CollectionObserverSuite) loadAll() {
func (suite *CollectionObserverSuite) load(collection int64) {
// Mock meta data
replicas, err := suite.meta.ReplicaManager.Spawn(collection, suite.replicaNumber[collection])
replicas, err := suite.meta.ReplicaManager.Spawn(collection, suite.replicaNumber[collection], meta.DefaultResourceGroupName)
suite.NoError(err)
for _, replica := range replicas {
replica.AddNode(suite.nodes...)

View File

@ -67,7 +67,7 @@ func (suite *LeaderObserverTestSuite) SetupTest() {
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store)
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
suite.broker = meta.NewMockBroker(suite.T())
suite.mockCluster = session.NewMockCluster(suite.T())

View File

@ -0,0 +1,112 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package observers
import (
"context"
"sync"
"time"
"go.uber.org/zap"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
)
// ReplicaObserver checks each replica, finds its outbound nodes, and removes a node from the replica once all of its segments/channels have been moved away
type ReplicaObserver struct {
c chan struct{}
wg sync.WaitGroup
meta *meta.Meta
distMgr *meta.DistributionManager
stopOnce sync.Once
}
func NewReplicaObserver(meta *meta.Meta, distMgr *meta.DistributionManager) *ReplicaObserver {
return &ReplicaObserver{
c: make(chan struct{}),
meta: meta,
distMgr: distMgr,
}
}
func (ob *ReplicaObserver) Start(ctx context.Context) {
ob.wg.Add(1)
go ob.schedule(ctx)
}
func (ob *ReplicaObserver) Stop() {
ob.stopOnce.Do(func() {
close(ob.c)
ob.wg.Wait()
})
}
func (ob *ReplicaObserver) schedule(ctx context.Context) {
defer ob.wg.Done()
log.Info("Start check replica loop")
ticker := time.NewTicker(params.Params.QueryCoordCfg.CheckNodeInReplicaInterval.GetAsDuration(time.Second))
for {
select {
case <-ctx.Done():
log.Info("Close replica observer due to context canceled")
return
case <-ob.c:
log.Info("Close replica observer")
return
case <-ticker.C:
ob.checkNodesInReplica()
}
}
}
func (ob *ReplicaObserver) checkNodesInReplica() {
collections := ob.meta.GetAll()
for _, collectionID := range collections {
replicas := ob.meta.ReplicaManager.GetByCollection(collectionID)
for _, replica := range replicas {
outboundNodes := ob.meta.ResourceManager.CheckOutboundNodes(replica)
if len(outboundNodes) > 0 {
log.RatedInfo(10, "found outbound nodes in replica",
zap.Int64("collectionID", replica.GetCollectionID()),
zap.Int64("replicaID", replica.GetCollectionID()),
zap.Int64s("allOutboundNodes", outboundNodes.Collect()),
)
for node := range outboundNodes {
channels := ob.distMgr.ChannelDistManager.GetByCollectionAndNode(collectionID, node)
segments := ob.distMgr.SegmentDistManager.GetByCollectionAndNode(collectionID, node)
if len(channels) == 0 && len(segments) == 0 {
replica.RemoveNode(node)
log.Info("all segment/channel has been removed from outbound node, remove it from replica",
zap.Int64("collectionID", replica.GetCollectionID()),
zap.Int64("replicaID", replica.GetCollectionID()),
zap.Int64("removedNodes", node),
zap.Int64s("availableNodes", replica.GetNodes()),
)
}
}
}
}
}
}
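The removal rule above, restated as a hedged predicate (the helper is hypothetical; the calls are the ones already used in checkNodesInReplica): a node is dropped from a replica only when it is outbound for the replica's resource group and no channels or segments of that collection remain on it.
func shouldRemoveFromReplica(ob *ReplicaObserver, replica *meta.Replica, node int64) bool {
    outboundNodes := ob.meta.ResourceManager.CheckOutboundNodes(replica)
    if !outboundNodes.Contain(node) {
        return false
    }
    channels := ob.distMgr.ChannelDistManager.GetByCollectionAndNode(replica.GetCollectionID(), node)
    segments := ob.distMgr.SegmentDistManager.GetByCollectionAndNode(replica.GetCollectionID(), node)
    return len(channels) == 0 && len(segments) == 0
}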

View File

@ -0,0 +1,134 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package observers
import (
"context"
"testing"
"time"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/paramtable"
"github.com/stretchr/testify/suite"
)
type ReplicaObserverSuite struct {
suite.Suite
kv *etcdkv.EtcdKV
// dependencies
meta *meta.Meta
distMgr *meta.DistributionManager
observer *ReplicaObserver
collectionID int64
partitionID int64
}
func (suite *ReplicaObserverSuite) SetupSuite() {
paramtable.Init()
paramtable.Get().Save(Params.QueryCoordCfg.CheckNodeInReplicaInterval.Key, "1")
}
func (suite *ReplicaObserverSuite) SetupTest() {
var err error
config := GenerateEtcdConfig()
cli, err := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
suite.Require().NoError(err)
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath.GetValue())
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
suite.distMgr = meta.NewDistributionManager()
suite.observer = NewReplicaObserver(suite.meta, suite.distMgr)
suite.observer.Start(context.TODO())
suite.collectionID = int64(1000)
suite.partitionID = int64(100)
suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, 1)
err = suite.meta.CollectionManager.PutCollection(utils.CreateTestCollection(suite.collectionID, 1))
suite.NoError(err)
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1, meta.DefaultResourceGroupName)
suite.NoError(err)
err = suite.meta.ReplicaManager.Put(replicas...)
suite.NoError(err)
}
func (suite *ReplicaObserverSuite) TestCheckNodesInReplica() {
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
suite.distMgr.ChannelDistManager.Update(1, utils.CreateTestChannel(suite.collectionID, 2, 1, "test-insert-channel1"))
suite.distMgr.SegmentDistManager.Update(1, utils.CreateTestSegment(suite.collectionID, suite.partitionID, 1, 100, 1, "test-insert-channel1"))
replicas[0].AddNode(1)
suite.distMgr.ChannelDistManager.Update(100, utils.CreateTestChannel(suite.collectionID, 100, 1, "test-insert-channel2"))
suite.distMgr.SegmentDistManager.Update(100, utils.CreateTestSegment(suite.collectionID, suite.partitionID, 2, 100, 1, "test-insert-channel2"))
replicas[0].AddNode(100)
suite.Eventually(func() bool {
// node 100 should be kept
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
for _, node := range replicas[0].GetNodes() {
if node == 100 {
return true
}
}
return false
}, 6*time.Second, 2*time.Second)
suite.Len(replicas[0].GetNodes(), 2)
suite.distMgr.ChannelDistManager.Update(100)
suite.distMgr.SegmentDistManager.Update(100)
suite.Eventually(func() bool {
// node 100 should be removed
replicas := suite.meta.ReplicaManager.GetByCollection(suite.collectionID)
for _, node := range replicas[0].GetNodes() {
if node == 100 {
return false
}
}
return true
}, 5*time.Second, 1*time.Second)
suite.Len(replicas[0].GetNodes(), 1)
suite.Equal([]int64{1}, replicas[0].GetNodes())
}
func (suite *ReplicaObserverSuite) TearDownSuite() {
suite.kv.Close()
suite.observer.Stop()
}
func TestReplicaObserver(t *testing.T) {
suite.Run(t, new(ReplicaObserverSuite))
}

View File

@ -0,0 +1,107 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package observers
import (
"context"
"sync"
"time"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"go.uber.org/zap"
)
// ResourceObserver checks whether a resource group lacks nodes and tries to transfer nodes from the default resource group
type ResourceObserver struct {
c chan struct{}
wg sync.WaitGroup
meta *meta.Meta
stopOnce sync.Once
}
func NewResourceObserver(meta *meta.Meta) *ResourceObserver {
return &ResourceObserver{
c: make(chan struct{}),
meta: meta,
}
}
func (ob *ResourceObserver) Start(ctx context.Context) {
ob.wg.Add(1)
go ob.schedule(ctx)
}
func (ob *ResourceObserver) Stop() {
ob.stopOnce.Do(func() {
close(ob.c)
ob.wg.Wait()
})
}
func (ob *ResourceObserver) schedule(ctx context.Context) {
defer ob.wg.Done()
log.Info("Start check resource group loop")
ticker := time.NewTicker(params.Params.QueryCoordCfg.CheckResourceGroupInterval.GetAsDuration(time.Second))
for {
select {
case <-ctx.Done():
log.Info("Close resource group observer due to context canceled")
return
case <-ob.c:
log.Info("Close resource group observer")
return
case <-ticker.C:
ob.checkResourceGroup()
}
}
}
func (ob *ResourceObserver) checkResourceGroup() {
manager := ob.meta.ResourceManager
rgNames := manager.ListResourceGroups()
enableRGAutoRecover := params.Params.QueryCoordCfg.EnableRGAutoRecover.GetAsBool()
for _, rgName := range rgNames {
if rgName == meta.DefaultResourceGroupName {
continue
}
lackNodeNum := manager.CheckLackOfNode(rgName)
if lackNodeNum > 0 {
log.Info("found resource group lack of nodes",
zap.String("rgName", rgName),
zap.Int("lackNodeNum", lackNodeNum),
)
if enableRGAutoRecover {
usedNodeNum, err := manager.AutoRecoverResourceGroup(rgName)
if err != nil {
log.Warn("failed to recover resource group",
zap.String("rgName", rgName),
zap.Int("lackNodeNum", lackNodeNum-usedNodeNum),
zap.Error(err),
)
}
}
}
}
}
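The per-group decision above, restated as a hedged predicate (the helper is hypothetical; it only uses calls already referenced in checkResourceGroup): only non-default groups are recovered, and only when they lack nodes and auto-recovery is enabled.
func needsAutoRecover(m *meta.Meta, rgName string) bool {
    if rgName == meta.DefaultResourceGroupName {
        // the default group is the donor; it is never auto-recovered
        return false
    }
    return m.ResourceManager.CheckLackOfNode(rgName) > 0 &&
        params.Params.QueryCoordCfg.EnableRGAutoRecover.GetAsBool()
}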

View File

@ -0,0 +1,111 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package observers
import (
"context"
"testing"
"time"
etcdKV "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/paramtable"
"github.com/stretchr/testify/suite"
)
type ResourceObserverSuite struct {
suite.Suite
kv *etcdKV.EtcdKV
// dependencies
meta *meta.Meta
observer *ResourceObserver
nodeMgr *session.NodeManager
collectionID int64
partitionID int64
}
func (suite *ResourceObserverSuite) SetupSuite() {
paramtable.Init()
paramtable.Get().Save(Params.QueryCoordCfg.CheckResourceGroupInterval.Key, "3")
}
func (suite *ResourceObserverSuite) SetupTest() {
var err error
config := GenerateEtcdConfig()
cli, err := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
suite.Require().NoError(err)
suite.kv = etcdKV.NewEtcdKV(cli, config.MetaRootPath.GetValue())
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(idAllocator, store, suite.nodeMgr)
suite.observer = NewResourceObserver(suite.meta)
suite.observer.Start(context.TODO())
for i := 1; i < 10; i++ {
suite.nodeMgr.Add(session.NewNodeInfo(int64(i), "localhost"))
suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, int64(i))
}
}
func (suite *ResourceObserverSuite) TestCheckNodesInReplica() {
suite.meta.ResourceManager.AddResourceGroup("rg")
suite.nodeMgr.Add(session.NewNodeInfo(int64(100), "localhost"))
suite.nodeMgr.Add(session.NewNodeInfo(int64(101), "localhost"))
suite.nodeMgr.Add(session.NewNodeInfo(int64(102), "localhost"))
suite.meta.ResourceManager.AssignNode("rg", 100)
suite.meta.ResourceManager.AssignNode("rg", 101)
suite.meta.ResourceManager.AssignNode("rg", 102)
suite.meta.ResourceManager.HandleNodeDown(100)
suite.meta.ResourceManager.HandleNodeDown(101)
// before the resource group is auto-recovered
suite.Eventually(func() bool {
lackNodesNum := suite.meta.ResourceManager.CheckLackOfNode("rg")
return lackNodesNum == 2
}, 5*time.Second, 1*time.Second)
// after the resource group is auto-recovered
suite.Eventually(func() bool {
lackNodesNum := suite.meta.ResourceManager.CheckLackOfNode("rg")
return lackNodesNum == 0
}, 5*time.Second, 1*time.Second)
}
func (suite *ResourceObserverSuite) TearDownSuite() {
suite.kv.Close()
suite.observer.Stop()
}
func TestResourceObserver(t *testing.T) {
suite.Run(t, new(ResourceObserverSuite))
}

View File

@ -29,6 +29,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/querycoordv2/utils"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/paramtable"
@ -74,7 +75,7 @@ func (suite *TargetObserverSuite) SetupTest() {
// meta
store := meta.NewMetaStore(suite.kv)
idAllocator := RandomIncrementIDAllocator()
suite.meta = meta.NewMeta(idAllocator, store)
suite.meta = meta.NewMeta(idAllocator, store, session.NewNodeManager())
suite.broker = meta.NewMockBroker(suite.T())
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
@ -86,7 +87,7 @@ func (suite *TargetObserverSuite) SetupTest() {
err = suite.meta.CollectionManager.PutCollection(utils.CreateTestCollection(suite.collectionID, 1))
suite.NoError(err)
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1)
replicas, err := suite.meta.ReplicaManager.Spawn(suite.collectionID, 1, meta.DefaultResourceGroupName)
suite.NoError(err)
replicas[0].AddNode(2)
err = suite.meta.ReplicaManager.Put(replicas...)
@ -212,6 +213,6 @@ func (suite *TargetObserverSuite) TearDownSuite() {
suite.observer.Stop()
}
func TestTargetManager(t *testing.T) {
func TestTargetObserver(t *testing.T) {
suite.Run(t, new(TargetObserverSuite))
}

View File

@ -103,6 +103,8 @@ type Server struct {
collectionObserver *observers.CollectionObserver
leaderObserver *observers.LeaderObserver
targetObserver *observers.TargetObserver
replicaObserver *observers.ReplicaObserver
resourceObserver *observers.ResourceObserver
balancer balance.Balance
@ -177,13 +179,13 @@ func (s *Server) Init() error {
s.metricsCacheManager = metricsinfo.NewMetricsCacheManager()
// Init meta
s.nodeMgr = session.NewNodeManager()
err = s.initMeta()
if err != nil {
return err
}
// Init session
log.Info("init session")
s.nodeMgr = session.NewNodeManager()
s.cluster = session.NewCluster(s.nodeMgr, s.queryNodeCreator)
// Init schedulers
@ -244,7 +246,7 @@ func (s *Server) initMeta() error {
log.Info("init meta")
s.store = meta.NewMetaStore(s.kv)
s.meta = meta.NewMeta(s.idAllocator, s.store)
s.meta = meta.NewMeta(s.idAllocator, s.store, s.nodeMgr)
log.Info("recover meta...")
err := s.meta.CollectionManager.Recover()
@ -262,6 +264,12 @@ func (s *Server) initMeta() error {
return err
}
err = s.meta.ResourceManager.Recover()
if err != nil {
log.Error("failed to recover resource groups")
return err
}
s.dist = &meta.DistributionManager{
SegmentDistManager: meta.NewSegmentDistManager(),
ChannelDistManager: meta.NewChannelDistManager(),
@ -297,6 +305,13 @@ func (s *Server) initObserver() {
s.targetMgr,
s.targetObserver,
)
s.replicaObserver = observers.NewReplicaObserver(
s.meta,
s.dist,
)
s.resourceObserver = observers.NewResourceObserver(s.meta)
}
func (s *Server) afterStart() {
@ -360,6 +375,8 @@ func (s *Server) startServerLoop() {
s.collectionObserver.Start(s.ctx)
s.leaderObserver.Start(s.ctx)
s.targetObserver.Start(s.ctx)
s.replicaObserver.Start(s.ctx)
s.resourceObserver.Start(s.ctx)
}
func (s *Server) Stop() error {
@ -403,6 +420,12 @@ func (s *Server) Stop() error {
if s.targetObserver != nil {
s.targetObserver.Stop()
}
if s.replicaObserver != nil {
s.replicaObserver.Stop()
}
if s.resourceObserver != nil {
s.resourceObserver.Stop()
}
s.wg.Wait()
log.Info("QueryCoord stop successfully")
@ -580,17 +603,33 @@ func (s *Server) handleNodeUp(node int64) {
s.taskScheduler.AddExecutor(node)
s.distController.StartDistInstance(s.ctx, node)
// the node needs to be assigned to a resource group and a replica
rgName, err := s.meta.ResourceManager.HandleNodeUp(node)
if err != nil {
log.Warn("HandleNodeUp: failed to assign node to resource group",
zap.Error(err),
)
return
}
log.Info("HandleNodeUp: assign node to resource group",
zap.String("resourceGroup", rgName),
)
for _, collection := range s.meta.CollectionManager.GetAll() {
log := log.With(zap.Int64("collectionID", collection))
replica := s.meta.ReplicaManager.GetByCollectionAndNode(collection, node)
if replica == nil {
replicas := s.meta.ReplicaManager.GetByCollection(collection)
replicas := s.meta.ReplicaManager.GetByCollectionAndRG(collection, rgName)
if len(replicas) == 0 {
continue
}
sort.Slice(replicas, func(i, j int) bool {
return replicas[i].Nodes.Len() < replicas[j].Nodes.Len()
return replicas[i].Len() < replicas[j].Len()
})
replica := replicas[0]
// TODO(yah01): this may fail, need a component to check whether a node is assigned
err := s.meta.ReplicaManager.AddNode(replica.GetID(), node)
err = s.meta.ReplicaManager.AddNode(replica.GetID(), node)
if err != nil {
log.Warn("failed to assign node to replicas",
zap.Int64("replicaID", replica.GetID()),
@ -608,20 +647,6 @@ func (s *Server) handleNodeDown(node int64) {
s.taskScheduler.RemoveExecutor(node)
s.distController.Remove(node)
// Refresh the targets, to avoid consuming messages too early from channel
// FIXME(yah01): the leads to miss data, the segments flushed between the two check points
// are missed, it will recover for a while.
channels := s.dist.ChannelDistManager.GetByNode(node)
for _, channel := range channels {
_, err := s.targetObserver.UpdateNextTarget(channel.GetCollectionID())
if err != nil {
msg := "failed to update next targets for collection"
log.Error(msg,
zap.Error(err))
continue
}
}
// Clear dist
s.dist.LeaderViewManager.Update(node)
s.dist.ChannelDistManager.Update(node)
@ -647,6 +672,19 @@ func (s *Server) handleNodeDown(node int64) {
// Clear tasks
s.taskScheduler.RemoveByNode(node)
rgName, err := s.meta.ResourceManager.HandleNodeDown(node)
if err != nil {
log.Warn("HandleNodeDown: failed to remove node from resource group",
zap.String("resourceGroup", rgName),
zap.Error(err),
)
return
}
log.Info("HandleNodeDown: remove node from resource group",
zap.String("resourceGroup", rgName),
)
}
// checkReplicas checks whether a replica contains offline nodes and removes them
@ -657,7 +695,7 @@ func (s *Server) checkReplicas() {
for _, replica := range replicas {
replica := replica.Clone()
toRemove := make([]int64, 0)
for node := range replica.Nodes {
for _, node := range replica.GetNodes() {
if s.nodeMgr.Get(node) == nil {
toRemove = append(toRemove, node)
}

View File

@ -110,6 +110,7 @@ func (suite *ServerSuite) SetupTest() {
suite.Require().NoError(err)
ok := suite.waitNodeUp(suite.nodes[i], 5*time.Second)
suite.Require().True(ok)
suite.server.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, suite.nodes[i].ID)
}
suite.loadAll()
@ -184,7 +185,6 @@ func (suite *ServerSuite) TestNodeUp() {
}
return true
}, 5*time.Second, time.Second)
}
func (suite *ServerSuite) TestNodeUpdate() {

View File

@ -45,6 +45,16 @@ import (
var (
successStatus = utils.WrapStatus(commonpb.ErrorCode_Success, "")
ErrCreateResourceGroupFailed = errors.New("failed to create resource group")
ErrDropResourceGroupFailed = errors.New("failed to drop resource group")
ErrAddNodeToRGFailed = errors.New("failed to add node to resource group")
ErrRemoveNodeFromRGFailed = errors.New("failed to remove node from resource group")
ErrTransferNodeFailed = errors.New("failed to transfer node between resource groups")
ErrTransferReplicaFailed = errors.New("failed to transfer replica between resource groups")
ErrListResourceGroupsFailed = errors.New("failed to list resource groups")
ErrDescribeResourceGroupFailed = errors.New("failed to describe resource group")
ErrLoadUseWrongRG = errors.New("load operation should use collection's resource group")
)
func (s *Server) ShowCollections(ctx context.Context, req *querypb.ShowCollectionsRequest) (*querypb.ShowCollectionsResponse, error) {
@ -218,6 +228,13 @@ func (s *Server) LoadCollection(ctx context.Context, req *querypb.LoadCollection
return s.refreshCollection(ctx, req.GetCollectionID())
}
if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil {
msg := "failed to load collection"
log.Warn(msg, zap.Error(err))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, msg, err), nil
}
loadJob := job.NewLoadCollectionJob(ctx,
req,
s.dist,
@ -282,6 +299,8 @@ func (s *Server) ReleaseCollection(ctx context.Context, req *querypb.ReleaseColl
func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitionsRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.Int64("collectionID", req.GetCollectionID()),
zap.Int32("replicaNumber", req.GetReplicaNumber()),
zap.Strings("resourceGroups", req.GetResourceGroups()),
)
log.Info("received load partitions request",
@ -300,6 +319,14 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
// If refresh mode is ON.
if req.GetRefresh() {
return s.refreshPartitions(ctx, req.GetCollectionID(), req.GetPartitionIDs())
}
if err := s.checkResourceGroup(req.GetCollectionID(), req.GetResourceGroups()); err != nil {
msg := "failed to load partitions"
log.Warn(msg, zap.Error(ErrLoadUseWrongRG))
metrics.QueryCoordLoadCount.WithLabelValues(metrics.FailLabel).Inc()
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, msg, ErrLoadUseWrongRG), nil
}
loadJob := job.NewLoadPartitionJob(ctx,
@ -323,6 +350,19 @@ func (s *Server) LoadPartitions(ctx context.Context, req *querypb.LoadPartitions
return successStatus, nil
}
func (s *Server) checkResourceGroup(collectionID int64, resourceGroups []string) error {
if len(resourceGroups) != 0 {
collectionUsedRG := s.meta.ReplicaManager.GetResourceGroupByCollection(collectionID)
for _, rgName := range resourceGroups {
if !collectionUsedRG.Contain(rgName) {
return ErrLoadUseWrongRG
}
}
}
return nil
}
func (s *Server) ReleasePartitions(ctx context.Context, req *querypb.ReleasePartitionsRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.Int64("collectionID", req.GetCollectionID()),
@ -637,7 +677,7 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
fmt.Sprintf("can't balance, because the source node[%d] is invalid", srcNode), err), nil
}
for _, dstNode := range req.GetDstNodeIDs() {
if !replica.Nodes.Contain(dstNode) {
if !replica.Contains(dstNode) {
msg := "destination nodes have to be in the same replica of source node"
log.Warn(msg)
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, msg), nil
@ -924,3 +964,204 @@ func (s *Server) CheckHealth(ctx context.Context, req *milvuspb.CheckHealthReque
return &milvuspb.CheckHealthResponse{IsHealthy: true, Reasons: errReasons}, nil
}
func (s *Server) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("rgName", req.GetResourceGroup()),
)
log.Info("create resource group request received")
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrCreateResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrCreateResourceGroupFailed.Error(), ErrNotHealthy), nil
}
err := s.meta.ResourceManager.AddResourceGroup(req.GetResourceGroup())
if err != nil {
log.Warn(ErrCreateResourceGroupFailed.Error(), zap.Error(err))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrCreateResourceGroupFailed.Error(), err), nil
}
return successStatus, nil
}
func (s *Server) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("rgName", req.GetResourceGroup()),
)
log.Info("drop resource group request received")
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrDropResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDropResourceGroupFailed.Error(), ErrNotHealthy), nil
}
err := s.meta.ResourceManager.RemoveResourceGroup(req.GetResourceGroup())
if err != nil {
log.Warn(ErrDropResourceGroupFailed.Error(), zap.Error(err))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDropResourceGroupFailed.Error(), err), nil
}
return successStatus, nil
}
func (s *Server) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("source", req.GetSourceResourceGroup()),
zap.String("target", req.GetTargetResourceGroup()),
)
log.Info("transfer node between resource group request received")
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrTransferNodeFailed.Error(), zap.Error(ErrNotHealthy))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferNodeFailed.Error(), ErrNotHealthy), nil
}
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetSourceResourceGroup()); !ok {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
fmt.Sprintf("the source resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
}
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetTargetResourceGroup()); !ok {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
fmt.Sprintf("the target resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
}
err := s.meta.ResourceManager.TransferNode(req.GetSourceResourceGroup(), req.GetTargetResourceGroup())
if err != nil {
log.Warn(ErrTransferNodeFailed.Error(), zap.Error(err))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferNodeFailed.Error(), err), nil
}
return successStatus, nil
}
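// TransferReplica moves NumReplica replicas of the given collection from the source
// resource group to the target resource group. Both groups must exist and the source
// group must hold at least NumReplica replicas of that collection.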
func (s *Server) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error) {
log := log.Ctx(ctx).With(
zap.String("source", req.GetSourceResourceGroup()),
zap.String("target", req.GetTargetResourceGroup()),
zap.Int64("collectionID", req.GetCollectionID()),
)
log.Info("transfer replica request received")
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrTransferReplicaFailed.Error(), zap.Error(ErrNotHealthy))
return utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrTransferReplicaFailed.Error(), ErrNotHealthy), nil
}
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetSourceResourceGroup()); !ok {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
fmt.Sprintf("the source resource group[%s] doesn't exist", req.GetSourceResourceGroup()), meta.ErrRGNotExist), nil
}
if ok := s.meta.ResourceManager.ContainResourceGroup(req.GetTargetResourceGroup()); !ok {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
fmt.Sprintf("the target resource group[%s] doesn't exist", req.GetTargetResourceGroup()), meta.ErrRGNotExist), nil
}
// for now, transferring replicas of the same collection into the same resource group is not supported
replicas := s.meta.ReplicaManager.GetByCollectionAndRG(req.GetCollectionID(), req.GetSourceResourceGroup())
if len(replicas) < int(req.GetNumReplica()) {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument,
fmt.Sprintf("found [%d] replicas of collection[%d] in source resource group[%s]",
len(replicas), req.GetCollectionID(), req.GetSourceResourceGroup())), nil
}
err := s.transferReplica(req.GetTargetResourceGroup(), replicas[:req.GetNumReplica()])
if err != nil {
return utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, ErrTransferReplicaFailed.Error(), err), nil
}
return successStatus, nil
}
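// transferReplica clones the given replicas into the target resource group, assigns
// nodes from that group to the clones via utils.AssignNodesToReplicas, and persists
// them through the ReplicaManager.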
func (s *Server) transferReplica(targetRG string, replicas []*meta.Replica) error {
ret := make([]*meta.Replica, 0)
for _, replica := range replicas {
newReplica := replica.Clone()
newReplica.ResourceGroup = targetRG
ret = append(ret, newReplica)
}
err := utils.AssignNodesToReplicas(s.meta, targetRG, ret...)
if err != nil {
return err
}
return s.meta.ReplicaManager.Put(ret...)
}
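// ListResourceGroups returns the names of all resource groups known to the ResourceManager.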
func (s *Server) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error) {
log := log.Ctx(ctx)
log.Info("list resource group request received")
resp := &milvuspb.ListResourceGroupsResponse{
Status: successStatus,
}
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrListResourceGroupsFailed.Error(), zap.Error(ErrNotHealthy))
resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrListResourceGroupsFailed.Error(), ErrNotHealthy)
return resp, nil
}
resp.ResourceGroups = s.meta.ResourceManager.ListResourceGroups()
return resp, nil
}
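// DescribeResourceGroup reports the capacity and available node count of a resource group
// together with three per-collection counters: NumLoadedReplica counts replicas placed in
// this group, NumOutgoingNode counts nodes referenced by those replicas that belong to
// other groups, and NumIncomingNode counts nodes of this group that serve replicas placed
// in other groups.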
func (s *Server) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error) {
log := log.Ctx(ctx).With(
zap.String("rgName", req.GetResourceGroup()),
)
log.Info("describe resource group request received")
resp := &querypb.DescribeResourceGroupResponse{
Status: successStatus,
}
if s.status.Load() != commonpb.StateCode_Healthy {
log.Warn(ErrDescribeResourceGroupFailed.Error(), zap.Error(ErrNotHealthy))
resp.Status = utils.WrapStatus(commonpb.ErrorCode_UnexpectedError, ErrDescribeResourceGroupFailed.Error(), ErrNotHealthy)
return resp, nil
}
rg, err := s.meta.ResourceManager.GetResourceGroup(req.GetResourceGroup())
if err != nil {
resp.Status = utils.WrapStatus(commonpb.ErrorCode_IllegalArgument, ErrDescribeResourceGroupFailed.Error(), err)
return resp, nil
}
loadedReplicas := make(map[int64]int32)
outgoingNodes := make(map[int64]int32)
replicasInRG := s.meta.GetByResourceGroup(req.GetResourceGroup())
for _, replica := range replicasInRG {
loadedReplicas[replica.GetCollectionID()]++
for _, node := range replica.GetNodes() {
if !s.meta.ContainsNode(replica.GetResourceGroup(), node) {
outgoingNodes[replica.GetCollectionID()]++
}
}
}
incomingNodes := make(map[int64]int32)
collections := s.meta.GetAll()
for _, collection := range collections {
replicas := s.meta.GetByCollection(collection)
for _, replica := range replicas {
if replica.GetResourceGroup() == req.GetResourceGroup() {
continue
}
for _, node := range replica.GetNodes() {
if s.meta.ContainsNode(req.GetResourceGroup(), node) {
incomingNodes[collection]++
}
}
}
}
resp.ResourceGroup = &querypb.ResourceGroupInfo{
Name: req.GetResourceGroup(),
Capacity: int32(rg.GetCapacity()),
NumAvailableNode: int32(len(rg.GetNodes())),
NumLoadedReplica: loadedReplicas,
NumOutgoingNode: outgoingNodes,
NumIncomingNode: incomingNodes,
}
return resp, nil
}

@ -128,7 +128,8 @@ func (suite *ServiceSuite) SetupTest() {
suite.store = meta.NewMetaStore(suite.kv)
suite.dist = meta.NewDistributionManager()
suite.meta = meta.NewMeta(params.RandomIncrementIDAllocator(), suite.store)
suite.nodeMgr = session.NewNodeManager()
suite.meta = meta.NewMeta(params.RandomIncrementIDAllocator(), suite.store, suite.nodeMgr)
suite.broker = meta.NewMockBroker(suite.T())
suite.targetMgr = meta.NewTargetManager(suite.broker, suite.meta)
suite.targetObserver = observers.NewTargetObserver(
@ -137,9 +138,10 @@ func (suite *ServiceSuite) SetupTest() {
suite.dist,
suite.broker,
)
suite.nodeMgr = session.NewNodeManager()
for _, node := range suite.nodes {
suite.nodeMgr.Add(session.NewNodeInfo(node, "localhost"))
err := suite.meta.ResourceManager.AssignNode(meta.DefaultResourceGroupName, node)
suite.NoError(err)
}
suite.cluster = session.NewMockCluster(suite.T())
suite.jobScheduler = job.NewScheduler()
@ -334,6 +336,260 @@ func (suite *ServiceSuite) TestLoadCollection() {
suite.Contains(resp.Reason, ErrNotHealthy.Error())
}
func (suite *ServiceSuite) TestResourceGroup() {
ctx := context.Background()
server := suite.server
createRG := &milvuspb.CreateResourceGroupRequest{
ResourceGroup: "rg1",
}
resp, err := server.CreateResourceGroup(ctx, createRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode)
resp, err = server.CreateResourceGroup(ctx, createRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
suite.Contains(resp.Reason, ErrCreateResourceGroupFailed.Error())
suite.Contains(resp.Reason, meta.ErrRGAlreadyExist.Error())
listRG := &milvuspb.ListResourceGroupsRequest{}
resp1, err := server.ListResourceGroups(ctx, listRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp1.Status.ErrorCode)
suite.Len(resp1.ResourceGroups, 2)
server.nodeMgr.Add(session.NewNodeInfo(1011, "localhost"))
server.nodeMgr.Add(session.NewNodeInfo(1012, "localhost"))
server.nodeMgr.Add(session.NewNodeInfo(1013, "localhost"))
server.nodeMgr.Add(session.NewNodeInfo(1014, "localhost"))
server.meta.ResourceManager.AddResourceGroup("rg11")
server.meta.ResourceManager.AssignNode("rg11", 1011)
server.meta.ResourceManager.AssignNode("rg11", 1012)
server.meta.ResourceManager.AddResourceGroup("rg12")
server.meta.ResourceManager.AssignNode("rg12", 1013)
server.meta.ResourceManager.AssignNode("rg12", 1014)
server.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))
server.meta.CollectionManager.PutCollection(utils.CreateTestCollection(2, 1))
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
ID: 1,
CollectionID: 1,
Nodes: []int64{1011, 1013},
ResourceGroup: "rg11"},
typeutil.NewUniqueSet(1011, 1013)),
)
server.meta.ReplicaManager.Put(meta.NewReplica(&querypb.Replica{
ID: 2,
CollectionID: 2,
Nodes: []int64{1012, 1014},
ResourceGroup: "rg12"},
typeutil.NewUniqueSet(1012, 1014)),
)
describeRG := &querypb.DescribeResourceGroupRequest{
ResourceGroup: "rg11",
}
resp2, err := server.DescribeResourceGroup(ctx, describeRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp2.Status.ErrorCode)
suite.Equal("rg11", resp2.GetResourceGroup().GetName())
suite.Equal(int32(2), resp2.GetResourceGroup().GetCapacity())
suite.Equal(int32(2), resp2.GetResourceGroup().GetNumAvailableNode())
suite.Equal(map[int64]int32{1: 1}, resp2.GetResourceGroup().GetNumLoadedReplica())
suite.Equal(map[int64]int32{2: 1}, resp2.GetResourceGroup().GetNumIncomingNode())
suite.Equal(map[int64]int32{1: 1}, resp2.GetResourceGroup().GetNumOutgoingNode())
dropRG := &milvuspb.DropResourceGroupRequest{
ResourceGroup: "rg1",
}
resp3, err := server.DropResourceGroup(ctx, dropRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp3.ErrorCode)
resp4, err := server.ListResourceGroups(ctx, listRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp4.Status.ErrorCode)
suite.Len(resp4.GetResourceGroups(), 3)
}
func (suite *ServiceSuite) TestResourceGroupFailed() {
ctx := context.Background()
server := suite.server
// illegal argument
describeRG := &querypb.DescribeResourceGroupRequest{
ResourceGroup: "rfffff",
}
resp, err := server.DescribeResourceGroup(ctx, describeRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.Status.ErrorCode)
// server unhealthy
server.status.Store(commonpb.StateCode_Abnormal)
createRG := &milvuspb.CreateResourceGroupRequest{
ResourceGroup: "rg1",
}
resp1, err := server.CreateResourceGroup(ctx, createRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp1.ErrorCode)
listRG := &milvuspb.ListResourceGroupsRequest{}
resp2, err := server.ListResourceGroups(ctx, listRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp2.Status.ErrorCode)
describeRG = &querypb.DescribeResourceGroupRequest{
ResourceGroup: "rg1",
}
resp3, err := server.DescribeResourceGroup(ctx, describeRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp3.Status.ErrorCode)
dropRG := &milvuspb.DropResourceGroupRequest{
ResourceGroup: "rg1",
}
resp4, err := server.DropResourceGroup(ctx, dropRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp4.ErrorCode)
resp5, err := server.ListResourceGroups(ctx, listRG)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp5.Status.ErrorCode)
}
func (suite *ServiceSuite) TestTransferNode() {
ctx := context.Background()
server := suite.server
err := server.meta.ResourceManager.AddResourceGroup("rg1")
suite.NoError(err)
err = server.meta.ResourceManager.AddResourceGroup("rg2")
suite.NoError(err)
// test transfer node
resp, err := server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rg1",
})
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_Success, resp.ErrorCode)
nodes, err := server.meta.ResourceManager.GetNodes("rg1")
suite.NoError(err)
suite.Len(nodes, 1)
// test transfer node meet non-exist source rg
resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
SourceResourceGroup: "rgggg",
TargetResourceGroup: meta.DefaultResourceGroupName,
})
suite.NoError(err)
suite.Contains(resp.Reason, meta.ErrRGNotExist.Error())
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
// test transfer node meet non-exist target rg
resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rgggg",
})
suite.NoError(err)
suite.Contains(resp.Reason, meta.ErrRGNotExist.Error())
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
// server unhealthy
server.status.Store(commonpb.StateCode_Abnormal)
resp, err = server.TransferNode(ctx, &milvuspb.TransferNodeRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rg1",
})
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, resp.ErrorCode)
}
func (suite *ServiceSuite) TestTransferReplica() {
ctx := context.Background()
server := suite.server
err := server.meta.ResourceManager.AddResourceGroup("rg1")
suite.NoError(err)
err = server.meta.ResourceManager.AddResourceGroup("rg2")
suite.NoError(err)
err = server.meta.ResourceManager.AddResourceGroup("rg3")
suite.NoError(err)
resp, err := suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rg1",
CollectionID: 1,
NumReplica: 2,
})
suite.NoError(err)
suite.Contains(resp.Reason, "found [0] replicas of collection[1] in source resource group")
resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: "rgg",
TargetResourceGroup: meta.DefaultResourceGroupName,
CollectionID: 1,
NumReplica: 2,
})
suite.NoError(err)
suite.Equal(resp.ErrorCode, commonpb.ErrorCode_IllegalArgument)
resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rgg",
CollectionID: 1,
NumReplica: 2,
})
suite.NoError(err)
suite.Equal(resp.ErrorCode, commonpb.ErrorCode_IllegalArgument)
suite.server.meta.Put(meta.NewReplica(&querypb.Replica{
CollectionID: 1,
ID: 111,
ResourceGroup: meta.DefaultResourceGroupName,
}, typeutil.NewUniqueSet(1)))
suite.server.meta.Put(meta.NewReplica(&querypb.Replica{
CollectionID: 1,
ID: 222,
ResourceGroup: meta.DefaultResourceGroupName,
}, typeutil.NewUniqueSet(2)))
suite.server.nodeMgr.Add(session.NewNodeInfo(1001, "localhost"))
suite.server.nodeMgr.Add(session.NewNodeInfo(1002, "localhost"))
suite.server.nodeMgr.Add(session.NewNodeInfo(1003, "localhost"))
suite.server.nodeMgr.Add(session.NewNodeInfo(1004, "localhost"))
suite.server.meta.AssignNode("rg1", 1001)
suite.server.meta.AssignNode("rg2", 1002)
suite.server.meta.AssignNode("rg3", 1003)
suite.server.meta.AssignNode("rg3", 1004)
resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rg3",
CollectionID: 1,
NumReplica: 2,
})
suite.NoError(err)
suite.Equal(resp.ErrorCode, commonpb.ErrorCode_Success)
suite.Len(suite.server.meta.GetByResourceGroup("rg3"), 2)
// server unhealthy
server.status.Store(commonpb.StateCode_Abnormal)
resp, err = suite.server.TransferReplica(ctx, &querypb.TransferReplicaRequest{
SourceResourceGroup: meta.DefaultResourceGroupName,
TargetResourceGroup: "rg3",
CollectionID: 1,
NumReplica: 2,
})
suite.NoError(err)
suite.Equal(resp.ErrorCode, commonpb.ErrorCode_UnexpectedError)
}
func (suite *ServiceSuite) TestLoadCollectionFailed() {
suite.loadAll()
ctx := context.Background()
@ -365,6 +621,19 @@ func (suite *ServiceSuite) TestLoadCollectionFailed() {
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
suite.Contains(resp.Reason, job.ErrLoadParameterMismatched.Error())
}
// Test load with wrong rg num
for _, collection := range suite.collections {
req := &querypb.LoadCollectionRequest{
CollectionID: collection,
ReplicaNumber: suite.replicaNumber[collection] + 1,
ResourceGroups: []string{"rg1", "rg2"},
}
resp, err := server.LoadCollection(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_IllegalArgument, resp.ErrorCode)
suite.Contains(resp.Reason, ErrLoadUseWrongRG.Error())
}
}
func (suite *ServiceSuite) TestLoadPartition() {
@ -756,8 +1025,9 @@ func (suite *ServiceSuite) TestLoadBalance() {
// Test get balance first segment
for _, collection := range suite.collections {
replicas := suite.meta.ReplicaManager.GetByCollection(collection)
srcNode := replicas[0].GetNodes()[0]
dstNode := replicas[0].GetNodes()[1]
nodes := replicas[0].GetNodes()
srcNode := nodes[0]
dstNode := nodes[1]
suite.updateCollectionStatus(collection, querypb.LoadStatus_Loaded)
suite.updateSegmentDist(collection, srcNode)
segments := suite.getAllSegments(collection)
@ -883,8 +1153,9 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
// Test load balance with not fully loaded
for _, collection := range suite.collections {
replicas := suite.meta.ReplicaManager.GetByCollection(collection)
srcNode := replicas[0].GetNodes()[0]
dstNode := replicas[0].GetNodes()[1]
nodes := replicas[0].GetNodes()
srcNode := nodes[0]
dstNode := nodes[1]
suite.updateCollectionStatus(collection, querypb.LoadStatus_Loading)
segments := suite.getAllSegments(collection)
req := &querypb.LoadBalanceRequest{
@ -926,8 +1197,9 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
// Test balance task failed
for _, collection := range suite.collections {
replicas := suite.meta.ReplicaManager.GetByCollection(collection)
srcNode := replicas[0].GetNodes()[0]
dstNode := replicas[0].GetNodes()[1]
nodes := replicas[0].GetNodes()
srcNode := nodes[0]
dstNode := nodes[1]
suite.updateCollectionStatus(collection, querypb.LoadStatus_Loaded)
suite.updateSegmentDist(collection, srcNode)
segments := suite.getAllSegments(collection)
@ -1171,6 +1443,11 @@ func (suite *ServiceSuite) TestGetShardLeadersFailed() {
suite.Equal(commonpb.ErrorCode_NoReplicaAvailable, resp.Status.ErrorCode)
// Segment not fully loaded
for _, node := range suite.nodes {
suite.dist.SegmentDistManager.Update(node)
suite.dist.ChannelDistManager.Update(node)
suite.dist.LeaderViewManager.Update(node)
}
suite.updateChannelDistWithoutSegment(collection)
suite.fetchHeartbeats(time.Now())
resp, err = server.GetShardLeaders(ctx, req)

@ -130,7 +130,7 @@ func (suite *TaskSuite) SetupTest() {
suite.kv = etcdkv.NewEtcdKV(cli, config.MetaRootPath.GetValue())
suite.store = meta.NewMetaStore(suite.kv)
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store)
suite.meta = meta.NewMeta(RandomIncrementIDAllocator(), suite.store, session.NewNodeManager())
suite.dist = meta.NewDistributionManager()
suite.broker = meta.NewMockBroker(suite.T())
suite.target = meta.NewTargetManager(suite.broker, suite.meta)
@ -1260,14 +1260,14 @@ func (suite *TaskSuite) newScheduler() *taskScheduler {
}
func createReplica(collection int64, nodes ...int64) *meta.Replica {
return &meta.Replica{
Replica: &querypb.Replica{
return meta.NewReplica(
&querypb.Replica{
ID: rand.Int63()/2 + 1,
CollectionID: collection,
Nodes: nodes,
},
Nodes: typeutil.NewUniqueSet(nodes...),
}
typeutil.NewUniqueSet(nodes...),
)
}
func TestTask(t *testing.T) {

@ -18,12 +18,22 @@ package utils
import (
"context"
"errors"
"fmt"
"math/rand"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/samber/lo"
"go.uber.org/zap"
)
var (
ErrGetNodesFromRG = errors.New("failed to get node from rg")
ErrNoReplicaFound = errors.New("no replica found during assign nodes")
ErrReplicasInconsistent = errors.New("all replicas should belong to same collection during assign nodes")
ErrUseWrongNumRG = errors.New("resource group num can only be 0, 1 or same as replica number")
)
func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, replicaID int64) []*session.NodeInfo {
@ -32,8 +42,8 @@ func GetReplicaNodesInfo(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeM
return nil
}
nodes := make([]*session.NodeInfo, 0, len(replica.Nodes))
for node := range replica.Nodes {
nodes := make([]*session.NodeInfo, 0, len(replica.GetNodes()))
for _, node := range replica.GetNodes() {
nodes = append(nodes, nodeMgr.Get(node))
}
return nodes
@ -64,7 +74,7 @@ func GroupNodesByReplica(replicaMgr *meta.ReplicaManager, collectionID int64, no
replicas := replicaMgr.GetByCollection(collectionID)
for _, replica := range replicas {
for _, node := range nodes {
if replica.Nodes.Contain(node) {
if replica.Contains(node) {
ret[replica.ID] = append(ret[replica.ID], node)
}
}
@ -90,7 +100,7 @@ func GroupSegmentsByReplica(replicaMgr *meta.ReplicaManager, collectionID int64,
replicas := replicaMgr.GetByCollection(collectionID)
for _, replica := range replicas {
for _, segment := range segments {
if replica.Nodes.Contain(segment.Node) {
if replica.Contains(segment.Node) {
ret[replica.ID] = append(ret[replica.ID], segment)
}
}
@ -101,24 +111,92 @@ func GroupSegmentsByReplica(replicaMgr *meta.ReplicaManager, collectionID int64,
// AssignNodesToReplicas assigns nodes to the given replicas.
// All given replicas must belong to the same collection,
// and they must not yet be registered in the ReplicaManager.
func AssignNodesToReplicas(nodeMgr *session.NodeManager, replicas ...*meta.Replica) {
replicaNumber := len(replicas)
nodes := nodeMgr.GetAll()
rand.Shuffle(len(nodes), func(i, j int) {
nodes[i], nodes[j] = nodes[j], nodes[i]
func AssignNodesToReplicas(m *meta.Meta, rgName string, replicas ...*meta.Replica) error {
replicaIDs := lo.Map(replicas, func(r *meta.Replica, _ int) int64 { return r.GetID() })
// return early before touching replicas[0], so an empty replica list cannot panic
if len(replicaIDs) == 0 {
return nil
}
log := log.With(zap.Int64("collectionID", replicas[0].GetCollectionID()),
zap.Int64s("replicas", replicaIDs),
zap.String("rgName", rgName),
)
nodeGroup, err := m.ResourceManager.GetNodes(rgName)
if err != nil {
log.Error("failed to get nodes", zap.Error(err))
return err
}
if len(nodeGroup) < len(replicaIDs) {
log.Error(meta.ErrNodeNotEnough.Error())
return meta.ErrNodeNotEnough
}
rand.Shuffle(len(nodeGroup), func(i, j int) {
nodeGroup[i], nodeGroup[j] = nodeGroup[j], nodeGroup[i]
})
for i, node := range nodes {
replicas[i%replicaNumber].AddNode(node.ID())
log.Info("assign nodes to replicas",
zap.Int64s("nodes", nodeGroup),
)
for i, node := range nodeGroup {
replicas[i%len(replicas)].AddNode(node)
}
return nil
}
// SpawnAllReplicasInRG spawns replicas of the given collection in the given resource group, assigns nodes to them, and saves them
func SpawnReplicas(replicaMgr *meta.ReplicaManager, nodeMgr *session.NodeManager, collection int64, replicaNumber int32) ([]*meta.Replica, error) {
replicas, err := replicaMgr.Spawn(collection, replicaNumber)
func SpawnAllReplicasInRG(m *meta.Meta, collection int64, replicaNumber int32, rgName string) ([]*meta.Replica, error) {
replicas, err := m.ReplicaManager.Spawn(collection, replicaNumber, rgName)
if err != nil {
return nil, err
}
AssignNodesToReplicas(nodeMgr, replicas...)
return replicas, replicaMgr.Put(replicas...)
err = AssignNodesToReplicas(m, rgName, replicas...)
if err != nil {
return nil, err
}
return replicas, m.ReplicaManager.Put(replicas...)
}
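// checkResourceGroup validates the resourceGroups argument of SpawnReplicasWithRG:
// it must be empty, name exactly one group, or name exactly replicaNumber groups.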
func checkResourceGroup(collectionID int64, replicaNumber int32, resourceGroups []string) error {
if len(resourceGroups) != 0 && len(resourceGroups) != 1 && len(resourceGroups) != int(replicaNumber) {
return ErrUseWrongNumRG
}
return nil
}
func SpawnReplicasWithRG(m *meta.Meta, collection int64, resourceGroups []string, replicaNumber int32) ([]*meta.Replica, error) {
if err := checkResourceGroup(collection, replicaNumber, resourceGroups); err != nil {
return nil, err
}
if len(resourceGroups) == 0 {
return SpawnAllReplicasInRG(m, collection, replicaNumber, meta.DefaultResourceGroupName)
}
if len(resourceGroups) == 1 {
return SpawnAllReplicasInRG(m, collection, replicaNumber, resourceGroups[0])
}
replicaSet := make([]*meta.Replica, 0)
for _, rgName := range resourceGroups {
if !m.ResourceManager.ContainResourceGroup(rgName) {
return nil, meta.ErrRGNotExist
}
replicas, err := m.ReplicaManager.Spawn(collection, 1, rgName)
if err != nil {
return nil, err
}
err = AssignNodesToReplicas(m, rgName, replicas...)
if err != nil {
return nil, err
}
replicaSet = append(replicaSet, replicas...)
}
return replicaSet, m.ReplicaManager.Put(replicaSet...)
}
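// spawnReplicasExample is a minimal, illustrative sketch (not part of the change itself)
// of the three accepted shapes of the resourceGroups argument; the meta object, the
// collection ID 1000, and the group names are assumed to be set up by the caller.
func spawnReplicasExample(m *meta.Meta) error {
	// empty list: place all replicas in the default resource group
	if _, err := SpawnReplicasWithRG(m, 1000, nil, 3); err != nil {
		return err
	}
	// single entry: place all replicas in that one resource group
	if _, err := SpawnReplicasWithRG(m, 1000, []string{"rg1"}, 3); err != nil {
		return err
	}
	// one entry per replica: spawn exactly one replica in each listed group;
	// any other length is rejected with ErrUseWrongNumRG
	_, err := SpawnReplicasWithRG(m, 1000, []string{"rg1", "rg2", "rg3"}, 3)
	return err
}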

@ -0,0 +1,110 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"testing"
etcdKV "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
. "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/internal/util/etcd"
)
func TestSpawnReplicasWithRG(t *testing.T) {
Params.Init()
config := GenerateEtcdConfig()
cli, _ := etcd.GetEtcdClient(
config.UseEmbedEtcd.GetAsBool(),
config.EtcdUseSSL.GetAsBool(),
config.Endpoints.GetAsStrings(),
config.EtcdTLSCert.GetValue(),
config.EtcdTLSKey.GetValue(),
config.EtcdTLSCACert.GetValue(),
config.EtcdTLSMinVersion.GetValue())
kv := etcdKV.NewEtcdKV(cli, config.MetaRootPath.GetValue())
store := meta.NewMetaStore(kv)
nodeMgr := session.NewNodeManager()
m := meta.NewMeta(RandomIncrementIDAllocator(), store, nodeMgr)
m.ResourceManager.AddResourceGroup("rg1")
m.ResourceManager.AddResourceGroup("rg2")
m.ResourceManager.AddResourceGroup("rg3")
for i := 1; i < 10; i++ {
nodeMgr.Add(session.NewNodeInfo(int64(i), "localhost"))
if i%3 == 0 {
m.ResourceManager.AssignNode("rg1", int64(i))
}
if i%3 == 1 {
m.ResourceManager.AssignNode("rg2", int64(i))
}
if i%3 == 2 {
m.ResourceManager.AssignNode("rg3", int64(i))
}
}
type args struct {
m *meta.Meta
collection int64
resourceGroups []string
replicaNumber int32
}
tests := []struct {
name string
args args
wantReplicaNum int
wantErr bool
}{
{
name: "test 3 replica on 1 rg",
args: args{m, 1000, []string{"rg1"}, 3},
wantReplicaNum: 3,
wantErr: false,
},
{
name: "test 3 replica on 2 rg",
args: args{m, 1000, []string{"rg1", "rg2"}, 3},
wantReplicaNum: 0,
wantErr: true,
},
{
name: "test 3 replica on 3 rg",
args: args{m, 1000, []string{"rg1", "rg2", "rg3"}, 3},
wantReplicaNum: 3,
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := SpawnReplicasWithRG(tt.args.m, tt.args.collection, tt.args.resourceGroups, tt.args.replicaNumber)
if (err != nil) != tt.wantErr {
t.Errorf("SpawnReplicasWithRG() error = %v, wantErr %v", err, tt.wantErr)
return
}
if len(got) != tt.wantReplicaNum {
t.Errorf("SpawnReplicasWithRG() = %v, want %d replicas", got, tt.args.replicaNumber)
}
})
}
}

@ -52,14 +52,15 @@ func CreateTestChannel(collection, node, version int64, channel string) *meta.Dm
}
func CreateTestReplica(id, collectionID int64, nodes []int64) *meta.Replica {
return &meta.Replica{
Replica: &querypb.Replica{
ID: id,
CollectionID: collectionID,
Nodes: nodes,
return meta.NewReplica(
&querypb.Replica{
ID: id,
CollectionID: collectionID,
Nodes: nodes,
ResourceGroup: meta.DefaultResourceGroupName,
},
Nodes: typeutil.NewUniqueSet(nodes...),
}
typeutil.NewUniqueSet(nodes...),
)
}
func CreateTestCollection(collection int64, replica int32) *meta.Collection {

@ -20,7 +20,6 @@ import (
"fmt"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/milvuspb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta"
@ -148,11 +147,3 @@ func MergeDmChannelInfo(infos []*datapb.VchannelInfo) *meta.DmChannel {
return dmChannel
}
func Replica2ReplicaInfo(replica *querypb.Replica) *milvuspb.ReplicaInfo {
return &milvuspb.ReplicaInfo{
ReplicaID: replica.GetID(),
CollectionID: replica.GetCollectionID(),
NodeIds: replica.GetNodes(),
}
}

@ -1304,6 +1304,13 @@ type ProxyComponent interface {
// RenameCollection rename collection from old name to new name
RenameCollection(ctx context.Context, req *milvuspb.RenameCollectionRequest) (*commonpb.Status, error)
CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error)
DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error)
TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error)
TransferReplica(ctx context.Context, req *milvuspb.TransferReplicaRequest) (*commonpb.Status, error)
ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error)
DescribeResourceGroup(ctx context.Context, req *milvuspb.DescribeResourceGroupRequest) (*milvuspb.DescribeResourceGroupResponse, error)
}
// QueryNode is the interface `querynode` package implements
@ -1376,6 +1383,13 @@ type QueryCoord interface {
GetShardLeaders(ctx context.Context, req *querypb.GetShardLeadersRequest) (*querypb.GetShardLeadersResponse, error)
CheckHealth(ctx context.Context, req *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error)
CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest) (*commonpb.Status, error)
DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest) (*commonpb.Status, error)
TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest) (*commonpb.Status, error)
TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest) (*commonpb.Status, error)
ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest) (*milvuspb.ListResourceGroupsResponse, error)
DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest) (*querypb.DescribeResourceGroupResponse, error)
}
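// exampleResourceGroupFlow is an illustrative sketch (not part of the interface
// definitions) of how a caller could drive the new resource-group RPCs on a QueryCoord.
// The group names "rg0"/"rg1" and collection ID 100 are placeholders, and only the
// transport error is checked here; a real caller would also inspect each returned Status.
func exampleResourceGroupFlow(ctx context.Context, qc QueryCoord) error {
	if _, err := qc.CreateResourceGroup(ctx, &milvuspb.CreateResourceGroupRequest{ResourceGroup: "rg1"}); err != nil {
		return err
	}
	// move one node from an existing group into rg1
	if _, err := qc.TransferNode(ctx, &milvuspb.TransferNodeRequest{
		SourceResourceGroup: "rg0",
		TargetResourceGroup: "rg1",
	}); err != nil {
		return err
	}
	// move one replica of collection 100 into rg1
	if _, err := qc.TransferReplica(ctx, &querypb.TransferReplicaRequest{
		SourceResourceGroup: "rg0",
		TargetResourceGroup: "rg1",
		CollectionID:        100,
		NumReplica:          1,
	}); err != nil {
		return err
	}
	if _, err := qc.ListResourceGroups(ctx, &milvuspb.ListResourceGroupsRequest{}); err != nil {
		return err
	}
	_, err := qc.DescribeResourceGroup(ctx, &querypb.DescribeResourceGroupRequest{ResourceGroup: "rg1"})
	return err
}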
// QueryCoordComponent is used by grpc server of QueryCoord

@ -101,3 +101,27 @@ func (m *GrpcQueryCoordClient) GetReplicas(ctx context.Context, in *milvuspb.Get
func (m *GrpcQueryCoordClient) GetShardLeaders(ctx context.Context, in *querypb.GetShardLeadersRequest, opts ...grpc.CallOption) (*querypb.GetShardLeadersResponse, error) {
return &querypb.GetShardLeadersResponse{}, m.Err
}
func (m *GrpcQueryCoordClient) CreateResourceGroup(ctx context.Context, req *milvuspb.CreateResourceGroupRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcQueryCoordClient) DropResourceGroup(ctx context.Context, req *milvuspb.DropResourceGroupRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcQueryCoordClient) TransferNode(ctx context.Context, req *milvuspb.TransferNodeRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcQueryCoordClient) TransferReplica(ctx context.Context, req *querypb.TransferReplicaRequest, opts ...grpc.CallOption) (*commonpb.Status, error) {
return &commonpb.Status{}, m.Err
}
func (m *GrpcQueryCoordClient) ListResourceGroups(ctx context.Context, req *milvuspb.ListResourceGroupsRequest, opts ...grpc.CallOption) (*milvuspb.ListResourceGroupsResponse, error) {
return &milvuspb.ListResourceGroupsResponse{}, m.Err
}
func (m *GrpcQueryCoordClient) DescribeResourceGroup(ctx context.Context, req *querypb.DescribeResourceGroupRequest, opts ...grpc.CallOption) (*querypb.DescribeResourceGroupResponse, error) {
return &querypb.DescribeResourceGroupResponse{}, m.Err
}

@ -888,8 +888,11 @@ type queryCoordConfig struct {
CheckHandoffInterval ParamItem `refreshable:"true"`
EnableActiveStandby ParamItem `refreshable:"false"`
NextTargetSurviveTime ParamItem `refreshable:"true"`
UpdateNextTargetInterval ParamItem `refreshable:"false"`
NextTargetSurviveTime ParamItem `refreshable:"true"`
UpdateNextTargetInterval ParamItem `refreshable:"false"`
CheckNodeInReplicaInterval ParamItem `refreshable:"false"`
CheckResourceGroupInterval ParamItem `refreshable:"false"`
EnableRGAutoRecover ParamItem `refreshable:"true"`
}
func (p *queryCoordConfig) init(base *BaseTable) {
@ -1040,6 +1043,30 @@ func (p *queryCoordConfig) init(base *BaseTable) {
PanicIfEmpty: true,
}
p.UpdateNextTargetInterval.Init(base.mgr)
p.CheckNodeInReplicaInterval = ParamItem{
Key: "queryCoord.checkNodeInReplicaInterval",
Version: "2.2.3",
DefaultValue: "60",
PanicIfEmpty: true,
}
p.CheckNodeInReplicaInterval.Init(base.mgr)
p.CheckResourceGroupInterval = ParamItem{
Key: "queryCoord.checkResourceGroupInterval",
Version: "2.2.3",
DefaultValue: "30",
PanicIfEmpty: true,
}
p.CheckResourceGroupInterval.Init(base.mgr)
p.EnableRGAutoRecover = ParamItem{
Key: "queryCoord.enableRGAutoRecover",
Version: "2.2.3",
DefaultValue: "true",
PanicIfEmpty: true,
}
p.EnableRGAutoRecover.Init(base.mgr)
}
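// resourceGroupCheckLoopSketch is an illustrative sketch (not part of this change) of how
// the new knobs could drive a periodic resource-group check. The checkOnce callback and
// stop channel are placeholders, interval values are stored as seconds, and the standard
// time package is assumed to be imported in this file.
func resourceGroupCheckLoopSketch(cfg *queryCoordConfig, stop <-chan struct{}, checkOnce func()) {
	if !cfg.EnableRGAutoRecover.GetAsBool() {
		return
	}
	ticker := time.NewTicker(time.Duration(cfg.CheckResourceGroupInterval.GetAsInt()) * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-stop:
			return
		case <-ticker.C:
			checkOnce()
		}
	}
}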
// /////////////////////////////////////////////////////////////////////////////

@ -243,6 +243,28 @@ func TestComponentParam(t *testing.T) {
Params := params.QueryCoordCfg
assert.Equal(t, Params.EnableActiveStandby.GetAsBool(), false)
t.Logf("queryCoord EnableActiveStandby = %t", Params.EnableActiveStandby.GetAsBool())
params.Save("queryCoord.NextTargetSurviveTime", "100")
NextTargetSurviveTime := Params.NextTargetSurviveTime
assert.Equal(t, int64(100), NextTargetSurviveTime.GetAsInt64())
params.Save("queryCoord.UpdateNextTargetInterval", "100")
UpdateNextTargetInterval := Params.UpdateNextTargetInterval
assert.Equal(t, int64(100), UpdateNextTargetInterval.GetAsInt64())
params.Save("queryCoord.checkNodeInReplicaInterval", "100")
checkNodeInReplicaInterval := Params.CheckNodeInReplicaInterval
assert.Equal(t, 100, checkNodeInReplicaInterval.GetAsInt())
params.Save("queryCoord.checkResourceGroupInterval", "10")
checkResourceGroupInterval := Params.CheckResourceGroupInterval
assert.Equal(t, 10, checkResourceGroupInterval.GetAsInt())
enableResourceGroupAutoRecover := Params.EnableRGAutoRecover
assert.Equal(t, true, enableResourceGroupAutoRecover.GetAsBool())
params.Save("queryCoord.enableRGAutoRecover", "false")
enableResourceGroupAutoRecover = Params.EnableRGAutoRecover
assert.Equal(t, false, enableResourceGroupAutoRecover.GetAsBool())
})
t.Run("test queryNodeConfig", func(t *testing.T) {