mirror of https://github.com/milvus-io/milvus.git
Signed-off-by: cai.zhang <cai.zhang@zilliz.com>pull/21868/head
parent
bb086f47e4
commit
bcae97b125
|
@ -238,6 +238,7 @@ func (s *Server) Register() error {
|
|||
s.icSession.Register()
|
||||
s.session.Register()
|
||||
if s.enableActiveStandBy {
|
||||
s.icSession.ProcessActiveStandBy(nil)
|
||||
s.session.ProcessActiveStandBy(s.activateFunc)
|
||||
}
|
||||
go s.session.LivenessCheck(s.serverLoopCtx, func() {
|
||||
|
@ -261,6 +262,7 @@ func (s *Server) initSession() error {
|
|||
return errors.New("failed to initialize IndexCoord session")
|
||||
}
|
||||
s.icSession.Init(typeutil.IndexCoordRole, s.address, true, true)
|
||||
s.icSession.SetEnableActiveStandBy(s.enableActiveStandBy)
|
||||
|
||||
s.session = sessionutil.NewSession(s.ctx, Params.EtcdCfg.MetaRootPath.GetValue(), s.etcdCli)
|
||||
if s.session == nil {
|
||||
|
@ -276,7 +278,30 @@ func (s *Server) Init() error {
|
|||
var err error
|
||||
s.stateCode.Store(commonpb.StateCode_Initializing)
|
||||
s.factory.Init(Params)
|
||||
if err = s.initSession(); err != nil {
|
||||
return err
|
||||
}
|
||||
if s.enableActiveStandBy {
|
||||
s.activateFunc = func() {
|
||||
log.Info("DataCoord switch from standby to active, activating")
|
||||
if err := s.initDataCoord(); err != nil {
|
||||
log.Warn("DataCoord init failed", zap.Error(err))
|
||||
// TODO: panic if error occurred?
|
||||
}
|
||||
s.startDataCoord()
|
||||
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
log.Info("DataCoord startup success")
|
||||
}
|
||||
s.stateCode.Store(commonpb.StateCode_StandBy)
|
||||
log.Info("DataCoord enter standby mode successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.initDataCoord()
|
||||
}
|
||||
|
||||
func (s *Server) initDataCoord() error {
|
||||
var err error
|
||||
if err = s.initRootCoordClient(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -316,6 +341,8 @@ func (s *Server) Init() error {
|
|||
s.initGarbageCollection(storageCli)
|
||||
s.initIndexBuilder(storageCli)
|
||||
|
||||
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -327,35 +354,27 @@ func (s *Server) Init() error {
|
|||
// datanodes etcd watch, etcd alive check and flush completed status check
|
||||
// 4. set server state to Healthy
|
||||
func (s *Server) Start() error {
|
||||
if !s.enableActiveStandBy {
|
||||
s.startDataCoord()
|
||||
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
log.Info("DataCoord startup successfully")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) startDataCoord() {
|
||||
if Params.DataCoordCfg.EnableCompaction.GetAsBool() {
|
||||
s.compactionHandler.start()
|
||||
s.compactionTrigger.start()
|
||||
}
|
||||
|
||||
if s.enableActiveStandBy {
|
||||
s.activateFunc = func() {
|
||||
// todo complete the activateFunc
|
||||
log.Info("datacoord switch from standby to active, activating")
|
||||
s.startServerLoop()
|
||||
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
logutil.Logger(s.ctx).Info("startup success")
|
||||
}
|
||||
s.stateCode.Store(commonpb.StateCode_StandBy)
|
||||
logutil.Logger(s.ctx).Info("DataCoord enter standby mode successfully")
|
||||
} else {
|
||||
s.startServerLoop()
|
||||
s.stateCode.Store(commonpb.StateCode_Healthy)
|
||||
logutil.Logger(s.ctx).Info("DataCoord startup successfully")
|
||||
}
|
||||
|
||||
s.startServerLoop()
|
||||
// DataCoord (re)starts successfully and starts to collection segment stats
|
||||
// data from all DataNode.
|
||||
// This will prevent DataCoord from missing out any important segment stats
|
||||
// data while offline.
|
||||
log.Info("DataCoord (re)starts successfully and re-collecting segment stats from DataNodes")
|
||||
s.reCollectSegmentStats(s.ctx)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) initCluster() error {
|
||||
|
@ -516,7 +535,6 @@ func (s *Server) initIndexNodeManager() {
|
|||
}
|
||||
|
||||
func (s *Server) startServerLoop() {
|
||||
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
|
||||
s.serverLoopWg.Add(3)
|
||||
s.startDataNodeTtLoop(s.serverLoopCtx)
|
||||
s.startWatchService(s.serverLoopCtx)
|
||||
|
|
|
@ -164,16 +164,17 @@ func (s *Server) startGrpcLoop(grpcPort int) {
|
|||
}
|
||||
|
||||
func (s *Server) start() error {
|
||||
err := s.dataCoord.Start()
|
||||
if err != nil {
|
||||
log.Error("DataCoord start failed", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
err = s.dataCoord.Register()
|
||||
err := s.dataCoord.Register()
|
||||
if err != nil {
|
||||
log.Debug("DataCoord register service failed", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.dataCoord.Start()
|
||||
if err != nil {
|
||||
log.Error("DataCoord start failed", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -241,11 +241,11 @@ func (s *Server) startGrpcLoop(grpcPort int) {
|
|||
|
||||
// start starts QueryCoord's grpc service.
|
||||
func (s *Server) start() error {
|
||||
err := s.queryCoord.Start()
|
||||
err := s.queryCoord.Register()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return s.queryCoord.Register()
|
||||
return s.queryCoord.Start()
|
||||
}
|
||||
|
||||
// Stop stops QueryCoord's grpc service.
|
||||
|
|
|
@ -256,16 +256,17 @@ func (s *Server) startGrpcLoop(port int) {
|
|||
}
|
||||
|
||||
func (s *Server) start() error {
|
||||
log.Debug("RootCoord Core start ...")
|
||||
if err := s.rootCoord.Start(); err != nil {
|
||||
log.Error(err.Error())
|
||||
return err
|
||||
}
|
||||
log.Info("RootCoord Core start ...")
|
||||
if err := s.rootCoord.Register(); err != nil {
|
||||
log.Error("RootCoord registers service failed", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.rootCoord.Start(); err != nil {
|
||||
log.Error("RootCoord start service failed", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -144,11 +144,7 @@ func (s *Server) Register() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) Init() error {
|
||||
log.Info("QueryCoord start init",
|
||||
zap.String("meta-root-path", Params.EtcdCfg.MetaRootPath.GetValue()),
|
||||
zap.String("address", s.address))
|
||||
|
||||
func (s *Server) initSession() error {
|
||||
// Init QueryCoord session
|
||||
s.session = sessionutil.NewSession(s.ctx, Params.EtcdCfg.MetaRootPath.GetValue(), s.etcdCli)
|
||||
if s.session == nil {
|
||||
|
@ -157,7 +153,39 @@ func (s *Server) Init() error {
|
|||
s.session.Init(typeutil.QueryCoordRole, s.address, true, true)
|
||||
s.enableActiveStandBy = Params.QueryCoordCfg.EnableActiveStandby.GetAsBool()
|
||||
s.session.SetEnableActiveStandBy(s.enableActiveStandBy)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) Init() error {
|
||||
log.Info("QueryCoord start init",
|
||||
zap.String("meta-root-path", Params.EtcdCfg.MetaRootPath.GetValue()),
|
||||
zap.String("address", s.address))
|
||||
|
||||
if err := s.initSession(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.enableActiveStandBy {
|
||||
s.activateFunc = func() {
|
||||
log.Info("QueryCoord switch from standby to active, activating")
|
||||
if err := s.initQueryCoord(); err != nil {
|
||||
log.Warn("QueryCoord init failed", zap.Error(err))
|
||||
}
|
||||
if err := s.startQueryCoord(); err != nil {
|
||||
log.Warn("QueryCoord init failed", zap.Error(err))
|
||||
}
|
||||
s.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
log.Info("QueryCoord startup success")
|
||||
}
|
||||
s.UpdateStateCode(commonpb.StateCode_StandBy)
|
||||
log.Info("QueryCoord enter standby mode successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
return s.initQueryCoord()
|
||||
}
|
||||
|
||||
func (s *Server) initQueryCoord() error {
|
||||
// Init KV
|
||||
etcdKV := etcdkv.NewEtcdKV(s.etcdCli, Params.EtcdCfg.MetaRootPath.GetValue())
|
||||
s.kv = etcdKV
|
||||
|
@ -318,6 +346,17 @@ func (s *Server) afterStart() {
|
|||
}
|
||||
|
||||
func (s *Server) Start() error {
|
||||
if !s.enableActiveStandBy {
|
||||
if err := s.startQueryCoord(); err != nil {
|
||||
return err
|
||||
}
|
||||
s.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
log.Info("QueryCoord started")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) startQueryCoord() error {
|
||||
log.Info("start watcher...")
|
||||
sessions, revision, err := s.session.GetSessions(typeutil.QueryNodeRole)
|
||||
if err != nil {
|
||||
|
@ -339,22 +378,8 @@ func (s *Server) Start() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.enableActiveStandBy {
|
||||
s.activateFunc = func() {
|
||||
log.Info("querycoord switch from standby to active, activating")
|
||||
s.startServerLoop()
|
||||
s.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
}
|
||||
s.UpdateStateCode(commonpb.StateCode_StandBy)
|
||||
} else {
|
||||
s.startServerLoop()
|
||||
s.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
}
|
||||
log.Info("QueryCoord started")
|
||||
|
||||
s.startServerLoop()
|
||||
s.afterStart()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -27,8 +27,10 @@ import (
|
|||
"github.com/stretchr/testify/suite"
|
||||
|
||||
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/milvuspb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/schemapb"
|
||||
"github.com/milvus-io/milvus/internal/log"
|
||||
coordMocks "github.com/milvus-io/milvus/internal/mocks"
|
||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/checkers"
|
||||
|
@ -39,6 +41,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/querycoordv2/params"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/session"
|
||||
"github.com/milvus-io/milvus/internal/querycoordv2/task"
|
||||
"github.com/milvus-io/milvus/internal/util/commonpbutil"
|
||||
"github.com/milvus-io/milvus/internal/util/etcd"
|
||||
"github.com/milvus-io/milvus/internal/util/paramtable"
|
||||
)
|
||||
|
@ -244,14 +247,41 @@ func (suite *ServerSuite) TestEnableActiveStandby() {
|
|||
|
||||
suite.server, err = newQueryCoord()
|
||||
suite.NoError(err)
|
||||
suite.hackServer()
|
||||
err = suite.server.Start()
|
||||
mockRootCoord := coordMocks.NewRootCoord(suite.T())
|
||||
mockDataCoord := coordMocks.NewDataCoord(suite.T())
|
||||
|
||||
mockRootCoord.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
|
||||
Status: successStatus,
|
||||
Schema: &schemapb.CollectionSchema{},
|
||||
}, nil).Maybe()
|
||||
for _, collection := range suite.collections {
|
||||
if suite.loadTypes[collection] == querypb.LoadType_LoadCollection {
|
||||
req := &milvuspb.ShowPartitionsRequest{
|
||||
Base: commonpbutil.NewMsgBase(
|
||||
commonpbutil.WithMsgType(commonpb.MsgType_ShowPartitions),
|
||||
),
|
||||
CollectionID: collection,
|
||||
}
|
||||
mockRootCoord.EXPECT().ShowPartitionsInternal(mock.Anything, req).Return(&milvuspb.ShowPartitionsResponse{
|
||||
Status: successStatus,
|
||||
PartitionIDs: suite.partitions[collection],
|
||||
}, nil).Maybe()
|
||||
}
|
||||
suite.expectGetRecoverInfoByMockDataCoord(collection, mockDataCoord)
|
||||
|
||||
}
|
||||
err = suite.server.SetRootCoord(mockRootCoord)
|
||||
suite.NoError(err)
|
||||
err = suite.server.SetDataCoord(mockDataCoord)
|
||||
suite.NoError(err)
|
||||
//suite.hackServer()
|
||||
states1, err := suite.server.GetComponentStates(context.Background())
|
||||
suite.NoError(err)
|
||||
suite.Equal(commonpb.StateCode_StandBy, states1.GetState().GetStateCode())
|
||||
err = suite.server.Register()
|
||||
suite.NoError(err)
|
||||
err = suite.server.Start()
|
||||
suite.NoError(err)
|
||||
|
||||
states2, err := suite.server.GetComponentStates(context.Background())
|
||||
suite.NoError(err)
|
||||
|
@ -351,6 +381,43 @@ func (suite *ServerSuite) expectGetRecoverInfo(collection int64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (suite *ServerSuite) expectGetRecoverInfoByMockDataCoord(collection int64, dataCoord *coordMocks.DataCoord) {
|
||||
var (
|
||||
vChannels []*datapb.VchannelInfo
|
||||
segmentBinlogs []*datapb.SegmentBinlogs
|
||||
)
|
||||
|
||||
for partition, segments := range suite.segments[collection] {
|
||||
segments := segments
|
||||
getRecoveryInfoRequest := &datapb.GetRecoveryInfoRequest{
|
||||
Base: commonpbutil.NewMsgBase(
|
||||
commonpbutil.WithMsgType(commonpb.MsgType_GetRecoveryInfo),
|
||||
),
|
||||
CollectionID: collection,
|
||||
PartitionID: partition,
|
||||
}
|
||||
vChannels = []*datapb.VchannelInfo{}
|
||||
for _, channel := range suite.channels[collection] {
|
||||
vChannels = append(vChannels, &datapb.VchannelInfo{
|
||||
CollectionID: collection,
|
||||
ChannelName: channel,
|
||||
})
|
||||
}
|
||||
segmentBinlogs = []*datapb.SegmentBinlogs{}
|
||||
for _, segment := range segments {
|
||||
segmentBinlogs = append(segmentBinlogs, &datapb.SegmentBinlogs{
|
||||
SegmentID: segment,
|
||||
InsertChannel: suite.channels[collection][segment%2],
|
||||
})
|
||||
}
|
||||
dataCoord.EXPECT().GetRecoveryInfo(mock.Anything, getRecoveryInfoRequest).Maybe().Return(&datapb.GetRecoveryInfoResponse{
|
||||
Status: successStatus,
|
||||
Channels: vChannels,
|
||||
Binlogs: segmentBinlogs,
|
||||
}, nil)
|
||||
}
|
||||
}
|
||||
|
||||
func (suite *ServerSuite) updateCollectionStatus(collectionID int64, status querypb.LoadStatus) {
|
||||
collection := suite.server.meta.GetCollection(collectionID)
|
||||
if collection != nil {
|
||||
|
|
|
@ -405,10 +405,6 @@ func (c *Core) initImportManager() error {
|
|||
}
|
||||
|
||||
func (c *Core) initInternal() error {
|
||||
if err := c.initSession(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.initKVCreator()
|
||||
|
||||
if err := c.initMetaTable(); err != nil {
|
||||
|
@ -426,7 +422,6 @@ func (c *Core) initInternal() error {
|
|||
c.scheduler = newScheduler(c.ctx, c.idAllocator, c.tsoAllocator)
|
||||
|
||||
c.factory.Init(Params)
|
||||
|
||||
chanMap := c.meta.ListCollectionPhysicalChannels()
|
||||
c.chanTimeTick = newTimeTickSync(c.ctx, c.session.ServerID, c.factory, chanMap)
|
||||
c.proxyClientManager = newProxyClientManager(c.proxyCreator)
|
||||
|
@ -468,9 +463,36 @@ func (c *Core) initInternal() error {
|
|||
// Init initialize routine
|
||||
func (c *Core) Init() error {
|
||||
var initError error
|
||||
c.initOnce.Do(func() {
|
||||
initError = c.initInternal()
|
||||
})
|
||||
c.factory.Init(Params)
|
||||
if err := c.initSession(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.enableActiveStandBy {
|
||||
c.activateFunc = func() {
|
||||
log.Info("RootCoord switch from standby to active, activating")
|
||||
c.initOnce.Do(func() {
|
||||
if err := c.initInternal(); err != nil {
|
||||
log.Warn("RootCoord init failed", zap.Error(err))
|
||||
}
|
||||
})
|
||||
c.startOnce.Do(func() {
|
||||
if err := c.startInternal(); err != nil {
|
||||
log.Warn("RootCoord start failed", zap.Error(err))
|
||||
}
|
||||
})
|
||||
|
||||
c.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
log.Info("RootCoord startup success")
|
||||
}
|
||||
c.UpdateStateCode(commonpb.StateCode_StandBy)
|
||||
log.Info("RootCoord enter standby mode successfully")
|
||||
} else {
|
||||
c.initOnce.Do(func() {
|
||||
initError = c.initInternal()
|
||||
})
|
||||
}
|
||||
|
||||
return initError
|
||||
}
|
||||
|
||||
|
@ -603,20 +625,10 @@ func (c *Core) startInternal() error {
|
|||
c.scheduler.Start()
|
||||
c.stepExecutor.Start()
|
||||
|
||||
if c.enableActiveStandBy {
|
||||
c.activateFunc = func() {
|
||||
// todo to complete
|
||||
log.Info("rootcoord switch from standby to active, activating")
|
||||
c.startServerLoop()
|
||||
c.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
}
|
||||
c.UpdateStateCode(commonpb.StateCode_StandBy)
|
||||
logutil.Logger(c.ctx).Info("rootcoord enter standby mode successfully")
|
||||
} else {
|
||||
c.startServerLoop()
|
||||
c.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
logutil.Logger(c.ctx).Info("rootcoord startup successfully")
|
||||
}
|
||||
c.startServerLoop()
|
||||
c.UpdateStateCode(commonpb.StateCode_Healthy)
|
||||
logutil.Logger(c.ctx).Info("rootcoord startup successfully")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -633,9 +645,12 @@ func (c *Core) startServerLoop() {
|
|||
// Start starts RootCoord.
|
||||
func (c *Core) Start() error {
|
||||
var err error
|
||||
c.startOnce.Do(func() {
|
||||
err = c.startInternal()
|
||||
})
|
||||
if !c.enableActiveStandBy {
|
||||
c.startOnce.Do(func() {
|
||||
err = c.startInternal()
|
||||
})
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue