mirror of https://github.com/milvus-io/milvus.git
Fix standalone can't start after down (#6148)
* multiple goroutines do registerNode and deleteNode
Signed-off-by: xige-16 <xi.ge@zilliz.com>
* code format
Signed-off-by: xige-16 <xi.ge@zilliz.com>
* stop retrying to create querynode client
Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/6149/head^2
parent
b22ab71222
commit
6036ef2c7d
|
@ -70,7 +70,7 @@ func (c *queryNodeCluster) reloadFromKV() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
nodeIDs = append(nodeIDs, nodeID)
|
||||
|
||||
session := &sessionutil.Session{}
|
||||
err = json.Unmarshal([]byte(values[index]), session)
|
||||
if err != nil {
|
||||
|
@ -78,8 +78,10 @@ func (c *queryNodeCluster) reloadFromKV() error {
|
|||
}
|
||||
err = c.RegisterNode(context.Background(), session, nodeID)
|
||||
if err != nil {
|
||||
return err
|
||||
log.Debug("query node failed to register")
|
||||
continue
|
||||
}
|
||||
nodeIDs = append(nodeIDs, nodeID)
|
||||
}
|
||||
for _, nodeID := range nodeIDs {
|
||||
infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)
|
||||
|
|
|
@ -184,11 +184,13 @@ func (qc *QueryCoord) watchNodeLoop() {
|
|||
for nodeID, session := range sessionMap {
|
||||
if _, ok := qc.cluster.nodes[nodeID]; !ok {
|
||||
serverID := session.ServerID
|
||||
err := qc.cluster.RegisterNode(ctx, session, serverID)
|
||||
if err != nil {
|
||||
log.Error("register queryNode error", zap.Any("error", err.Error()))
|
||||
}
|
||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||
go func() {
|
||||
err := qc.cluster.RegisterNode(ctx, session, serverID)
|
||||
if err != nil {
|
||||
log.Error("register queryNode error", zap.Any("error", err.Error()))
|
||||
}
|
||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||
}()
|
||||
}
|
||||
}
|
||||
for nodeID := range qc.cluster.nodes {
|
||||
|
@ -228,11 +230,13 @@ func (qc *QueryCoord) watchNodeLoop() {
|
|||
switch event.EventType {
|
||||
case sessionutil.SessionAddEvent:
|
||||
serverID := event.Session.ServerID
|
||||
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
}
|
||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||
go func() {
|
||||
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
}
|
||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||
}()
|
||||
case sessionutil.SessionDelEvent:
|
||||
serverID := event.Session.ServerID
|
||||
log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID))
|
||||
|
@ -260,12 +264,14 @@ func (qc *QueryCoord) watchNodeLoop() {
|
|||
meta: qc.meta,
|
||||
}
|
||||
qc.scheduler.Enqueue([]task{loadBalanceTask})
|
||||
err := loadBalanceTask.WaitToFinish()
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
}
|
||||
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
|
||||
//TODO::remove nodeInfo and clear etcd
|
||||
go func() {
|
||||
err := loadBalanceTask.WaitToFinish()
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
}
|
||||
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
|
||||
//TODO::remove nodeInfo and clear etcd
|
||||
}()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue