Fix standalone failing to start after going down (#6148)

* Use multiple goroutines to do registerNode and deleteNode

Signed-off-by: xige-16 <xi.ge@zilliz.com>

* code format

Signed-off-by: xige-16 <xi.ge@zilliz.com>

* Stop retrying to create the querynode client

Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/6149/head^2
xige-16 2021-06-26 22:28:10 +08:00 committed by GitHub
parent b22ab71222
commit 6036ef2c7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 18 deletions

View File

@ -70,7 +70,7 @@ func (c *queryNodeCluster) reloadFromKV() error {
if err != nil {
return err
}
nodeIDs = append(nodeIDs, nodeID)
session := &sessionutil.Session{}
err = json.Unmarshal([]byte(values[index]), session)
if err != nil {
@ -78,8 +78,10 @@ func (c *queryNodeCluster) reloadFromKV() error {
}
err = c.RegisterNode(context.Background(), session, nodeID)
if err != nil {
return err
log.Debug("query node failed to register")
continue
}
nodeIDs = append(nodeIDs, nodeID)
}
for _, nodeID := range nodeIDs {
infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)

View File

@ -184,11 +184,13 @@ func (qc *QueryCoord) watchNodeLoop() {
for nodeID, session := range sessionMap {
if _, ok := qc.cluster.nodes[nodeID]; !ok {
serverID := session.ServerID
err := qc.cluster.RegisterNode(ctx, session, serverID)
if err != nil {
log.Error("register queryNode error", zap.Any("error", err.Error()))
}
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
go func() {
err := qc.cluster.RegisterNode(ctx, session, serverID)
if err != nil {
log.Error("register queryNode error", zap.Any("error", err.Error()))
}
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
}()
}
}
for nodeID := range qc.cluster.nodes {
@ -228,11 +230,13 @@ func (qc *QueryCoord) watchNodeLoop() {
switch event.EventType {
case sessionutil.SessionAddEvent:
serverID := event.Session.ServerID
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
if err != nil {
log.Error(err.Error())
}
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
go func() {
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
if err != nil {
log.Error(err.Error())
}
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
}()
case sessionutil.SessionDelEvent:
serverID := event.Session.ServerID
log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID))
@ -260,12 +264,14 @@ func (qc *QueryCoord) watchNodeLoop() {
meta: qc.meta,
}
qc.scheduler.Enqueue([]task{loadBalanceTask})
err := loadBalanceTask.WaitToFinish()
if err != nil {
log.Error(err.Error())
}
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
//TODO::remove nodeInfo and clear etcd
go func() {
err := loadBalanceTask.WaitToFinish()
if err != nil {
log.Error(err.Error())
}
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
//TODO::remove nodeInfo and clear etcd
}()
}
}
}