Fix standalone failing to start after going down (#6148)

* Use multiple goroutines to do registerNode and deleteNode

Signed-off-by: xige-16 <xi.ge@zilliz.com>

* code format

Signed-off-by: xige-16 <xi.ge@zilliz.com>

* Stop retrying to create the querynode client

Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/6149/head^2
xige-16 2021-06-26 22:28:10 +08:00 committed by GitHub
parent b22ab71222
commit 6036ef2c7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 18 deletions

View File

@ -70,7 +70,7 @@ func (c *queryNodeCluster) reloadFromKV() error {
if err != nil { if err != nil {
return err return err
} }
nodeIDs = append(nodeIDs, nodeID)
session := &sessionutil.Session{} session := &sessionutil.Session{}
err = json.Unmarshal([]byte(values[index]), session) err = json.Unmarshal([]byte(values[index]), session)
if err != nil { if err != nil {
@ -78,8 +78,10 @@ func (c *queryNodeCluster) reloadFromKV() error {
} }
err = c.RegisterNode(context.Background(), session, nodeID) err = c.RegisterNode(context.Background(), session, nodeID)
if err != nil { if err != nil {
return err log.Debug("query node failed to register")
continue
} }
nodeIDs = append(nodeIDs, nodeID)
} }
for _, nodeID := range nodeIDs { for _, nodeID := range nodeIDs {
infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID) infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)

View File

@ -184,11 +184,13 @@ func (qc *QueryCoord) watchNodeLoop() {
for nodeID, session := range sessionMap { for nodeID, session := range sessionMap {
if _, ok := qc.cluster.nodes[nodeID]; !ok { if _, ok := qc.cluster.nodes[nodeID]; !ok {
serverID := session.ServerID serverID := session.ServerID
err := qc.cluster.RegisterNode(ctx, session, serverID) go func() {
if err != nil { err := qc.cluster.RegisterNode(ctx, session, serverID)
log.Error("register queryNode error", zap.Any("error", err.Error())) if err != nil {
} log.Error("register queryNode error", zap.Any("error", err.Error()))
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID)) }
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
}()
} }
} }
for nodeID := range qc.cluster.nodes { for nodeID := range qc.cluster.nodes {
@ -228,11 +230,13 @@ func (qc *QueryCoord) watchNodeLoop() {
switch event.EventType { switch event.EventType {
case sessionutil.SessionAddEvent: case sessionutil.SessionAddEvent:
serverID := event.Session.ServerID serverID := event.Session.ServerID
err := qc.cluster.RegisterNode(ctx, event.Session, serverID) go func() {
if err != nil { err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
log.Error(err.Error()) if err != nil {
} log.Error(err.Error())
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID)) }
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
}()
case sessionutil.SessionDelEvent: case sessionutil.SessionDelEvent:
serverID := event.Session.ServerID serverID := event.Session.ServerID
log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID)) log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID))
@ -260,12 +264,14 @@ func (qc *QueryCoord) watchNodeLoop() {
meta: qc.meta, meta: qc.meta,
} }
qc.scheduler.Enqueue([]task{loadBalanceTask}) qc.scheduler.Enqueue([]task{loadBalanceTask})
err := loadBalanceTask.WaitToFinish() go func() {
if err != nil { err := loadBalanceTask.WaitToFinish()
log.Error(err.Error()) if err != nil {
} log.Error(err.Error())
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs)) }
//TODO::remove nodeInfo and clear etcd log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
//TODO::remove nodeInfo and clear etcd
}()
} }
} }
} }