mirror of https://github.com/milvus-io/milvus.git
Fix standalone failing to start after going down (#6148)
* use multiple goroutines to do registerNode and deleteNode
  Signed-off-by: xige-16 <xi.ge@zilliz.com>
* code format
  Signed-off-by: xige-16 <xi.ge@zilliz.com>
* stop retrying to create querynode client
  Signed-off-by: xige-16 <xi.ge@zilliz.com>
pull/6149/head^2
parent
b22ab71222
commit
6036ef2c7d
|
@ -70,7 +70,7 @@ func (c *queryNodeCluster) reloadFromKV() error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
nodeIDs = append(nodeIDs, nodeID)
|
|
||||||
session := &sessionutil.Session{}
|
session := &sessionutil.Session{}
|
||||||
err = json.Unmarshal([]byte(values[index]), session)
|
err = json.Unmarshal([]byte(values[index]), session)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -78,8 +78,10 @@ func (c *queryNodeCluster) reloadFromKV() error {
|
||||||
}
|
}
|
||||||
err = c.RegisterNode(context.Background(), session, nodeID)
|
err = c.RegisterNode(context.Background(), session, nodeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
log.Debug("query node failed to register")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
nodeIDs = append(nodeIDs, nodeID)
|
||||||
}
|
}
|
||||||
for _, nodeID := range nodeIDs {
|
for _, nodeID := range nodeIDs {
|
||||||
infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)
|
infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)
|
||||||
|
|
|
@ -184,11 +184,13 @@ func (qc *QueryCoord) watchNodeLoop() {
|
||||||
for nodeID, session := range sessionMap {
|
for nodeID, session := range sessionMap {
|
||||||
if _, ok := qc.cluster.nodes[nodeID]; !ok {
|
if _, ok := qc.cluster.nodes[nodeID]; !ok {
|
||||||
serverID := session.ServerID
|
serverID := session.ServerID
|
||||||
err := qc.cluster.RegisterNode(ctx, session, serverID)
|
go func() {
|
||||||
if err != nil {
|
err := qc.cluster.RegisterNode(ctx, session, serverID)
|
||||||
log.Error("register queryNode error", zap.Any("error", err.Error()))
|
if err != nil {
|
||||||
}
|
log.Error("register queryNode error", zap.Any("error", err.Error()))
|
||||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
}
|
||||||
|
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for nodeID := range qc.cluster.nodes {
|
for nodeID := range qc.cluster.nodes {
|
||||||
|
@ -228,11 +230,13 @@ func (qc *QueryCoord) watchNodeLoop() {
|
||||||
switch event.EventType {
|
switch event.EventType {
|
||||||
case sessionutil.SessionAddEvent:
|
case sessionutil.SessionAddEvent:
|
||||||
serverID := event.Session.ServerID
|
serverID := event.Session.ServerID
|
||||||
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
|
go func() {
|
||||||
if err != nil {
|
err := qc.cluster.RegisterNode(ctx, event.Session, serverID)
|
||||||
log.Error(err.Error())
|
if err != nil {
|
||||||
}
|
log.Error(err.Error())
|
||||||
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
}
|
||||||
|
log.Debug("query coordinator", zap.Any("Add QueryNode, session serverID", serverID))
|
||||||
|
}()
|
||||||
case sessionutil.SessionDelEvent:
|
case sessionutil.SessionDelEvent:
|
||||||
serverID := event.Session.ServerID
|
serverID := event.Session.ServerID
|
||||||
log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID))
|
log.Debug("query coordinator", zap.Any("The QueryNode crashed with ID", serverID))
|
||||||
|
@ -260,12 +264,14 @@ func (qc *QueryCoord) watchNodeLoop() {
|
||||||
meta: qc.meta,
|
meta: qc.meta,
|
||||||
}
|
}
|
||||||
qc.scheduler.Enqueue([]task{loadBalanceTask})
|
qc.scheduler.Enqueue([]task{loadBalanceTask})
|
||||||
err := loadBalanceTask.WaitToFinish()
|
go func() {
|
||||||
if err != nil {
|
err := loadBalanceTask.WaitToFinish()
|
||||||
log.Error(err.Error())
|
if err != nil {
|
||||||
}
|
log.Error(err.Error())
|
||||||
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
|
}
|
||||||
//TODO::remove nodeInfo and clear etcd
|
log.Debug("load balance done after queryNode down", zap.Int64s("nodeIDs", loadBalanceTask.SourceNodeIDs))
|
||||||
|
//TODO::remove nodeInfo and clear etcd
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue