mirror of https://github.com/milvus-io/milvus.git
Remove indexnode in indexservice when indexnode crashed (#5719)
* Remove indexnode in indexservice when indexnode crashed
Signed-off-by: xiaocai2333 <cai.zhang@zilliz.com>
pull/5779/head
parent
561897c3d0
commit
81d3546b7c
|
@ -493,6 +493,7 @@ func (i *IndexService) assignmentTasksLoop() {
|
|||
i.assignChan <- []UniqueID{indexBuildID}
|
||||
continue
|
||||
}
|
||||
i.nodeTasks.assignTask(nodeID, indexBuildID)
|
||||
req := &indexpb.CreateIndexRequest{
|
||||
IndexBuildID: indexBuildID,
|
||||
IndexName: meta.indexMeta.Req.IndexName,
|
||||
|
@ -506,9 +507,11 @@ func (i *IndexService) assignmentTasksLoop() {
|
|||
resp, err := builderClient.CreateIndex(ctx, req)
|
||||
if err != nil {
|
||||
log.Debug("IndexService assignmentTasksLoop builderClient.CreateIndex failed", zap.Error(err))
|
||||
continue
|
||||
}
|
||||
if resp.ErrorCode != commonpb.ErrorCode_Success {
|
||||
log.Debug("IndexService assignmentTasksLoop builderClient.CreateIndex failed", zap.String("Reason", resp.Reason))
|
||||
continue
|
||||
}
|
||||
if err = i.metaTable.BuildIndex(indexBuildID, nodeID); err != nil {
|
||||
log.Debug("IndexService assignmentTasksLoop metaTable.BuildIndex failed", zap.Error(err))
|
||||
|
@ -537,8 +540,10 @@ func (i *IndexService) watchNodeLoop() {
|
|||
log.Debug("IndexService watchNodeLoop SessionAddEvent", zap.Any("serverID", serverID))
|
||||
case sessionutil.SessionDelEvent:
|
||||
serverID := event.Session.ServerID
|
||||
i.removeNode(serverID)
|
||||
log.Debug("IndexService watchNodeLoop SessionDelEvent ", zap.Any("serverID", serverID))
|
||||
indexBuildIDs := i.nodeTasks.getTasksByNodeID(serverID)
|
||||
log.Debug("IndexNode crashed", zap.Any("IndexNode ID", serverID), zap.Any("task IDs", indexBuildIDs))
|
||||
i.assignChan <- indexBuildIDs
|
||||
i.nodeTasks.delete(serverID)
|
||||
}
|
||||
|
|
|
@ -434,6 +434,8 @@ func (mt *metaTable) LoadMetaFromETCD(indexBuildID int64, revision int64) bool {
|
|||
|
||||
// nodeTasks records which index build tasks are currently assigned to each
// node, so that the tasks of a node can be looked up (getTasksByNodeID) and
// dropped (delete) as a group.
type nodeTasks struct {
|
||||
// nodeID2Tasks maps a node/server ID to the index build IDs assigned to it.
nodeID2Tasks map[int64][]UniqueID
|
||||
|
||||
// lock guards nodeID2Tasks. The methods visible in this diff all take it
// exclusively via Lock/Unlock.
lock sync.RWMutex
|
||||
}
|
||||
|
||||
func NewNodeTasks() *nodeTasks {
|
||||
|
@ -443,6 +445,9 @@ func NewNodeTasks() *nodeTasks {
|
|||
}
|
||||
|
||||
func (nt *nodeTasks) getTasksByNodeID(nodeID int64) []UniqueID {
|
||||
nt.lock.Lock()
|
||||
defer nt.lock.Unlock()
|
||||
|
||||
indexBuildIDs, ok := nt.nodeID2Tasks[nodeID]
|
||||
if !ok {
|
||||
return nil
|
||||
|
@ -451,6 +456,9 @@ func (nt *nodeTasks) getTasksByNodeID(nodeID int64) []UniqueID {
|
|||
}
|
||||
|
||||
func (nt *nodeTasks) assignTask(serverID int64, indexBuildID UniqueID) {
|
||||
nt.lock.Lock()
|
||||
defer nt.lock.Unlock()
|
||||
|
||||
indexBuildIDs, ok := nt.nodeID2Tasks[serverID]
|
||||
if !ok {
|
||||
var IDs []UniqueID
|
||||
|
@ -463,6 +471,9 @@ func (nt *nodeTasks) assignTask(serverID int64, indexBuildID UniqueID) {
|
|||
}
|
||||
|
||||
func (nt *nodeTasks) finishTask(indexBuildID UniqueID) {
|
||||
nt.lock.Lock()
|
||||
defer nt.lock.Unlock()
|
||||
|
||||
for serverID := range nt.nodeID2Tasks {
|
||||
for i, buildID := range nt.nodeID2Tasks[serverID] {
|
||||
if buildID == indexBuildID {
|
||||
|
@ -473,5 +484,8 @@ func (nt *nodeTasks) finishTask(indexBuildID UniqueID) {
|
|||
}
|
||||
|
||||
// delete removes the whole task list recorded for serverID from
// nodeID2Tasks while holding nt.lock. If serverID has no entry the builtin
// delete is a no-op, so calling this for an unknown node is harmless.
func (nt *nodeTasks) delete(serverID int64) {
|
||||
nt.lock.Lock()
|
||||
defer nt.lock.Unlock()
|
||||
|
||||
delete(nt.nodeID2Tasks, serverID)
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
// removeNode logs the removal and then calls i.nodeClients.Remove(nodeID),
// all while holding i.nodeLock.
// NOTE(review): presumably Remove drops the client for that index node so no
// further tasks are routed to it — confirm against the nodeClients type.
func (i *IndexService) removeNode(nodeID UniqueID) {
|
||||
i.nodeLock.Lock()
|
||||
defer i.nodeLock.Unlock()
|
||||
log.Debug("IndexService", zap.Any("Remove node with ID", nodeID))
|
||||
i.nodeClients.Remove(nodeID)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue