Remove indexnode in indexservice when indexnode crashed (#5719)

* Remove indexnode in indexservice when indexnode crashed

Signed-off-by: xiaocai2333 <cai.zhang@zilliz.com>
pull/5779/head
cai.zhang 2021-06-11 16:53:42 +08:00 committed by zhenshan.cao
parent 561897c3d0
commit 81d3546b7c
3 changed files with 20 additions and 0 deletions

View File

@ -493,6 +493,7 @@ func (i *IndexService) assignmentTasksLoop() {
i.assignChan <- []UniqueID{indexBuildID}
continue
}
i.nodeTasks.assignTask(nodeID, indexBuildID)
req := &indexpb.CreateIndexRequest{
IndexBuildID: indexBuildID,
IndexName: meta.indexMeta.Req.IndexName,
@ -506,9 +507,11 @@ func (i *IndexService) assignmentTasksLoop() {
resp, err := builderClient.CreateIndex(ctx, req)
if err != nil {
log.Debug("IndexService assignmentTasksLoop builderClient.CreateIndex failed", zap.Error(err))
continue
}
if resp.ErrorCode != commonpb.ErrorCode_Success {
log.Debug("IndexService assignmentTasksLoop builderClient.CreateIndex failed", zap.String("Reason", resp.Reason))
continue
}
if err = i.metaTable.BuildIndex(indexBuildID, nodeID); err != nil {
log.Debug("IndexService assignmentTasksLoop metaTable.BuildIndex failed", zap.Error(err))
@ -537,8 +540,10 @@ func (i *IndexService) watchNodeLoop() {
log.Debug("IndexService watchNodeLoop SessionAddEvent", zap.Any("serverID", serverID))
case sessionutil.SessionDelEvent:
serverID := event.Session.ServerID
i.removeNode(serverID)
log.Debug("IndexService watchNodeLoop SessionDelEvent ", zap.Any("serverID", serverID))
indexBuildIDs := i.nodeTasks.getTasksByNodeID(serverID)
log.Debug("IndexNode crashed", zap.Any("IndexNode ID", serverID), zap.Any("task IDs", indexBuildIDs))
i.assignChan <- indexBuildIDs
i.nodeTasks.delete(serverID)
}

View File

@ -434,6 +434,8 @@ func (mt *metaTable) LoadMetaFromETCD(indexBuildID int64, revision int64) bool {
// nodeTasks associates a node ID with a slice of index build IDs.
type nodeTasks struct {
// nodeID2Tasks is keyed by node ID; each value is a slice of index build IDs (UniqueID).
nodeID2Tasks map[int64][]UniqueID
// lock is acquired by the nodeTasks methods before they read or modify nodeID2Tasks.
lock sync.RWMutex
}
func NewNodeTasks() *nodeTasks {
@ -443,6 +445,9 @@ func NewNodeTasks() *nodeTasks {
}
func (nt *nodeTasks) getTasksByNodeID(nodeID int64) []UniqueID {
nt.lock.Lock()
defer nt.lock.Unlock()
indexBuildIDs, ok := nt.nodeID2Tasks[nodeID]
if !ok {
return nil
@ -451,6 +456,9 @@ func (nt *nodeTasks) getTasksByNodeID(nodeID int64) []UniqueID {
}
func (nt *nodeTasks) assignTask(serverID int64, indexBuildID UniqueID) {
nt.lock.Lock()
defer nt.lock.Unlock()
indexBuildIDs, ok := nt.nodeID2Tasks[serverID]
if !ok {
var IDs []UniqueID
@ -463,6 +471,9 @@ func (nt *nodeTasks) assignTask(serverID int64, indexBuildID UniqueID) {
}
func (nt *nodeTasks) finishTask(indexBuildID UniqueID) {
nt.lock.Lock()
defer nt.lock.Unlock()
for serverID := range nt.nodeID2Tasks {
for i, buildID := range nt.nodeID2Tasks[serverID] {
if buildID == indexBuildID {
@ -473,5 +484,8 @@ func (nt *nodeTasks) finishTask(indexBuildID UniqueID) {
}
// delete drops the entry stored under serverID from nodeID2Tasks while
// holding the write lock. It is a no-op when no such entry exists.
func (nt *nodeTasks) delete(serverID int64) {
	nt.lock.Lock()
	// The built-in delete never panics (a missing key or a nil map is a
	// no-op), so the lock is released explicitly rather than through defer.
	delete(nt.nodeID2Tasks, serverID)
	nt.lock.Unlock()
}

View File

@ -28,6 +28,7 @@ import (
// removeNode asks nodeClients to remove nodeID. nodeLock is held for the
// whole call, and a debug line carrying the node ID is logged before the
// removal is issued.
func (i *IndexService) removeNode(nodeID UniqueID) {
	i.nodeLock.Lock()
	defer i.nodeLock.Unlock()

	removedNode := zap.Any("Remove node with ID", nodeID)
	log.Debug("IndexService", removedNode)

	i.nodeClients.Remove(nodeID)
}