fix: Remove read-only node from replica immediately after node down (#32666)

issue: #32665

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/32619/head
wei liu 2024-04-28 20:25:25 +08:00 committed by GitHub
parent cb1dbf20c7
commit c0555d4b45
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 1 deletions

View File

@ -455,7 +455,14 @@ func (rm *ResourceManager) HandleNodeDown(node int64) {
// failure of node down can be ignored, node down can be done by `RemoveAllDownNode`.
rm.incomingNode.Remove(node)
// when a stopping query node goes offline, no node change is triggered,
// because the node was already removed from the resource manager when it became stopping,
// so `unassignNode` will do nothing
rgName, err := rm.unassignNode(node)
// trigger node changes, expected to remove ro node from replica immediately
rm.nodeChangedNotifier.NotifyAll()
log.Info("HandleNodeDown: remove node from resource group",
zap.String("rgName", rgName),
zap.Int64("node", node),

View File

@ -438,7 +438,7 @@ func (node *QueryNode) Stop() error {
select {
case <-timeoutCh:
log.Warn("migrate data timed out", zap.Int64("ServerID", paramtable.GetNodeID()),
log.Warn("migrate data timed out", zap.Int64("ServerID", node.GetNodeID()),
zap.Int64s("sealedSegments", lo.Map(sealedSegments, func(s segments.Segment, i int) int64 {
return s.ID()
})),