mirror of https://github.com/milvus-io/milvus.git
Make sure querynode stop only once (#21203)
Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>pull/21242/head
parent
cc56d58052
commit
1518baacd1
internal/querynode
|
@ -30,8 +30,6 @@ import "C"
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/samber/lo"
|
|
||||||
uberatomic "go.uber.org/atomic"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
@ -42,6 +40,8 @@ import (
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/samber/lo"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
||||||
|
|
||||||
"github.com/panjf2000/ants/v2"
|
"github.com/panjf2000/ants/v2"
|
||||||
|
@ -89,7 +89,7 @@ type QueryNode struct {
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
|
||||||
stateCode atomic.Value
|
stateCode atomic.Value
|
||||||
stopFlag uberatomic.Bool
|
stopOnce sync.Once
|
||||||
|
|
||||||
//call once
|
//call once
|
||||||
initOnce sync.Once
|
initOnce sync.Once
|
||||||
|
@ -140,7 +140,6 @@ func NewQueryNode(ctx context.Context, factory dependency.Factory) *QueryNode {
|
||||||
node.tSafeReplica = newTSafeReplica()
|
node.tSafeReplica = newTSafeReplica()
|
||||||
node.scheduler = newTaskScheduler(ctx1, node.tSafeReplica)
|
node.scheduler = newTaskScheduler(ctx1, node.tSafeReplica)
|
||||||
node.UpdateStateCode(commonpb.StateCode_Abnormal)
|
node.UpdateStateCode(commonpb.StateCode_Abnormal)
|
||||||
node.stopFlag.Store(false)
|
|
||||||
|
|
||||||
return node
|
return node
|
||||||
}
|
}
|
||||||
|
@ -325,54 +324,52 @@ func (node *QueryNode) Start() error {
|
||||||
|
|
||||||
// Stop mainly stop QueryNode's query service, historical loop and streaming loop.
|
// Stop mainly stop QueryNode's query service, historical loop and streaming loop.
|
||||||
func (node *QueryNode) Stop() error {
|
func (node *QueryNode) Stop() error {
|
||||||
if node.stopFlag.Load() {
|
node.stopOnce.Do(func() {
|
||||||
return nil
|
log.Warn("Query node stop..")
|
||||||
}
|
err := node.session.GoingStop()
|
||||||
node.stopFlag.Store(true)
|
if err != nil {
|
||||||
|
log.Warn("session fail to go stopping state", zap.Error(err))
|
||||||
log.Warn("Query node stop..")
|
} else {
|
||||||
err := node.session.GoingStop()
|
node.UpdateStateCode(commonpb.StateCode_Stopping)
|
||||||
if err != nil {
|
noSegmentChan := node.metaReplica.getNoSegmentChan()
|
||||||
log.Warn("session fail to go stopping state", zap.Error(err))
|
select {
|
||||||
} else {
|
case <-noSegmentChan:
|
||||||
node.UpdateStateCode(commonpb.StateCode_Stopping)
|
case <-time.After(Params.QueryNodeCfg.GracefulStopTimeout.GetAsDuration(time.Second)):
|
||||||
noSegmentChan := node.metaReplica.getNoSegmentChan()
|
log.Warn("migrate data timed out", zap.Int64("server_id", paramtable.GetNodeID()),
|
||||||
select {
|
zap.Int64s("sealed_segment", lo.Map(node.metaReplica.getSealedSegments(), func(t *Segment, i int) int64 {
|
||||||
case <-noSegmentChan:
|
return t.ID()
|
||||||
case <-time.After(Params.QueryNodeCfg.GracefulStopTimeout.GetAsDuration(time.Second)):
|
})),
|
||||||
log.Warn("migrate data timed out", zap.Int64("server_id", paramtable.GetNodeID()),
|
zap.Int64s("growing_segment", lo.Map(node.metaReplica.getGrowingSegments(), func(t *Segment, i int) int64 {
|
||||||
zap.Int64s("sealed_segment", lo.Map(node.metaReplica.getSealedSegments(), func(t *Segment, i int) int64 {
|
return t.ID()
|
||||||
return t.ID()
|
})),
|
||||||
})),
|
)
|
||||||
zap.Int64s("growing_segment", lo.Map(node.metaReplica.getGrowingSegments(), func(t *Segment, i int) int64 {
|
}
|
||||||
return t.ID()
|
|
||||||
})),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
node.UpdateStateCode(commonpb.StateCode_Abnormal)
|
node.UpdateStateCode(commonpb.StateCode_Abnormal)
|
||||||
node.wg.Wait()
|
node.wg.Wait()
|
||||||
node.queryNodeLoopCancel()
|
node.queryNodeLoopCancel()
|
||||||
|
|
||||||
// close services
|
// close services
|
||||||
if node.dataSyncService != nil {
|
if node.dataSyncService != nil {
|
||||||
node.dataSyncService.close()
|
node.dataSyncService.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.metaReplica != nil {
|
if node.metaReplica != nil {
|
||||||
node.metaReplica.freeAll()
|
node.metaReplica.freeAll()
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.ShardClusterService != nil {
|
if node.ShardClusterService != nil {
|
||||||
node.ShardClusterService.close()
|
node.ShardClusterService.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.queryShardService != nil {
|
if node.queryShardService != nil {
|
||||||
node.queryShardService.close()
|
node.queryShardService.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
node.session.Revoke(time.Second)
|
||||||
|
})
|
||||||
|
|
||||||
node.session.Revoke(time.Second)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue