fix: sync partitiion stats blocking balance task(#33741) (#33742)

related: #33741

Signed-off-by: MrPresent-Han <chun.han@zilliz.com>
pull/33754/head^2
Chun Han 2024-06-11 02:21:56 -04:00 committed by GitHub
parent ecf2bcee42
commit f7af323d1e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 36 additions and 53 deletions

View File

@ -134,6 +134,7 @@ func (h *ServerHandler) GetQueryVChanPositions(channel RWChannel, partitionIDs .
zap.String("channel", channel.GetName()),
zap.Int("numOfSegments", len(segments)),
zap.Int("indexed segment", len(indexedSegments)),
zap.Int64("currentPartitionStatsVersion", currentPartitionStatsVersion),
)
unIndexedIDs := make(typeutil.UniqueSet)

View File

@ -128,7 +128,7 @@ func (b *BalanceChecker) replicasToBalance() []int64 {
hasUnbalancedCollection := false
for _, cid := range loadedCollections {
if b.normalBalanceCollectionsCurrentRound.Contain(cid) {
log.Debug("ScoreBasedBalancer has balanced collection, skip balancing in this round",
log.Debug("ScoreBasedBalancer is balancing this collection, skip balancing in this round",
zap.Int64("collectionID", cid))
continue
}

View File

@ -71,7 +71,7 @@ func NewCheckerController(
utils.IndexChecker: NewIndexChecker(meta, dist, broker, nodeMgr),
// todo temporary work around must fix
// utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr, true),
utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr, Params.QueryNodeCfg.EnableSyncPartitionStats.GetAsBool()),
utils.LeaderChecker: NewLeaderChecker(meta, dist, targetMgr, nodeMgr),
}
manualCheckChs := map[utils.CheckerType]chan struct{}{

View File

@ -35,11 +35,10 @@ var _ Checker = (*LeaderChecker)(nil)
// LeaderChecker perform segment index check.
type LeaderChecker struct {
*checkerActivation
meta *meta.Meta
dist *meta.DistributionManager
target *meta.TargetManager
nodeMgr *session.NodeManager
enableSyncPartitionStats bool
meta *meta.Meta
dist *meta.DistributionManager
target *meta.TargetManager
nodeMgr *session.NodeManager
}
func NewLeaderChecker(
@ -47,15 +46,13 @@ func NewLeaderChecker(
dist *meta.DistributionManager,
target *meta.TargetManager,
nodeMgr *session.NodeManager,
enableSyncPartitionStats bool,
) *LeaderChecker {
return &LeaderChecker{
checkerActivation: newCheckerActivation(),
meta: meta,
dist: dist,
target: target,
nodeMgr: nodeMgr,
enableSyncPartitionStats: enableSyncPartitionStats,
checkerActivation: newCheckerActivation(),
meta: meta,
dist: dist,
target: target,
nodeMgr: nodeMgr,
}
}
@ -100,9 +97,7 @@ func (c *LeaderChecker) Check(ctx context.Context) []task.Task {
dist := c.dist.SegmentDistManager.GetByFilter(meta.WithChannel(leaderView.Channel), meta.WithReplica(replica))
tasks = append(tasks, c.findNeedLoadedSegments(ctx, replica, leaderView, dist)...)
tasks = append(tasks, c.findNeedRemovedSegments(ctx, replica, leaderView, dist)...)
if c.enableSyncPartitionStats {
tasks = append(tasks, c.findNeedSyncPartitionStats(ctx, replica, leaderView, node)...)
}
tasks = append(tasks, c.findNeedSyncPartitionStats(ctx, replica, leaderView, node)...)
}
}
}
@ -127,22 +122,24 @@ func (c *LeaderChecker) findNeedSyncPartitionStats(ctx context.Context, replica
partStatsToUpdate[partID] = psVersionInTarget
}
}
if len(partStatsToUpdate) > 0 {
action := task.NewLeaderUpdatePartStatsAction(leaderView.ID, nodeID, task.ActionTypeUpdate, leaderView.Channel, partStatsToUpdate)
action := task.NewLeaderUpdatePartStatsAction(leaderView.ID, nodeID, task.ActionTypeUpdate, leaderView.Channel, partStatsToUpdate)
t := task.NewLeaderPartStatsTask(
ctx,
c.ID(),
leaderView.CollectionID,
replica,
leaderView.ID,
action,
)
t := task.NewLeaderPartStatsTask(
ctx,
c.ID(),
leaderView.CollectionID,
replica,
leaderView.ID,
action,
)
// leader task shouldn't replace executing segment task
t.SetPriority(task.TaskPriorityLow)
t.SetReason("sync partition stats versions")
ret = append(ret, t)
}
// leader task shouldn't replace executing segment task
t.SetPriority(task.TaskPriorityLow)
t.SetReason("sync partition stats versions")
ret = append(ret, t)
return ret
}

View File

@ -75,7 +75,7 @@ func (suite *LeaderCheckerTestSuite) SetupTest() {
distManager := meta.NewDistributionManager()
targetManager := meta.NewTargetManager(suite.broker, suite.meta)
suite.checker = NewLeaderChecker(suite.meta, distManager, targetManager, suite.nodeMgr, false)
suite.checker = NewLeaderChecker(suite.meta, distManager, targetManager, suite.nodeMgr)
}
func (suite *LeaderCheckerTestSuite) TearDownTest() {
@ -476,10 +476,6 @@ func (suite *LeaderCheckerTestSuite) TestIgnoreSyncRemovedSegments() {
func (suite *LeaderCheckerTestSuite) TestUpdatePartitionStats() {
testChannel := "test-insert-channel"
suite.checker.enableSyncPartitionStats = true
defer func() {
suite.checker.enableSyncPartitionStats = false
}()
leaderID := int64(2)
observer := suite.checker
observer.meta.CollectionManager.PutCollection(utils.CreateTestCollection(1, 1))

View File

@ -17,6 +17,8 @@
package task
import (
"reflect"
"github.com/samber/lo"
"go.uber.org/atomic"
@ -225,6 +227,8 @@ func (action *LeaderAction) IsFinished(distMgr *meta.DistributionManager) bool {
return action.rpcReturned.Load() && dist != nil && dist.NodeID == action.Node()
case ActionTypeReduce:
return action.rpcReturned.Load() && (dist == nil || dist.NodeID != action.Node())
case ActionTypeUpdate:
return action.rpcReturned.Load() && (dist != nil && reflect.DeepEqual(action.partStatsVersions, view.PartitionStatsVersions))
}
return false
}

View File

@ -2168,11 +2168,9 @@ type queryNodeConfig struct {
MemoryIndexLoadPredictMemoryUsageFactor ParamItem `refreshable:"true"`
EnableSegmentPrune ParamItem `refreshable:"false"`
// todo temporary work around must fix
EnableSyncPartitionStats ParamItem `refreshable:"false"`
DefaultSegmentFilterRatio ParamItem `refreshable:"false"`
UseStreamComputing ParamItem `refreshable:"false"`
QueryStreamBatchSize ParamItem `refreshable:"false"`
DefaultSegmentFilterRatio ParamItem `refreshable:"false"`
UseStreamComputing ParamItem `refreshable:"false"`
QueryStreamBatchSize ParamItem `refreshable:"false"`
}
func (p *queryNodeConfig) init(base *BaseTable) {
@ -2741,16 +2739,6 @@ user-task-polling:
Export: true,
}
p.EnableSegmentPrune.Init(base.mgr)
p.EnableSyncPartitionStats = ParamItem{
Key: "queryNode.enableSyncPartitionStats",
Version: "2.4.4",
DefaultValue: "false",
Doc: "enable sync partitionStats",
Export: true,
}
p.EnableSyncPartitionStats.Init(base.mgr)
p.DefaultSegmentFilterRatio = ParamItem{
Key: "queryNode.defaultSegmentFilterRatio",
Version: "2.4.0",

View File

@ -53,15 +53,12 @@ func (s *BalanceTestSuit) SetupSuite() {
// disable compaction
paramtable.Get().Save(paramtable.Get().DataCoordCfg.EnableCompaction.Key, "false")
// todo @wayblink repair this test
// paramtable.Get().Save(paramtable.Get().QueryNodeCfg.EnableSyncPartitionStats.Key, "false")
s.Require().NoError(s.SetupEmbedEtcd())
}
func (s *BalanceTestSuit) TearDownSuite() {
defer paramtable.Get().Reset(paramtable.Get().DataCoordCfg.EnableCompaction.Key)
// defer paramtable.Get().Reset(paramtable.Get().QueryNodeCfg.EnableSyncPartitionStats.Key)
s.MiniClusterSuite.TearDownSuite()
}