avoid concurrent sub/unsub on same channel (#26454)

Signed-off-by: Wei Liu <wei.liu@zilliz.com>
pull/26558/head
wei liu 2023-08-23 10:10:22 +08:00 committed by GitHub
parent 0bb68cac36
commit 7af0f7d90c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 51 additions and 12 deletions

View File

@ -150,6 +150,10 @@ func (sd *shardDelegator) Serviceable() bool {
return sd.lifetime.GetState() == working
}
// Stopped reports whether the delegator's lifetime is currently in the
// stopped state.
func (sd *shardDelegator) Stopped() bool {
	current := sd.lifetime.GetState()
	return current == stopped
}
// Start sets delegator to working state.
func (sd *shardDelegator) Start() {
sd.lifetime.SetState(working)

View File

@ -250,6 +250,10 @@ func (sd *shardDelegator) applyDelete(ctx context.Context, nodeID int64, worker
if ok {
log.Debug("delegator plan to applyDelete via worker")
err := retry.Do(ctx, func() error {
if sd.Stopped() {
return retry.Unrecoverable(merr.WrapErrChannelUnsubscribing(sd.vchannelName))
}
err := worker.Delete(ctx, &querypb.DeleteRequest{
Base: commonpbutil.NewMsgBase(commonpbutil.WithTargetID(nodeID)),
CollectionId: sd.collectionID,

View File

@ -930,4 +930,8 @@ func TestDelegatorTSafeListenerClosed(t *testing.T) {
case <-time.After(time.Second):
assert.FailNow(t, "watchTsafe still working after listener closed")
}
sd.Close()
assert.Equal(t, sd.Serviceable(), false)
assert.Equal(t, sd.Stopped(), true)
}

View File

@ -95,12 +95,13 @@ type QueryNode struct {
stopOnce sync.Once
// internal components
manager *segments.Manager
clusterManager cluster.Manager
tSafeManager tsafe.Manager
pipelineManager pipeline.Manager
subscribingChannels *typeutil.ConcurrentSet[string]
delegators *typeutil.ConcurrentMap[string, delegator.ShardDelegator]
manager *segments.Manager
clusterManager cluster.Manager
tSafeManager tsafe.Manager
pipelineManager pipeline.Manager
subscribingChannels *typeutil.ConcurrentSet[string]
unsubscribingChannels *typeutil.ConcurrentSet[string]
delegators *typeutil.ConcurrentMap[string, delegator.ShardDelegator]
// segment loader
loader segments.Loader
@ -324,6 +325,7 @@ func (node *QueryNode) Init() error {
})
node.delegators = typeutil.NewConcurrentMap[string, delegator.ShardDelegator]()
node.subscribingChannels = typeutil.NewConcurrentSet[string]()
node.unsubscribingChannels = typeutil.NewConcurrentSet[string]()
node.manager = segments.NewManager()
node.loader = segments.NewLoader(node.manager, node.vectorStorage)
node.dispClient = msgdispatcher.NewClient(node.factory, typeutil.QueryNodeRole, paramtable.GetNodeID())

View File

@ -253,6 +253,13 @@ func (node *QueryNode) WatchDmChannels(ctx context.Context, req *querypb.WatchDm
}
defer node.subscribingChannels.Remove(channel.GetChannelName())
// to avoid concurrent watch/unwatch
if node.unsubscribingChannels.Contain(channel.GetChannelName()) {
err := merr.WrapErrChannelUnsubscribing(channel.GetChannelName())
log.Warn(err.Error())
return merr.Status(err), nil
}
_, exist := node.delegators.Get(channel.GetChannelName())
if exist {
log.Info("channel already subscribed")
@ -375,6 +382,8 @@ func (node *QueryNode) UnsubDmChannel(ctx context.Context, req *querypb.UnsubDmC
return status, nil
}
node.unsubscribingChannels.Insert(req.GetChannelName())
defer node.unsubscribingChannels.Remove(req.GetChannelName())
delegator, ok := node.delegators.GetAndRemove(req.GetChannelName())
if ok {
// close the delegator first to block all coming query/search requests
@ -386,7 +395,6 @@ func (node *QueryNode) UnsubDmChannel(ctx context.Context, req *querypb.UnsubDmC
node.manager.Collection.Unref(req.GetCollectionID(), 1)
}
log.Info("unsubscribed channel")
return util.SuccessStatus(), nil

View File

@ -373,13 +373,20 @@ func (suite *ServiceSuite) TestWatchDmChannels_Failed() {
},
}
// test channel is unsubscribing
suite.node.unsubscribingChannels.Insert(suite.vchannel)
status, err := suite.node.WatchDmChannels(ctx, req)
suite.NoError(err)
suite.Equal(status.GetReason(), merr.WrapErrChannelUnsubscribing(suite.vchannel).Error())
suite.node.unsubscribingChannels.Remove(suite.vchannel)
// init msgstream failed
suite.factory.EXPECT().NewTtMsgStream(mock.Anything).Return(suite.msgStream, nil)
suite.msgStream.EXPECT().AsConsumer([]string{suite.pchannel}, mock.Anything, mock.Anything).Return()
suite.msgStream.EXPECT().Close().Return()
suite.msgStream.EXPECT().Seek(mock.Anything).Return(errors.New("mock error"))
status, err := suite.node.WatchDmChannels(ctx, req)
status, err = suite.node.WatchDmChannels(ctx, req)
suite.NoError(err)
suite.Equal(commonpb.ErrorCode_UnexpectedError, status.GetErrorCode())

View File

@ -76,10 +76,11 @@ var (
ErrReplicaNotAvailable = newMilvusError("replica not available", 401, false)
// Channel related
ErrChannelNotFound = newMilvusError("channel not found", 500, false)
ErrChannelLack = newMilvusError("channel lacks", 501, false)
ErrChannelReduplicate = newMilvusError("channel reduplicates", 502, false)
ErrChannelNotAvailable = newMilvusError("channel not available", 503, false)
ErrChannelNotFound = newMilvusError("channel not found", 500, false)
ErrChannelLack = newMilvusError("channel lacks", 501, false)
ErrChannelReduplicate = newMilvusError("channel reduplicates", 502, false)
ErrChannelNotAvailable = newMilvusError("channel not available", 503, false)
// ErrChannelUnsubscribing (code 504) is returned when an operation targets a
// channel that is in the middle of being unsubscribed.
// NOTE(review): the trailing `true` presumably marks the error as retriable —
// confirm against newMilvusError.
ErrChannelUnsubscribing = newMilvusError("channel is unsubscribing", 504, true)
// Segment related
ErrSegmentNotFound = newMilvusError("segment not found", 600, false)

View File

@ -100,6 +100,7 @@ func (s *ErrSuite) TestWrap() {
s.ErrorIs(WrapErrChannelNotFound("test_Channel", "failed to get Channel"), ErrChannelNotFound)
s.ErrorIs(WrapErrChannelLack("test_Channel", "failed to get Channel"), ErrChannelLack)
s.ErrorIs(WrapErrChannelReduplicate("test_Channel", "failed to get Channel"), ErrChannelReduplicate)
s.ErrorIs(WrapErrChannelUnsubscribing("test_channel"), ErrChannelUnsubscribing)
// Segment related
s.ErrorIs(WrapErrSegmentNotFound(1, "failed to get Segment"), ErrSegmentNotFound)

View File

@ -345,6 +345,14 @@ func WrapErrChannelNotAvailable(name string, msg ...string) error {
return err
}
// WrapErrChannelUnsubscribing builds an error from ErrChannelUnsubscribing,
// attaching the channel name through wrapWithField under the "channel" key.
// When any msg values are supplied they are joined with "; " and wrapped
// around that error as additional context.
func WrapErrChannelUnsubscribing(name string, msg ...string) error {
	base := wrapWithField(ErrChannelUnsubscribing, "channel", name)
	if len(msg) == 0 {
		// No extra context requested: return the field-annotated error as is.
		return base
	}
	detail := strings.Join(msg, "; ")
	return errors.Wrap(base, detail)
}
// Segment related
func WrapErrSegmentNotFound(id int64, msg ...string) error {
err := wrapWithField(ErrSegmentNotFound, "segment", id)