mirror of https://github.com/milvus-io/milvus.git
fix: Make syncmgr lock key before returning future (#32865)
See also #32860. SyncMgr did not ensure that the task key was locked before `SyncData` returned, which may cause concurrency problems during sync with multiple policies. This PR changes the sync mgr implementation to make sure the key is locked before returning the task result `*conc.Future`. --------- Signed-off-by: Congqi Xia <congqi.xia@zilliz.com> pull/32822/head
parent
3d78b90fe7
commit
a06f601c6e
internal/datanode/syncmgr
|
@ -13,32 +13,33 @@ type Task interface {
|
||||||
StartPosition() *msgpb.MsgPosition
|
StartPosition() *msgpb.MsgPosition
|
||||||
ChannelName() string
|
ChannelName() string
|
||||||
Run() error
|
Run() error
|
||||||
|
HandleError(error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type keyLockDispatcher[K comparable] struct {
|
type keyLockDispatcher[K comparable] struct {
|
||||||
keyLock *lock.KeyLock[K]
|
keyLock *lock.KeyLock[K]
|
||||||
workerPool *conc.Pool[error]
|
workerPool *conc.Pool[struct{}]
|
||||||
}
|
}
|
||||||
|
|
||||||
func newKeyLockDispatcher[K comparable](maxParallel int) *keyLockDispatcher[K] {
|
func newKeyLockDispatcher[K comparable](maxParallel int) *keyLockDispatcher[K] {
|
||||||
dispatcher := &keyLockDispatcher[K]{
|
dispatcher := &keyLockDispatcher[K]{
|
||||||
workerPool: conc.NewPool[error](maxParallel, conc.WithPreAlloc(false)),
|
workerPool: conc.NewPool[struct{}](maxParallel, conc.WithPreAlloc(false)),
|
||||||
keyLock: lock.NewKeyLock[K](),
|
keyLock: lock.NewKeyLock[K](),
|
||||||
}
|
}
|
||||||
return dispatcher
|
return dispatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *keyLockDispatcher[K]) Submit(key K, t Task, callbacks ...func(error)) *conc.Future[error] {
|
func (d *keyLockDispatcher[K]) Submit(key K, t Task, callbacks ...func(error) error) *conc.Future[struct{}] {
|
||||||
d.keyLock.Lock(key)
|
d.keyLock.Lock(key)
|
||||||
|
|
||||||
return d.workerPool.Submit(func() (error, error) {
|
return d.workerPool.Submit(func() (struct{}, error) {
|
||||||
defer d.keyLock.Unlock(key)
|
defer d.keyLock.Unlock(key)
|
||||||
err := t.Run()
|
err := t.Run()
|
||||||
|
|
||||||
for _, callback := range callbacks {
|
for _, callback := range callbacks {
|
||||||
callback(err)
|
err = callback(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return err, nil
|
return struct{}{}, err
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,6 +155,39 @@ func (_c *MockTask_Checkpoint_Call) RunAndReturn(run func() *msgpb.MsgPosition)
|
||||||
return _c
|
return _c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HandleError provides a mock function with given fields: _a0
|
||||||
|
func (_m *MockTask) HandleError(_a0 error) {
|
||||||
|
_m.Called(_a0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MockTask_HandleError_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'HandleError'
|
||||||
|
type MockTask_HandleError_Call struct {
|
||||||
|
*mock.Call
|
||||||
|
}
|
||||||
|
|
||||||
|
// HandleError is a helper method to define mock.On call
|
||||||
|
// - _a0 error
|
||||||
|
func (_e *MockTask_Expecter) HandleError(_a0 interface{}) *MockTask_HandleError_Call {
|
||||||
|
return &MockTask_HandleError_Call{Call: _e.mock.On("HandleError", _a0)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockTask_HandleError_Call) Run(run func(_a0 error)) *MockTask_HandleError_Call {
|
||||||
|
_c.Call.Run(func(args mock.Arguments) {
|
||||||
|
run(args[0].(error))
|
||||||
|
})
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockTask_HandleError_Call) Return() *MockTask_HandleError_Call {
|
||||||
|
_c.Call.Return()
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (_c *MockTask_HandleError_Call) RunAndReturn(run func(error)) *MockTask_HandleError_Call {
|
||||||
|
_c.Call.Return(run)
|
||||||
|
return _c
|
||||||
|
}
|
||||||
|
|
||||||
// Run provides a mock function with given fields:
|
// Run provides a mock function with given fields:
|
||||||
func (_m *MockTask) Run() error {
|
func (_m *MockTask) Run() error {
|
||||||
ret := _m.Called()
|
ret := _m.Called()
|
||||||
|
|
|
@ -121,30 +121,44 @@ func (mgr *syncManager) SyncData(ctx context.Context, task Task) *conc.Future[st
|
||||||
func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] {
|
func (mgr *syncManager) safeSubmitTask(task Task) *conc.Future[struct{}] {
|
||||||
taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp())
|
taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp())
|
||||||
mgr.tasks.Insert(taskKey, task)
|
mgr.tasks.Insert(taskKey, task)
|
||||||
|
defer mgr.tasks.Remove(taskKey)
|
||||||
|
|
||||||
return conc.Go(func() (struct{}, error) {
|
key, err := task.CalcTargetSegment()
|
||||||
defer mgr.tasks.Remove(taskKey)
|
if err != nil {
|
||||||
for {
|
task.HandleError(err)
|
||||||
targetID, err := task.CalcTargetSegment()
|
return conc.Go(func() (struct{}, error) { return struct{}{}, err })
|
||||||
if err != nil {
|
}
|
||||||
return struct{}{}, err
|
|
||||||
}
|
|
||||||
log.Info("task calculated target segment id",
|
|
||||||
zap.Int64("targetID", targetID),
|
|
||||||
zap.Int64("segmentID", task.SegmentID()),
|
|
||||||
)
|
|
||||||
|
|
||||||
// make sync for same segment execute in sequence
|
return mgr.submit(key, task)
|
||||||
// if previous sync task is not finished, block here
|
}
|
||||||
f := mgr.Submit(targetID, task)
|
|
||||||
err, _ = f.Await()
|
func (mgr *syncManager) submit(key int64, task Task) *conc.Future[struct{}] {
|
||||||
if errors.Is(err, errTargetSegmentNotMatch) {
|
handler := func(err error) error {
|
||||||
log.Info("target updated during submitting", zap.Error(err))
|
// unexpected error
|
||||||
continue
|
if !errors.Is(err, errTargetSegmentNotMatch) {
|
||||||
}
|
task.HandleError(err)
|
||||||
return struct{}{}, err
|
return err
|
||||||
}
|
}
|
||||||
})
|
|
||||||
|
targetID, err := task.CalcTargetSegment()
|
||||||
|
// shall not reach, segment meta lost during sync
|
||||||
|
if err != nil {
|
||||||
|
task.HandleError(err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if targetID == key {
|
||||||
|
err = merr.WrapErrServiceInternal("recaluated with same key", fmt.Sprint(targetID))
|
||||||
|
task.HandleError(err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Info("task calculated target segment id",
|
||||||
|
zap.Int64("targetID", targetID),
|
||||||
|
zap.Int64("segmentID", task.SegmentID()),
|
||||||
|
)
|
||||||
|
return mgr.submit(targetID, task).Err()
|
||||||
|
}
|
||||||
|
log.Info("sync mgr sumbit task with key", zap.Int64("key", key))
|
||||||
|
return mgr.Submit(key, task, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) {
|
func (mgr *syncManager) GetEarliestPosition(channel string) (int64, *msgpb.MsgPosition) {
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"github.com/milvus-io/milvus/internal/storage"
|
"github.com/milvus-io/milvus/internal/storage"
|
||||||
"github.com/milvus-io/milvus/pkg/common"
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/config"
|
"github.com/milvus-io/milvus/pkg/config"
|
||||||
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||||
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
"github.com/milvus-io/milvus/pkg/util/tsoutil"
|
||||||
)
|
)
|
||||||
|
@ -241,7 +242,7 @@ func (s *SyncManagerSuite) TestBlock() {
|
||||||
MsgID: []byte{1, 2, 3, 4},
|
MsgID: []byte{1, 2, 3, 4},
|
||||||
Timestamp: 100,
|
Timestamp: 100,
|
||||||
})
|
})
|
||||||
manager.SyncData(context.Background(), task)
|
go manager.SyncData(context.Background(), task)
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-sig:
|
case <-sig:
|
||||||
|
@ -317,12 +318,86 @@ func (s *SyncManagerSuite) TestTargetUpdated() {
|
||||||
task.EXPECT().CalcTargetSegment().Return(1001, nil).Once()
|
task.EXPECT().CalcTargetSegment().Return(1001, nil).Once()
|
||||||
task.EXPECT().Run().Return(errTargetSegmentNotMatch).Once()
|
task.EXPECT().Run().Return(errTargetSegmentNotMatch).Once()
|
||||||
task.EXPECT().Run().Return(nil).Once()
|
task.EXPECT().Run().Return(nil).Once()
|
||||||
|
task.EXPECT().HandleError(mock.Anything)
|
||||||
|
|
||||||
f := manager.SyncData(context.Background(), task)
|
f := manager.SyncData(context.Background(), task)
|
||||||
_, err = f.Await()
|
_, err = f.Await()
|
||||||
s.NoError(err)
|
s.NoError(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *SyncManagerSuite) TestUnexpectedError() {
|
||||||
|
manager, err := NewSyncManager(s.chunkManager, s.allocator)
|
||||||
|
s.NoError(err)
|
||||||
|
|
||||||
|
task := NewMockTask(s.T())
|
||||||
|
task.EXPECT().SegmentID().Return(1000)
|
||||||
|
task.EXPECT().Checkpoint().Return(&msgpb.MsgPosition{})
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(1000, nil).Once()
|
||||||
|
task.EXPECT().Run().Return(merr.WrapErrServiceInternal("mocked")).Once()
|
||||||
|
task.EXPECT().HandleError(mock.Anything)
|
||||||
|
|
||||||
|
f := manager.SyncData(context.Background(), task)
|
||||||
|
_, err = f.Await()
|
||||||
|
s.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *SyncManagerSuite) TestCalcTargetError() {
|
||||||
|
s.Run("fail_before_submit", func() {
|
||||||
|
manager, err := NewSyncManager(s.chunkManager, s.allocator)
|
||||||
|
s.NoError(err)
|
||||||
|
|
||||||
|
mockErr := merr.WrapErrServiceInternal("mocked")
|
||||||
|
|
||||||
|
task := NewMockTask(s.T())
|
||||||
|
task.EXPECT().SegmentID().Return(1000)
|
||||||
|
task.EXPECT().Checkpoint().Return(&msgpb.MsgPosition{})
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(0, mockErr).Once()
|
||||||
|
task.EXPECT().HandleError(mock.Anything)
|
||||||
|
|
||||||
|
f := manager.SyncData(context.Background(), task)
|
||||||
|
_, err = f.Await()
|
||||||
|
s.Error(err)
|
||||||
|
s.ErrorIs(err, mockErr)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("fail_during_rerun", func() {
|
||||||
|
manager, err := NewSyncManager(s.chunkManager, s.allocator)
|
||||||
|
s.NoError(err)
|
||||||
|
|
||||||
|
mockErr := merr.WrapErrServiceInternal("mocked")
|
||||||
|
|
||||||
|
task := NewMockTask(s.T())
|
||||||
|
task.EXPECT().SegmentID().Return(1000)
|
||||||
|
task.EXPECT().Checkpoint().Return(&msgpb.MsgPosition{})
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(1000, nil).Once()
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(0, mockErr).Once()
|
||||||
|
task.EXPECT().Run().Return(errTargetSegmentNotMatch).Once()
|
||||||
|
task.EXPECT().HandleError(mock.Anything)
|
||||||
|
|
||||||
|
f := manager.SyncData(context.Background(), task)
|
||||||
|
_, err = f.Await()
|
||||||
|
s.Error(err)
|
||||||
|
s.ErrorIs(err, mockErr)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *SyncManagerSuite) TestTargetUpdateSameID() {
|
||||||
|
manager, err := NewSyncManager(s.chunkManager, s.allocator)
|
||||||
|
s.NoError(err)
|
||||||
|
|
||||||
|
task := NewMockTask(s.T())
|
||||||
|
task.EXPECT().SegmentID().Return(1000)
|
||||||
|
task.EXPECT().Checkpoint().Return(&msgpb.MsgPosition{})
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(1000, nil).Once()
|
||||||
|
task.EXPECT().CalcTargetSegment().Return(1000, nil).Once()
|
||||||
|
task.EXPECT().Run().Return(errTargetSegmentNotMatch).Once()
|
||||||
|
task.EXPECT().HandleError(mock.Anything)
|
||||||
|
|
||||||
|
f := manager.SyncData(context.Background(), task)
|
||||||
|
_, err = f.Await()
|
||||||
|
s.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
func TestSyncManager(t *testing.T) {
|
func TestSyncManager(t *testing.T) {
|
||||||
suite.Run(t, new(SyncManagerSuite))
|
suite.Run(t, new(SyncManagerSuite))
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,7 +109,7 @@ func (t *SyncTask) getLogger() *log.MLogger {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *SyncTask) handleError(err error) {
|
func (t *SyncTask) HandleError(err error) {
|
||||||
if errors.Is(err, errTargetSegmentNotMatch) {
|
if errors.Is(err, errTargetSegmentNotMatch) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -129,7 +129,7 @@ func (t *SyncTask) Run() (err error) {
|
||||||
log := t.getLogger()
|
log := t.getLogger()
|
||||||
defer func() {
|
defer func() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.handleError(err)
|
t.HandleError(err)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -138,7 +138,6 @@ func (t *SyncTask) Run() (err error) {
|
||||||
if !has {
|
if !has {
|
||||||
log.Warn("failed to sync data, segment not found in metacache")
|
log.Warn("failed to sync data, segment not found in metacache")
|
||||||
err := merr.WrapErrSegmentNotFound(t.segmentID)
|
err := merr.WrapErrSegmentNotFound(t.segmentID)
|
||||||
t.handleError(err)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -175,7 +174,6 @@ func (t *SyncTask) Run() (err error) {
|
||||||
err = t.writeLogs()
|
err = t.writeLogs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("failed to save serialized data into storage", zap.Error(err))
|
log.Warn("failed to save serialized data into storage", zap.Error(err))
|
||||||
t.handleError(err)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -195,7 +193,6 @@ func (t *SyncTask) Run() (err error) {
|
||||||
err = t.writeMeta()
|
err = t.writeMeta()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("failed to save serialized data into storage", zap.Error(err))
|
log.Warn("failed to save serialized data into storage", zap.Error(err))
|
||||||
t.handleError(err)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue