milvus/internal/flushcommon/syncmgr/sync_manager.go

package syncmgr

import (
	"context"
	"fmt"
	"strconv"
	"time"

	"github.com/hashicorp/golang-lru/v2/expirable"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/allocator"
	"github.com/milvus-io/milvus/internal/flushcommon/metacache"
	"github.com/milvus-io/milvus/internal/json"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/v2/config"
	"github.com/milvus-io/milvus/pkg/v2/log"
	"github.com/milvus-io/milvus/pkg/v2/util/conc"
	"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
	"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

type SyncManagerOption struct {
	chunkManager storage.ChunkManager
	allocator    allocator.Interface
	parallelTask int
}

type SyncMeta struct {
	collectionID int64
	partitionID  int64
	segmentID    int64
	channelName  string
	schema       *schemapb.CollectionSchema
	checkpoint   *msgpb.MsgPosition
	tsFrom       typeutil.Timestamp
	tsTo         typeutil.Timestamp
	metacache    metacache.MetaCache
}

// SyncManager is the interface for the sync manager.
// It processes the sync tasks it receives and updates the meta accordingly.
//
//go:generate mockery --name=SyncManager --structname=MockSyncManager --output=./ --filename=mock_sync_manager.go --with-expecter --inpackage
type SyncManager interface {
	// SyncData submits a sync task for execution and returns a future for its result.
	SyncData(ctx context.Context, task Task, callbacks ...func(error) error) (*conc.Future[struct{}], error)
	// Close waits for inflight tasks to finish and then shuts down the sync manager.
	Close() error
	// TaskStatsJSON returns recent task stats serialized as a JSON array.
	TaskStatsJSON() string
}
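
// Usage sketch (hypothetical caller code, not part of this file): a typical
// flow constructs the manager once, submits a task, and awaits the returned
// future. Task construction is elided because Task implementations (e.g.
// SyncTask) are built elsewhere in this package, and Await on conc.Future is
// assumed to surface the task's final error.
//
//	mgr := NewSyncManager(chunkManager)
//	future, err := mgr.SyncData(ctx, task)
//	if err != nil {
//		return err // manager already closed
//	}
//	if _, err := future.Await(); err != nil {
//		return err // sync task failed
//	}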

type syncManager struct {
	*keyLockDispatcher[int64]
	chunkManager storage.ChunkManager

	tasks     *typeutil.ConcurrentMap[string, Task]
	taskStats *expirable.LRU[string, Task]
	handler   config.EventHandler
}

func NewSyncManager(chunkManager storage.ChunkManager) SyncManager {
	params := paramtable.Get()
	initPoolSize := params.DataNodeCfg.MaxParallelSyncMgrTasks.GetAsInt()
	dispatcher := newKeyLockDispatcher[int64](initPoolSize)
	log.Info("sync manager initialized", zap.Int("initPoolSize", initPoolSize))

	syncMgr := &syncManager{
		keyLockDispatcher: dispatcher,
		chunkManager:      chunkManager,
		tasks:             typeutil.NewConcurrentMap[string, Task](),
		taskStats:         expirable.NewLRU[string, Task](64, nil, time.Minute*15),
	}
	// set up a config watcher so the worker pool can be resized at runtime
	handler := config.NewHandler("datanode.syncmgr.poolsize", syncMgr.resizeHandler)
	syncMgr.handler = handler
	params.Watch(params.DataNodeCfg.MaxParallelSyncMgrTasks.Key, handler)
	return syncMgr
}

func (mgr *syncManager) resizeHandler(evt *config.Event) {
	if !evt.HasUpdated {
		return
	}
	log := log.Ctx(context.Background()).With(
		zap.String("key", evt.Key),
		zap.String("value", evt.Value),
	)
	size, err := strconv.ParseInt(evt.Value, 10, 64)
	if err != nil {
		log.Warn("failed to parse new datanode syncmgr pool size", zap.Error(err))
		return
	}
	if err := mgr.keyLockDispatcher.workerPool.Resize(int(size)); err != nil {
		log.Warn("failed to resize datanode syncmgr pool size", zap.Error(err))
		return
	}
	log.Info("sync mgr pool size updated", zap.Int64("newSize", size))
}
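
// Runtime resize sketch (hypothetical value): saving a new value for the
// watched key dispatches a config.Event to resizeHandler, which resizes the
// worker pool. The "128" below is illustrative, and Save is assumed to
// behave as it does in paramtable-based tests.
//
//	params := paramtable.Get()
//	params.Save(params.DataNodeCfg.MaxParallelSyncMgrTasks.Key, "128")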

func (mgr *syncManager) SyncData(ctx context.Context, task Task, callbacks ...func(error) error) (*conc.Future[struct{}], error) {
	if mgr.workerPool.IsClosed() {
		return nil, fmt.Errorf("sync manager is closed")
	}

	switch t := task.(type) {
	case *SyncTask:
		t.WithChunkManager(mgr.chunkManager)
	}

	return mgr.safeSubmitTask(ctx, task, callbacks...), nil
}

// safeSubmitTask registers the task for tracking and stats, then submits it
// to the dispatcher keyed by segment ID.
func (mgr *syncManager) safeSubmitTask(ctx context.Context, task Task, callbacks ...func(error) error) *conc.Future[struct{}] {
	taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp())
	mgr.tasks.Insert(taskKey, task)
	mgr.taskStats.Add(taskKey, task)

	key := task.SegmentID()
	return mgr.submit(ctx, key, task, callbacks...)
}

func (mgr *syncManager) submit(ctx context.Context, key int64, task Task, callbacks ...func(error) error) *conc.Future[struct{}] {
	// handler runs first among the callbacks: it untracks the task and lets
	// the task handle its own error before propagating it.
	handler := func(err error) error {
		taskKey := fmt.Sprintf("%d-%d", task.SegmentID(), task.Checkpoint().GetTimestamp())
		defer mgr.tasks.Remove(taskKey)

		if err == nil {
			return nil
		}
		task.HandleError(err)
		return err
	}
	callbacks = append([]func(error) error{handler}, callbacks...)
	log.Info("sync mgr submit task with key", zap.Int64("key", key))
	return mgr.Submit(ctx, key, task, callbacks...)
}
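
// Ordering sketch (hypothetical tasks): the dispatcher locks on the segment
// ID key, so tasks targeting the same segment are expected to run one after
// another, while tasks for different segments may run concurrently in the
// worker pool.
//
//	f1, _ := mgr.SyncData(ctx, taskSeg100a) // segment 100
//	f2, _ := mgr.SyncData(ctx, taskSeg100b) // segment 100, queued behind taskSeg100a
//	f3, _ := mgr.SyncData(ctx, taskSeg200)  // segment 200, may run in parallel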

// TaskStatsJSON serializes the tasks retained in the expirable LRU into a
// JSON array, or returns an empty string when there is nothing to report.
func (mgr *syncManager) TaskStatsJSON() string {
	tasks := mgr.taskStats.Values()
	if len(tasks) == 0 {
		return ""
	}

	ret, err := json.Marshal(tasks)
	if err != nil {
		log.Warn("failed to marshal sync task stats", zap.Error(err))
		return ""
	}
	return string(ret)
}

// Close unwatches the pool-size config handler, then waits up to the
// configured timeout for inflight tasks while releasing the worker pool.
func (mgr *syncManager) Close() error {
	paramtable.Get().Unwatch(paramtable.Get().DataNodeCfg.MaxParallelSyncMgrTasks.Key, mgr.handler)
	timeout := paramtable.Get().CommonCfg.SyncTaskPoolReleaseTimeoutSeconds.GetAsDuration(time.Second)
	return mgr.workerPool.ReleaseTimeout(timeout)
}