enhance: Make proxy connection manager concurrency-safe (#31008)

See also #31007

This PR:
- Adds param items for connection manager behavior: client info TTL and check interval
- Changes the clientInfo map to a concurrent map

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/31023/head
congqixia 2024-03-05 10:39:00 +08:00 committed by GitHub
parent 8f7019468f
commit 3b5ce73ded
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 78 additions and 114 deletions

View File

@ -208,6 +208,8 @@ proxy:
ginLogging: true
ginLogSkipPaths: "/" # skipped url path for gin log split by comma
maxTaskNum: 1024 # max task number of proxy task queue
connectionMgrCheckInterval: 120 # the interval time(in seconds) for connection manager to scan inactive client info
connectionClientInfoTTL: 86400 # inactive client info TTL duration, in seconds
accessLog:
enable: false
# Log filename, set as "" to use stdout.

View File

@ -33,6 +33,11 @@ import (
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
// TestMain initializes the parameter table before the tests of this package
// run, then exits the process with the status code returned by m.Run.
func TestMain(m *testing.M) {
	paramtable.Init()

	exitCode := m.Run()
	os.Exit(exitCode)
}
func TestAccessLogger_NotEnable(t *testing.T) {
var Params paramtable.ComponentParam

View File

@ -46,6 +46,10 @@ type GrpcAccessInfoSuite struct {
info *GrpcAccessInfo
}
// SetupSuite initializes the parameter table for the suite.
// NOTE(review): presumably invoked once by the suite runner before any test
// of GrpcAccessInfoSuite executes — confirm against the suite framework.
func (s *GrpcAccessInfoSuite) SetupSuite() {
paramtable.Init()
}
func (s *GrpcAccessInfoSuite) SetupTest() {
s.username = "test-user"
s.traceID = "test-trace"

View File

@ -8,9 +8,7 @@ var getConnectionManagerInstanceOnce sync.Once
// GetManager returns the shared connection manager, constructing it on the
// first call. Construction is guarded by getConnectionManagerInstanceOnce, so
// it happens a single time even when GetManager is called concurrently.
func GetManager() *connectionManager {
	getConnectionManagerInstanceOnce.Do(func() {
		// The manager is built without options: the check interval and the
		// client-info TTL are read from paramtable where they are used.
		connectionManagerInstance = newConnectionManager()
	})
	return connectionManagerInstance
}

View File

@ -6,52 +6,23 @@ import (
"sync"
"time"
"github.com/golang/protobuf/proto"
"go.uber.org/atomic"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/pkg/log"
)
const (
// we shouldn't check this too frequently.
defaultConnCheckDuration = 2 * time.Minute
defaultTTLForInactiveConn = 24 * time.Hour
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type connectionManager struct {
mu sync.RWMutex
initOnce sync.Once
stopOnce sync.Once
closeSignal chan struct{}
wg sync.WaitGroup
buffer chan int64
duration time.Duration
ttl time.Duration
clientInfos map[int64]clientInfo
}
type connectionManagerOption func(s *connectionManager)
func withDuration(duration time.Duration) connectionManagerOption {
return func(s *connectionManager) {
s.duration = duration
}
}
func withTTL(ttl time.Duration) connectionManagerOption {
return func(s *connectionManager) {
s.ttl = ttl
}
}
func (s *connectionManager) apply(opts ...connectionManagerOption) {
for _, opt := range opts {
opt(s)
}
clientInfos *typeutil.ConcurrentMap[int64, clientInfo]
count atomic.Int64
}
func (s *connectionManager) init() {
@ -71,7 +42,7 @@ func (s *connectionManager) Stop() {
func (s *connectionManager) checkLoop() {
defer s.wg.Done()
t := time.NewTicker(s.duration)
t := time.NewTicker(paramtable.Get().ProxyCfg.ConnectionCheckIntervalSeconds.GetAsDuration(time.Second))
defer t.Stop()
for {
@ -79,10 +50,9 @@ func (s *connectionManager) checkLoop() {
case <-s.closeSignal:
log.Info("connection manager closed")
return
case identifier := <-s.buffer:
s.Update(identifier)
case <-t.C:
s.removeLongInactiveClients()
t.Reset(paramtable.Get().ProxyCfg.ConnectionCheckIntervalSeconds.GetAsDuration(time.Second))
}
}
}
@ -94,49 +64,42 @@ func (s *connectionManager) Register(ctx context.Context, identifier int64, info
lastActiveTime: time.Now(),
}
s.mu.Lock()
defer s.mu.Unlock()
s.clientInfos[identifier] = cli
s.count.Inc()
s.clientInfos.Insert(identifier, cli)
log.Ctx(ctx).Info("client register", cli.GetLogger()...)
}
// KeepActive records that the client registered under identifier is active
// right now by refreshing its last-active time. Identifiers that are not
// registered are ignored.
func (s *connectionManager) KeepActive(identifier int64) {
	// The refresh is applied directly through Update; there is no hand-off
	// through a buffered channel, so this call cannot block on a full buffer.
	s.Update(identifier)
}
func (s *connectionManager) List() []*commonpb.ClientInfo {
s.mu.RLock()
defer s.mu.RUnlock()
clients := make([]*commonpb.ClientInfo, 0, s.count.Load())
clients := make([]*commonpb.ClientInfo, 0, len(s.clientInfos))
for identifier, cli := range s.clientInfos {
if cli.ClientInfo != nil {
client := proto.Clone(cli.ClientInfo).(*commonpb.ClientInfo)
s.clientInfos.Range(func(identifier int64, info clientInfo) bool {
if info.ClientInfo != nil {
client := typeutil.Clone(info.ClientInfo)
if client.Reserved == nil {
client.Reserved = make(map[string]string)
}
client.Reserved["identifier"] = string(strconv.AppendInt(nil, identifier, 10))
client.Reserved["last_active_time"] = cli.lastActiveTime.String()
client.Reserved["last_active_time"] = info.lastActiveTime.String()
clients = append(clients, client)
}
}
return true
})
return clients
}
func (s *connectionManager) Get(ctx context.Context) *commonpb.ClientInfo {
s.mu.RLock()
defer s.mu.RUnlock()
identifier, err := GetIdentifierFromContext(ctx)
if err != nil {
return nil
}
cli, ok := s.clientInfos[identifier]
cli, ok := s.clientInfos.Get(identifier)
if !ok {
return nil
}
@ -144,37 +107,30 @@ func (s *connectionManager) Get(ctx context.Context) *commonpb.ClientInfo {
}
// Update refreshes the last-active time of the client registered under
// identifier. Identifiers that are not registered are ignored.
func (s *connectionManager) Update(identifier int64) {
	info, ok := s.clientInfos.Get(identifier)
	if !ok {
		return
	}

	// The map stores clientInfo by value, so the refreshed copy has to be
	// written back for the new timestamp to become visible.
	info.lastActiveTime = time.Now()
	s.clientInfos.Insert(identifier, info)
	// NOTE(review): Get and Insert are two separate map operations. If the
	// entry is removed in between, this Insert puts it back — confirm that
	// this is acceptable for the inactive-client sweep.
}
func (s *connectionManager) removeLongInactiveClients() {
s.mu.Lock()
defer s.mu.Unlock()
for candidate, cli := range s.clientInfos {
if time.Since(cli.lastActiveTime) > s.ttl {
log.Info("client deregister", cli.GetLogger()...)
delete(s.clientInfos, candidate)
ttl := paramtable.Get().ProxyCfg.ConnectionClientInfoTTLSeconds.GetAsDuration(time.Second)
s.clientInfos.Range(func(candidate int64, info clientInfo) bool {
if time.Since(info.lastActiveTime) > ttl {
log.Info("client deregister", info.GetLogger()...)
s.clientInfos.Remove(candidate)
s.count.Dec()
}
}
return true
})
}
func newConnectionManager(opts ...connectionManagerOption) *connectionManager {
func newConnectionManager() *connectionManager {
s := &connectionManager{
closeSignal: make(chan struct{}, 1),
buffer: make(chan int64, 64),
duration: defaultConnCheckDuration,
ttl: defaultTTLForInactiveConn,
clientInfos: make(map[int64]clientInfo),
clientInfos: typeutil.NewConcurrentMap[int64, clientInfo](),
}
s.apply(opts...)
s.init()
return s

View File

@ -8,39 +8,19 @@ import (
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/pkg/util/paramtable"
)
func Test_withDuration(t *testing.T) {
s := &connectionManager{}
s.apply(withDuration(defaultConnCheckDuration))
assert.Equal(t, defaultConnCheckDuration, s.duration)
}
func Test_withTTL(t *testing.T) {
s := &connectionManager{}
s.apply(withTTL(defaultTTLForInactiveConn))
assert.Equal(t, defaultTTLForInactiveConn, s.ttl)
}
func Test_connectionManager_apply(t *testing.T) {
s := &connectionManager{}
s.apply(
withDuration(defaultConnCheckDuration),
withTTL(defaultTTLForInactiveConn))
assert.Equal(t, defaultConnCheckDuration, s.duration)
assert.Equal(t, defaultTTLForInactiveConn, s.ttl)
}
func TestGetConnectionManager(t *testing.T) {
s := GetManager()
assert.Equal(t, defaultConnCheckDuration, s.duration)
assert.Equal(t, defaultTTLForInactiveConn, s.ttl)
}
func TestConnectionManager(t *testing.T) {
s := newConnectionManager(
withDuration(time.Millisecond*5),
withTTL(time.Millisecond*100))
paramtable.Init()
pt := paramtable.Get()
pt.Save(pt.ProxyCfg.ConnectionCheckIntervalSeconds.Key, "2")
pt.Save(pt.ProxyCfg.ConnectionClientInfoTTLSeconds.Key, "1")
defer pt.Reset(pt.ProxyCfg.ConnectionCheckIntervalSeconds.Key)
defer pt.Reset(pt.ProxyCfg.ConnectionClientInfoTTLSeconds.Key)
s := newConnectionManager()
defer s.Stop()
s.Register(context.TODO(), 1, &commonpb.ClientInfo{
Reserved: map[string]string{"for_test": "for_test"},
@ -60,10 +40,7 @@ func TestConnectionManager(t *testing.T) {
time.Sleep(time.Millisecond * 5)
assert.Equal(t, 2, len(s.List()))
time.Sleep(time.Millisecond * 100)
assert.Equal(t, 0, len(s.List()))
s.Stop()
time.Sleep(time.Millisecond * 5)
assert.Eventually(t, func() bool {
return len(s.List()) == 0
}, time.Second*5, time.Second)
}

View File

@ -1048,6 +1048,10 @@ type proxyConfig struct {
AccessLog AccessLogConfig
// connection manager
ConnectionCheckIntervalSeconds ParamItem `refreshable:"true"`
ConnectionClientInfoTTLSeconds ParamItem `refreshable:"true"`
GracefulStopTimeout ParamItem `refreshable:"true"`
}
@ -1364,6 +1368,24 @@ please adjust in embedded Milvus: false`,
Export: true,
}
p.GracefulStopTimeout.Init(base.mgr)
p.ConnectionCheckIntervalSeconds = ParamItem{
Key: "proxy.connectionMgrCheckInterval",
Version: "2.3.11",
Doc: "the interval time(in seconds) for connection manager to scan inactive client info",
DefaultValue: "120",
Export: true,
}
p.ConnectionCheckIntervalSeconds.Init(base.mgr)
p.ConnectionClientInfoTTLSeconds = ParamItem{
Key: "proxy.connectionClientInfoTTL",
Version: "2.3.11",
Doc: "inactive client info TTL duration, in seconds",
DefaultValue: "86400",
Export: true,
}
p.ConnectionClientInfoTTLSeconds.Init(base.mgr)
}
// /////////////////////////////////////////////////////////////////////////////