mirror of https://github.com/milvus-io/milvus.git
fix: Fix collection leak in querynode (#37061)
Unref the removed L0 segment count. issue: https://github.com/milvus-io/milvus/issues/36918 Signed-off-by: bigsheeper <yihao.dai@zilliz.com>pull/37353/head
parent
139f4e5ab5
commit
ed37c27bda
|
@ -127,7 +127,8 @@ func (m *collectionManager) Unref(collectionID int64, count uint32) bool {
|
|||
|
||||
if collection, ok := m.collections[collectionID]; ok {
|
||||
if collection.Unref(count) == 0 {
|
||||
log.Info("release collection due to ref count to 0", zap.Int64("collectionID", collectionID))
|
||||
log.Info("release collection due to ref count to 0",
|
||||
zap.Int64("nodeID", paramtable.GetNodeID()), zap.Int64("collectionID", collectionID))
|
||||
delete(m.collections, collectionID)
|
||||
DeleteCollection(collection)
|
||||
|
||||
|
@ -217,7 +218,8 @@ func (c *Collection) GetLoadType() querypb.LoadType {
|
|||
|
||||
func (c *Collection) Ref(count uint32) uint32 {
|
||||
refCount := c.refCount.Add(count)
|
||||
log.Debug("collection ref increment",
|
||||
log.Info("collection ref increment",
|
||||
zap.Int64("nodeID", paramtable.GetNodeID()),
|
||||
zap.Int64("collectionID", c.ID()),
|
||||
zap.Uint32("refCount", refCount),
|
||||
)
|
||||
|
@ -226,7 +228,8 @@ func (c *Collection) Ref(count uint32) uint32 {
|
|||
|
||||
func (c *Collection) Unref(count uint32) uint32 {
|
||||
refCount := c.refCount.Sub(count)
|
||||
log.Debug("collection ref decrement",
|
||||
log.Info("collection ref decrement",
|
||||
zap.Int64("nodeID", paramtable.GetNodeID()),
|
||||
zap.Int64("collectionID", c.ID()),
|
||||
zap.Uint32("refCount", refCount),
|
||||
)
|
||||
|
|
|
@ -369,10 +369,10 @@ func (node *QueryNode) UnsubDmChannel(ctx context.Context, req *querypb.UnsubDmC
|
|||
|
||||
node.pipelineManager.Remove(req.GetChannelName())
|
||||
node.manager.Segment.RemoveBy(ctx, segments.WithChannel(req.GetChannelName()), segments.WithType(segments.SegmentTypeGrowing))
|
||||
node.manager.Segment.RemoveBy(ctx, segments.WithChannel(req.GetChannelName()), segments.WithLevel(datapb.SegmentLevel_L0))
|
||||
_, sealed := node.manager.Segment.RemoveBy(ctx, segments.WithChannel(req.GetChannelName()), segments.WithLevel(datapb.SegmentLevel_L0))
|
||||
node.tSafeManager.Remove(ctx, req.GetChannelName())
|
||||
|
||||
node.manager.Collection.Unref(req.GetCollectionID(), 1)
|
||||
node.manager.Collection.Unref(req.GetCollectionID(), uint32(1+sealed))
|
||||
}
|
||||
log.Info("unsubscribed channel")
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import (
|
|||
"github.com/milvus-io/milvus/internal/util/hookutil"
|
||||
"github.com/milvus-io/milvus/pkg/log"
|
||||
"github.com/milvus-io/milvus/pkg/util/etcd"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
||||
)
|
||||
|
||||
|
@ -153,9 +154,13 @@ func (s *MiniClusterSuite) TearDownTest() {
|
|||
if err == nil {
|
||||
for idx, collectionName := range resp.GetCollectionNames() {
|
||||
if resp.GetInMemoryPercentages()[idx] == 100 || resp.GetQueryServiceAvailable()[idx] {
|
||||
s.Cluster.Proxy.ReleaseCollection(context.Background(), &milvuspb.ReleaseCollectionRequest{
|
||||
status, err := s.Cluster.Proxy.ReleaseCollection(context.Background(), &milvuspb.ReleaseCollectionRequest{
|
||||
CollectionName: collectionName,
|
||||
})
|
||||
err = merr.CheckRPCCall(status, err)
|
||||
s.NoError(err)
|
||||
collectionID := resp.GetCollectionIds()[idx]
|
||||
s.CheckCollectionCacheReleased(collectionID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,8 @@ import (
|
|||
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
||||
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
||||
"github.com/milvus-io/milvus/pkg/common"
|
||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
|
||||
"github.com/milvus-io/milvus/pkg/util/testutils"
|
||||
)
|
||||
|
||||
|
@ -99,6 +101,36 @@ func (s *MiniClusterSuite) WaitForLoadRefresh(ctx context.Context, dbName, colle
|
|||
}
|
||||
}
|
||||
|
||||
// CheckCollectionCacheReleased checks if the collection cache was released from querynodes.
|
||||
func (s *MiniClusterSuite) CheckCollectionCacheReleased(collectionID int64) {
|
||||
for _, qn := range s.Cluster.GetAllQueryNodes() {
|
||||
s.Eventually(func() bool {
|
||||
state, err := qn.GetComponentStates(context.Background(), &milvuspb.GetComponentStatesRequest{})
|
||||
s.NoError(err)
|
||||
if state.GetState().GetStateCode() != commonpb.StateCode_Healthy {
|
||||
// skip checking stopping/stopped node
|
||||
return true
|
||||
}
|
||||
req, err := metricsinfo.ConstructRequestByMetricType(metricsinfo.SystemInfoMetrics)
|
||||
s.NoError(err)
|
||||
resp, err := qn.GetQueryNode().GetMetrics(context.Background(), req)
|
||||
err = merr.CheckRPCCall(resp.GetStatus(), err)
|
||||
s.NoError(err)
|
||||
infos := metricsinfo.QueryNodeInfos{}
|
||||
err = metricsinfo.UnmarshalComponentInfos(resp.Response, &infos)
|
||||
s.NoError(err)
|
||||
for _, id := range infos.QuotaMetrics.Effect.CollectionIDs {
|
||||
if id == collectionID {
|
||||
s.T().Logf("collection %d was not released in querynode %d", collectionID, qn.GetQueryNode().GetNodeID())
|
||||
return false
|
||||
}
|
||||
}
|
||||
s.T().Logf("collection %d has been released from querynode %d", collectionID, qn.GetQueryNode().GetNodeID())
|
||||
return true
|
||||
}, 3*time.Minute, 200*time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func ConstructSearchRequest(
|
||||
dbName, collectionName string,
|
||||
expr string,
|
||||
|
|
Loading…
Reference in New Issue