enhance: Add id,type filter shortcut for QueryNode segment manager (#31179)

See also #31143

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
pull/31247/head
congqixia 2024-03-13 20:15:03 +08:00 committed by GitHub
parent b5c67948b7
commit 57a5e44b88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 185 additions and 102 deletions

View File

@ -32,6 +32,7 @@ import (
"go.uber.org/zap"
"golang.org/x/sync/singleflight"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/pkg/eventlog"
@ -40,45 +41,90 @@ import (
"github.com/milvus-io/milvus/pkg/util/cache"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/paramtable"
. "github.com/milvus-io/milvus/pkg/util/typeutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type SegmentFilter func(segment Segment) bool
func WithSkipEmpty() SegmentFilter {
return func(segment Segment) bool {
return segment.InsertCount() > 0
}
// SegmentFilter is the interface for segment selection criteria.
type SegmentFilter interface {
Filter(segment Segment) bool
SegmentType() (SegmentType, bool)
SegmentIDs() ([]int64, bool)
}
func WithPartition(partitionID UniqueID) SegmentFilter {
return func(segment Segment) bool {
// SegmentFilterFunc is a type wrapper for `func(Segment) bool` to SegmentFilter.
type SegmentFilterFunc func(segment Segment) bool
func (f SegmentFilterFunc) Filter(segment Segment) bool {
return f(segment)
}
func (f SegmentFilterFunc) SegmentType() (SegmentType, bool) {
return commonpb.SegmentState_SegmentStateNone, false
}
func (s SegmentFilterFunc) SegmentIDs() ([]int64, bool) {
return nil, false
}
// SegmentIDFilter is the specific segment filter for SegmentID only.
type SegmentIDFilter int64
func (f SegmentIDFilter) Filter(segment Segment) bool {
return segment.ID() == int64(f)
}
func (f SegmentIDFilter) SegmentType() (SegmentType, bool) {
return commonpb.SegmentState_SegmentStateNone, false
}
func (f SegmentIDFilter) SegmentIDs() ([]int64, bool) {
return []int64{int64(f)}, true
}
type SegmentTypeFilter SegmentType
func (f SegmentTypeFilter) Filter(segment Segment) bool {
return segment.Type() == SegmentType(f)
}
func (f SegmentTypeFilter) SegmentType() (SegmentType, bool) {
return SegmentType(f), true
}
func (f SegmentTypeFilter) SegmentIDs() ([]int64, bool) {
return nil, false
}
func WithSkipEmpty() SegmentFilter {
return SegmentFilterFunc(func(segment Segment) bool {
return segment.InsertCount() > 0
})
}
func WithPartition(partitionID typeutil.UniqueID) SegmentFilter {
return SegmentFilterFunc(func(segment Segment) bool {
return segment.Partition() == partitionID
}
})
}
func WithChannel(channel string) SegmentFilter {
return func(segment Segment) bool {
return SegmentFilterFunc(func(segment Segment) bool {
return segment.Shard() == channel
}
})
}
func WithType(typ SegmentType) SegmentFilter {
return func(segment Segment) bool {
return segment.Type() == typ
}
return SegmentTypeFilter(typ)
}
func WithID(id int64) SegmentFilter {
return func(segment Segment) bool {
return segment.ID() == id
}
return SegmentIDFilter(id)
}
func WithLevel(level datapb.SegmentLevel) SegmentFilter {
return func(segment Segment) bool {
return SegmentFilterFunc(func(segment Segment) bool {
return segment.Level() == level
}
})
}
type SegmentAction func(segment Segment) bool
@ -116,8 +162,8 @@ type Manager struct {
func NewManager() *Manager {
diskCap := paramtable.Get().QueryNodeCfg.DiskCapacityLimit.GetAsInt64()
segmentMaxSIze := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt64()
cacheMaxItemNum := diskCap / segmentMaxSIze
segmentMaxSize := paramtable.Get().DataCoordCfg.SegmentMaxSize.GetAsInt64()
cacheMaxItemNum := diskCap / segmentMaxSize
segMgr := NewSegmentManager()
sf := singleflight.Group{}
@ -166,22 +212,22 @@ type SegmentManager interface {
// dup segments will not increase the ref count
Put(segmentType SegmentType, segments ...Segment)
UpdateBy(action SegmentAction, filters ...SegmentFilter) int
Get(segmentID UniqueID) Segment
GetWithType(segmentID UniqueID, typ SegmentType) Segment
Get(segmentID typeutil.UniqueID) Segment
GetWithType(segmentID typeutil.UniqueID, typ SegmentType) Segment
GetBy(filters ...SegmentFilter) []Segment
// Get segments and acquire the read locks
GetAndPinBy(filters ...SegmentFilter) ([]Segment, error)
GetAndPin(segments []int64, filters ...SegmentFilter) ([]Segment, error)
Unpin(segments []Segment)
GetSealed(segmentID UniqueID) Segment
GetGrowing(segmentID UniqueID) Segment
GetSealed(segmentID typeutil.UniqueID) Segment
GetGrowing(segmentID typeutil.UniqueID) Segment
Empty() bool
// Remove removes the given segment,
// and decreases the ref count of the corresponding collection,
// will not decrease the ref count if the given segment not exists
Remove(segmentID UniqueID, scope querypb.DataScope) (int, int)
Remove(segmentID typeutil.UniqueID, scope querypb.DataScope) (int, int)
RemoveBy(filters ...SegmentFilter) (int, int)
Clear()
}
@ -192,8 +238,8 @@ var _ SegmentManager = (*segmentManager)(nil)
type segmentManager struct {
mu sync.RWMutex // guards all
growingSegments map[UniqueID]Segment
sealedSegments map[UniqueID]Segment
growingSegments map[typeutil.UniqueID]Segment
sealedSegments map[typeutil.UniqueID]Segment
}
func NewSegmentManager() *segmentManager {
@ -263,25 +309,16 @@ func (mgr *segmentManager) UpdateBy(action SegmentAction, filters ...SegmentFilt
defer mgr.mu.RUnlock()
updated := 0
for _, segment := range mgr.growingSegments {
if filter(segment, filters...) {
if action(segment) {
updated++
}
mgr.rangeWithFilter(func(_ int64, _ SegmentType, segment Segment) bool {
if action(segment) {
updated++
}
}
for _, segment := range mgr.sealedSegments {
if filter(segment, filters...) {
if action(segment) {
updated++
}
}
}
return true
}, filters...)
return updated
}
func (mgr *segmentManager) Get(segmentID UniqueID) Segment {
func (mgr *segmentManager) Get(segmentID typeutil.UniqueID) Segment {
mgr.mu.RLock()
defer mgr.mu.RUnlock()
@ -294,7 +331,7 @@ func (mgr *segmentManager) Get(segmentID UniqueID) Segment {
return nil
}
func (mgr *segmentManager) GetWithType(segmentID UniqueID, typ SegmentType) Segment {
func (mgr *segmentManager) GetWithType(segmentID typeutil.UniqueID, typ SegmentType) Segment {
mgr.mu.RLock()
defer mgr.mu.RUnlock()
@ -312,18 +349,11 @@ func (mgr *segmentManager) GetBy(filters ...SegmentFilter) []Segment {
mgr.mu.RLock()
defer mgr.mu.RUnlock()
ret := make([]Segment, 0)
for _, segment := range mgr.growingSegments {
if filter(segment, filters...) {
ret = append(ret, segment)
}
}
for _, segment := range mgr.sealedSegments {
if filter(segment, filters...) {
ret = append(ret, segment)
}
}
var ret []Segment
mgr.rangeWithFilter(func(id int64, _ SegmentType, segment Segment) bool {
ret = append(ret, segment)
return true
}, filters...)
return ret
}
@ -331,7 +361,7 @@ func (mgr *segmentManager) GetAndPinBy(filters ...SegmentFilter) ([]Segment, err
mgr.mu.RLock()
defer mgr.mu.RUnlock()
ret := make([]Segment, 0)
var ret []Segment
var err error
defer func() {
if err != nil {
@ -341,25 +371,18 @@ func (mgr *segmentManager) GetAndPinBy(filters ...SegmentFilter) ([]Segment, err
}
}()
for _, segment := range mgr.growingSegments {
if filter(segment, filters...) {
err = segment.RLock()
if err != nil {
return nil, err
}
ret = append(ret, segment)
mgr.rangeWithFilter(func(id int64, _ SegmentType, segment Segment) bool {
if segment.Level() == datapb.SegmentLevel_L0 {
return true
}
}
err = segment.RLock()
if err != nil {
return false
}
ret = append(ret, segment)
return true
}, filters...)
for _, segment := range mgr.sealedSegments {
if segment.Level() != datapb.SegmentLevel_L0 && filter(segment, filters...) {
err = segment.RLock()
if err != nil {
return nil, err
}
ret = append(ret, segment)
}
}
return ret, nil
}
@ -419,16 +442,82 @@ func (mgr *segmentManager) Unpin(segments []Segment) {
}
}
func (mgr *segmentManager) rangeWithFilter(process func(id int64, segType SegmentType, segment Segment) bool, filters ...SegmentFilter) {
var segType SegmentType
var hasSegType, hasSegIDs bool
segmentIDs := typeutil.NewSet[int64]()
otherFilters := make([]SegmentFilter, 0, len(filters))
for _, filter := range filters {
if sType, ok := filter.SegmentType(); ok {
segType = sType
hasSegType = true
continue
}
if segIDs, ok := filter.SegmentIDs(); ok {
hasSegIDs = true
segmentIDs.Insert(segIDs...)
continue
}
otherFilters = append(otherFilters, filter)
}
mergedFilter := func(info Segment) bool {
for _, filter := range otherFilters {
if !filter.Filter(info) {
return false
}
}
return true
}
var candidates map[SegmentType]map[int64]Segment
switch segType {
case SegmentTypeSealed:
candidates = map[SegmentType]map[int64]Segment{SegmentTypeSealed: mgr.sealedSegments}
case SegmentTypeGrowing:
candidates = map[SegmentType]map[int64]Segment{SegmentTypeGrowing: mgr.growingSegments}
default:
if !hasSegType {
candidates = map[SegmentType]map[int64]Segment{
SegmentTypeSealed: mgr.sealedSegments,
SegmentTypeGrowing: mgr.growingSegments,
}
}
}
for segType, candidate := range candidates {
if hasSegIDs {
for id := range segmentIDs {
segment, has := candidate[id]
if has && mergedFilter(segment) {
if !process(id, segType, segment) {
break
}
}
}
} else {
for id, segment := range candidate {
if mergedFilter(segment) {
if !process(id, segType, segment) {
break
}
}
}
}
}
}
func filter(segment Segment, filters ...SegmentFilter) bool {
for _, filter := range filters {
if !filter(segment) {
if !filter.Filter(segment) {
return false
}
}
return true
}
func (mgr *segmentManager) GetSealed(segmentID UniqueID) Segment {
func (mgr *segmentManager) GetSealed(segmentID typeutil.UniqueID) Segment {
mgr.mu.RLock()
defer mgr.mu.RUnlock()
@ -439,7 +528,7 @@ func (mgr *segmentManager) GetSealed(segmentID UniqueID) Segment {
return nil
}
func (mgr *segmentManager) GetGrowing(segmentID UniqueID) Segment {
func (mgr *segmentManager) GetGrowing(segmentID typeutil.UniqueID) Segment {
mgr.mu.RLock()
defer mgr.mu.RUnlock()
@ -459,7 +548,7 @@ func (mgr *segmentManager) Empty() bool {
// returns true if the segment exists,
// false otherwise
func (mgr *segmentManager) Remove(segmentID UniqueID, scope querypb.DataScope) (int, int) {
func (mgr *segmentManager) Remove(segmentID typeutil.UniqueID, scope querypb.DataScope) (int, int) {
mgr.mu.Lock()
var removeGrowing, removeSealed int
@ -502,7 +591,7 @@ func (mgr *segmentManager) Remove(segmentID UniqueID, scope querypb.DataScope) (
return removeGrowing, removeSealed
}
func (mgr *segmentManager) removeSegmentWithType(typ SegmentType, segmentID UniqueID) Segment {
func (mgr *segmentManager) removeSegmentWithType(typ SegmentType, segmentID typeutil.UniqueID) Segment {
switch typ {
case SegmentTypeGrowing:
s, ok := mgr.growingSegments[segmentID]
@ -527,36 +616,30 @@ func (mgr *segmentManager) removeSegmentWithType(typ SegmentType, segmentID Uniq
func (mgr *segmentManager) RemoveBy(filters ...SegmentFilter) (int, int) {
mgr.mu.Lock()
var removeGrowing, removeSealed []Segment
for id, segment := range mgr.growingSegments {
if filter(segment, filters...) {
s := mgr.removeSegmentWithType(SegmentTypeGrowing, id)
if s != nil {
removeGrowing = append(removeGrowing, s)
}
}
}
var removeSegments []Segment
var removeGrowing, removeSealed int
for id, segment := range mgr.sealedSegments {
if filter(segment, filters...) {
s := mgr.removeSegmentWithType(SegmentTypeSealed, id)
if s != nil {
removeSealed = append(removeSealed, s)
mgr.rangeWithFilter(func(id int64, segType SegmentType, segment Segment) bool {
s := mgr.removeSegmentWithType(segType, id)
if s != nil {
removeSegments = append(removeSegments, s)
switch segType {
case SegmentTypeGrowing:
removeGrowing++
case SegmentTypeSealed:
removeSealed++
}
}
}
return true
}, filters...)
mgr.updateMetric()
mgr.mu.Unlock()
for _, s := range removeGrowing {
for _, s := range removeSegments {
remove(s)
}
for _, s := range removeSealed {
remove(s)
}
return len(removeGrowing), len(removeSealed)
return removeGrowing, removeSealed
}
func (mgr *segmentManager) Clear() {
@ -577,7 +660,7 @@ func (mgr *segmentManager) Clear() {
func (mgr *segmentManager) updateMetric() {
// update collection and partiation metric
collections, partiations := make(Set[int64]), make(Set[int64])
collections, partiations := make(typeutil.Set[int64]), make(typeutil.Set[int64])
for _, seg := range mgr.growingSegments {
collections.Insert(seg.Collection())
partiations.Insert(seg.Partition())