2023-10-23 11:42:10 +00:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package metacache
|
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
"github.com/samber/lo"
|
2023-10-23 11:42:10 +00:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
2023-11-14 07:08:19 +00:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2024-08-22 11:42:57 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/flushcommon/metacache/pkoracle"
|
2023-11-14 07:08:19 +00:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
2023-10-28 03:12:11 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
2025-01-10 02:53:01 +00:00
|
|
|
"github.com/milvus-io/milvus/pkg/proto/datapb"
|
2023-10-23 11:42:10 +00:00
|
|
|
)
|
|
|
|
|
2024-05-30 05:37:44 +00:00
|
|
|
//go:generate mockery --name=MetaCache --structname=MockMetaCache --output=./ --filename=mock_meta_cache.go --with-expecter --inpackage

// MetaCache caches segment metadata for a single vchannel, keeping segment
// infos queryable by id, state, and arbitrary filters.
type MetaCache interface {
	// Collection returns collection id of metacache.
	Collection() int64
	// Schema returns collection schema.
	Schema() *schemapb.CollectionSchema
	// AddSegment adds a segment from segment info.
	AddSegment(segInfo *datapb.SegmentInfo, pkFactory PkStatsFactory, bmFactory BM25StatsFactory, actions ...SegmentAction)
	// UpdateSegments applies action to segment(s) satisfying the provided filters.
	UpdateSegments(action SegmentAction, filters ...SegmentFilter)
	// RemoveSegments removes segments matching the provided filters.
	RemoveSegments(filters ...SegmentFilter) []int64
	// GetSegmentsBy returns segments satisfying the provided filters.
	GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo
	// GetSegmentByID returns segment with provided segment id if exists.
	GetSegmentByID(id int64, filters ...SegmentFilter) (*SegmentInfo, bool)
	// GetSegmentIDsBy returns ids of segments which satisfy the provided filters.
	GetSegmentIDsBy(filters ...SegmentFilter) []int64
	// PredictSegments returns the segment ids which may contain the provided primary key.
	PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool)
	// DetectMissingSegments returns the segment ids which is missing in datanode.
	DetectMissingSegments(segments map[int64]struct{}) []int64
	// UpdateSegmentView updates the segments BF from datacoord view.
	UpdateSegmentView(partitionID int64, newSegments []*datapb.SyncSegmentInfo, newSegmentsBF []*pkoracle.BloomFilterSet, allSegments map[int64]struct{})
}
|
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
// compile-time assertion that metaCacheImpl implements MetaCache.
var _ MetaCache = (*metaCacheImpl)(nil)
|
2023-10-23 11:42:10 +00:00
|
|
|
|
2024-09-19 02:57:12 +00:00
|
|
|
type (
|
|
|
|
PkStatsFactory func(vchannel *datapb.SegmentInfo) pkoracle.PkStat
|
|
|
|
BM25StatsFactory func(vchannel *datapb.SegmentInfo) *SegmentBM25Stats
|
|
|
|
)
|
|
|
|
|
|
|
|
func NoneBm25StatsFactory(vchannel *datapb.SegmentInfo) *SegmentBM25Stats {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewBM25StatsFactory(vchannel *datapb.SegmentInfo) *SegmentBM25Stats {
|
|
|
|
return NewEmptySegmentBM25Stats()
|
|
|
|
}
|
2023-10-23 11:42:10 +00:00
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
// metaCacheImpl is the default MetaCache implementation. Segments are indexed
// twice: by id (segmentInfos) and bucketed by state (stateSegments); both maps
// are kept in sync under mu.
type metaCacheImpl struct {
	collectionID int64                      // id of the collection this cache serves
	vChannelName string                     // vchannel the cached segments belong to
	schema       *schemapb.CollectionSchema // collection schema captured at watch time

	mu            sync.RWMutex
	segmentInfos  map[int64]*SegmentInfo                           // segment id -> info (primary index)
	stateSegments map[commonpb.SegmentState]map[int64]*SegmentInfo // state -> (segment id -> info), secondary index
}
|
|
|
|
|
2024-09-19 02:57:12 +00:00
|
|
|
func NewMetaCache(info *datapb.ChannelWatchInfo, pkFactory PkStatsFactory, bmFactor BM25StatsFactory) MetaCache {
|
2023-11-14 07:08:19 +00:00
|
|
|
vchannel := info.GetVchan()
|
2023-10-28 03:12:11 +00:00
|
|
|
cache := &metaCacheImpl{
|
2024-05-28 06:19:42 +00:00
|
|
|
collectionID: vchannel.GetCollectionID(),
|
|
|
|
vChannelName: vchannel.GetChannelName(),
|
|
|
|
segmentInfos: make(map[int64]*SegmentInfo),
|
|
|
|
stateSegments: make(map[commonpb.SegmentState]map[int64]*SegmentInfo),
|
|
|
|
schema: info.GetSchema(),
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, state := range []commonpb.SegmentState{
|
|
|
|
commonpb.SegmentState_Growing,
|
|
|
|
commonpb.SegmentState_Sealed,
|
|
|
|
commonpb.SegmentState_Flushing,
|
|
|
|
commonpb.SegmentState_Flushed,
|
|
|
|
commonpb.SegmentState_Dropped,
|
|
|
|
commonpb.SegmentState_Importing,
|
|
|
|
} {
|
|
|
|
cache.stateSegments[state] = make(map[int64]*SegmentInfo)
|
2023-10-23 11:42:10 +00:00
|
|
|
}
|
|
|
|
|
2024-09-19 02:57:12 +00:00
|
|
|
cache.init(vchannel, pkFactory, bmFactor)
|
2023-10-23 11:42:10 +00:00
|
|
|
return cache
|
|
|
|
}
|
|
|
|
|
2024-09-19 02:57:12 +00:00
|
|
|
func (c *metaCacheImpl) init(vchannel *datapb.VchannelInfo, pkFactory PkStatsFactory, bmFactor BM25StatsFactory) {
|
2023-10-23 11:42:10 +00:00
|
|
|
for _, seg := range vchannel.FlushedSegments {
|
2024-09-19 02:57:12 +00:00
|
|
|
c.addSegment(NewSegmentInfo(seg, pkFactory(seg), bmFactor(seg)))
|
2023-10-23 11:42:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, seg := range vchannel.UnflushedSegments {
|
2023-12-23 13:32:43 +00:00
|
|
|
// segment state could be sealed for growing segment if flush request processed before datanode watch
|
|
|
|
seg.State = commonpb.SegmentState_Growing
|
2024-09-19 02:57:12 +00:00
|
|
|
c.addSegment(NewSegmentInfo(seg, pkFactory(seg), bmFactor(seg)))
|
2023-10-23 11:42:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-14 07:08:19 +00:00
|
|
|
// Collection returns collection id of metacache.
|
|
|
|
func (c *metaCacheImpl) Collection() int64 {
|
|
|
|
return c.collectionID
|
|
|
|
}
|
|
|
|
|
|
|
|
// Schema returns collection schema.
|
|
|
|
func (c *metaCacheImpl) Schema() *schemapb.CollectionSchema {
|
|
|
|
return c.schema
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddSegment adds a segment from segment info.
|
2024-09-19 02:57:12 +00:00
|
|
|
func (c *metaCacheImpl) AddSegment(segInfo *datapb.SegmentInfo, pkFactory PkStatsFactory, bmFactory BM25StatsFactory, actions ...SegmentAction) {
|
|
|
|
segment := NewSegmentInfo(segInfo, pkFactory(segInfo), bmFactory(segInfo))
|
2023-11-14 07:08:19 +00:00
|
|
|
|
|
|
|
for _, action := range actions {
|
|
|
|
action(segment)
|
|
|
|
}
|
|
|
|
c.mu.Lock()
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
2024-05-28 06:19:42 +00:00
|
|
|
c.addSegment(segment)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *metaCacheImpl) addSegment(segment *SegmentInfo) {
|
|
|
|
segID := segment.SegmentID()
|
|
|
|
c.segmentInfos[segID] = segment
|
|
|
|
c.stateSegments[segment.State()][segID] = segment
|
2023-11-14 07:08:19 +00:00
|
|
|
}
|
|
|
|
|
2023-11-24 07:38:25 +00:00
|
|
|
func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 {
|
|
|
|
if len(filters) == 0 {
|
|
|
|
log.Warn("remove segment without filters is not allowed", zap.Stack("callstack"))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
c.mu.Lock()
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
2024-03-11 02:55:02 +00:00
|
|
|
var result []int64
|
|
|
|
process := func(id int64, info *SegmentInfo) {
|
|
|
|
delete(c.segmentInfos, id)
|
2024-05-28 06:19:42 +00:00
|
|
|
delete(c.stateSegments[info.State()], id)
|
2024-03-11 02:55:02 +00:00
|
|
|
result = append(result, id)
|
2023-11-24 07:38:25 +00:00
|
|
|
}
|
2024-03-11 02:55:02 +00:00
|
|
|
c.rangeWithFilter(process, filters...)
|
|
|
|
return result
|
2023-10-28 03:12:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (c *metaCacheImpl) GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo {
|
|
|
|
c.mu.RLock()
|
|
|
|
defer c.mu.RUnlock()
|
|
|
|
|
|
|
|
var segments []*SegmentInfo
|
2024-03-11 02:55:02 +00:00
|
|
|
c.rangeWithFilter(func(_ int64, info *SegmentInfo) {
|
|
|
|
segments = append(segments, info)
|
|
|
|
}, filters...)
|
2023-10-28 03:12:11 +00:00
|
|
|
return segments
|
|
|
|
}
|
|
|
|
|
2023-11-14 07:08:19 +00:00
|
|
|
// GetSegmentByID returns segment with provided segment id if exists.
|
|
|
|
func (c *metaCacheImpl) GetSegmentByID(id int64, filters ...SegmentFilter) (*SegmentInfo, bool) {
|
|
|
|
c.mu.RLock()
|
|
|
|
defer c.mu.RUnlock()
|
|
|
|
|
|
|
|
segment, ok := c.segmentInfos[id]
|
|
|
|
if !ok {
|
|
|
|
return nil, false
|
|
|
|
}
|
2024-03-11 02:55:02 +00:00
|
|
|
for _, filter := range filters {
|
|
|
|
if !filter.Filter(segment) {
|
|
|
|
return nil, false
|
|
|
|
}
|
2023-11-14 07:08:19 +00:00
|
|
|
}
|
|
|
|
return segment, ok
|
|
|
|
}
|
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
func (c *metaCacheImpl) GetSegmentIDsBy(filters ...SegmentFilter) []int64 {
|
|
|
|
segments := c.GetSegmentsBy(filters...)
|
|
|
|
return lo.Map(segments, func(info *SegmentInfo, _ int) int64 { return info.SegmentID() })
|
2023-10-23 11:42:10 +00:00
|
|
|
}
|
|
|
|
|
2023-10-28 03:12:11 +00:00
|
|
|
func (c *metaCacheImpl) UpdateSegments(action SegmentAction, filters ...SegmentFilter) {
|
2023-10-23 11:42:10 +00:00
|
|
|
c.mu.Lock()
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
2024-03-11 02:55:02 +00:00
|
|
|
c.rangeWithFilter(func(id int64, info *SegmentInfo) {
|
2023-10-28 03:12:11 +00:00
|
|
|
nInfo := info.Clone()
|
|
|
|
action(nInfo)
|
|
|
|
c.segmentInfos[id] = nInfo
|
2024-05-28 06:19:42 +00:00
|
|
|
delete(c.stateSegments[info.State()], info.SegmentID())
|
|
|
|
c.stateSegments[nInfo.State()][nInfo.SegmentID()] = nInfo
|
2024-03-11 02:55:02 +00:00
|
|
|
}, filters...)
|
2023-10-28 03:12:11 +00:00
|
|
|
}
|
|
|
|
|
2023-11-14 07:08:19 +00:00
|
|
|
func (c *metaCacheImpl) PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool) {
|
|
|
|
var predicts []int64
|
2024-05-13 02:15:32 +00:00
|
|
|
lc := storage.NewLocationsCache(pk)
|
2023-11-14 07:08:19 +00:00
|
|
|
segments := c.GetSegmentsBy(filters...)
|
|
|
|
for _, segment := range segments {
|
2024-05-13 02:15:32 +00:00
|
|
|
if segment.GetBloomFilterSet().PkExists(lc) {
|
2023-11-14 07:08:19 +00:00
|
|
|
predicts = append(predicts, segment.segmentID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return predicts, len(predicts) > 0
|
|
|
|
}
|
|
|
|
|
2024-03-11 02:55:02 +00:00
|
|
|
// rangeWithFilter calls fn for every segment matching all provided filters.
// Filters first register themselves on a segmentCriterion, which lets this
// function narrow the iteration space before the per-segment Match check:
// a state criterion restricts iteration to the matching state buckets, and an
// id criterion restricts it to direct map lookups. Caller must hold c.mu
// (read or write depending on what fn does).
func (c *metaCacheImpl) rangeWithFilter(fn func(id int64, info *SegmentInfo), filters ...SegmentFilter) {
	criterion := &segmentCriterion{}
	for _, filter := range filters {
		filter.AddFilter(criterion)
	}

	// Choose candidate maps: per-state buckets if states were requested,
	// otherwise the full id index.
	var candidates []map[int64]*SegmentInfo
	if criterion.states != nil {
		candidates = lo.Map(criterion.states.Collect(), func(state commonpb.SegmentState, _ int) map[int64]*SegmentInfo {
			return c.stateSegments[state]
		})
	} else {
		candidates = []map[int64]*SegmentInfo{
			c.segmentInfos,
		}
	}

	for _, candidate := range candidates {
		var segments map[int64]*SegmentInfo
		if criterion.ids != nil {
			// Requested ids: look each one up in this candidate map and keep
			// only those present, rebuilding an id->segment map.
			segments = lo.SliceToMap(lo.FilterMap(criterion.ids.Collect(), func(id int64, _ int) (*SegmentInfo, bool) {
				segment, ok := candidate[id]
				return segment, ok
			}), func(segment *SegmentInfo) (int64, *SegmentInfo) {
				return segment.SegmentID(), segment
			})
		} else {
			segments = candidate
		}

		// Remaining (non-indexed) predicates are checked per segment.
		for id, segment := range segments {
			if criterion.Match(segment) {
				fn(id, segment)
			}
		}
	}
}
|
2024-05-30 05:37:44 +00:00
|
|
|
|
|
|
|
func (c *metaCacheImpl) DetectMissingSegments(segments map[int64]struct{}) []int64 {
|
|
|
|
c.mu.RLock()
|
|
|
|
defer c.mu.RUnlock()
|
|
|
|
|
|
|
|
missingSegments := make([]int64, 0)
|
|
|
|
|
|
|
|
for segID := range segments {
|
|
|
|
if _, ok := c.segmentInfos[segID]; !ok {
|
|
|
|
missingSegments = append(missingSegments, segID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return missingSegments
|
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateSegmentView reconciles the cache with the datacoord view of one
// partition. Segments present in newSegments but missing locally are added,
// taking their bloom filter set from newSegmentsBF at the same index
// (newSegmentsBF is assumed parallel to newSegments — confirm with caller).
// Flushed segments of this partition that are absent from allSegments are
// removed as dropped.
func (c *metaCacheImpl) UpdateSegmentView(partitionID int64,
	newSegments []*datapb.SyncSegmentInfo,
	newSegmentsBF []*pkoracle.BloomFilterSet,
	allSegments map[int64]struct{},
) {
	c.mu.Lock()
	defer c.mu.Unlock()

	for i, info := range newSegments {
		// check again under the write lock before inserting
		if _, ok := c.segmentInfos[info.GetSegmentId()]; !ok {
			segInfo := &SegmentInfo{
				segmentID:        info.GetSegmentId(),
				partitionID:      partitionID,
				state:            info.GetState(),
				level:            info.GetLevel(),
				flushedRows:      info.GetNumOfRows(),
				startPosRecorded: true,
				bfs:              newSegmentsBF[i],
			}
			c.segmentInfos[info.GetSegmentId()] = segInfo
			c.stateSegments[info.GetState()][info.GetSegmentId()] = segInfo
			log.Info("metacache does not have segment, add it", zap.Int64("segmentID", info.GetSegmentId()))
		}
	}

	// Drop local flushed segments no longer known to datacoord.
	for segID, info := range c.segmentInfos {
		// only check flushed segments
		// 1. flushing may be compacted on datacoord
		// 2. growing may doesn't have stats log, it won't include in sync views
		if info.partitionID != partitionID || info.state != commonpb.SegmentState_Flushed {
			continue
		}
		if _, ok := allSegments[segID]; !ok {
			log.Info("remove dropped segment", zap.Int64("segmentID", segID))
			delete(c.segmentInfos, segID)
			delete(c.stateSegments[info.State()], segID)
		}
	}
}
|