mirror of https://github.com/milvus-io/milvus.git
enhance: Replace PrimaryKey slice with PrimaryKeys to save memory (#37127)

Related to #35303. A slice of `storage.PrimaryKey` carries an extra interface header for each element, which may cause notable memory usage when the delta row count is large. This PR replaces the PrimaryKey slice with the PrimaryKeys interface, saving that per-element interface cost.

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>

parent 9d16b972ea
commit 7774b7275e
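
For context on the memory claim: each element of a `[]storage.PrimaryKey` is an interface value, i.e. a two-word (16 bytes on 64-bit platforms) type/data header pointing at a separately heap-allocated key, while a typed container can back every key with one contiguous slice. The sketch below is illustrative only and not part of the patch; the `PrimaryKey`/`Int64PrimaryKeys` shapes are simplified stand-ins for the storage package types.

package main

import (
	"fmt"
	"unsafe"
)

// PrimaryKey stands in for storage.PrimaryKey: a slice of it stores one
// interface header per element plus a pointed-to heap allocation.
type PrimaryKey interface {
	GetValue() interface{}
}

type Int64PrimaryKey struct{ Value int64 }

func (pk *Int64PrimaryKey) GetValue() interface{} { return pk.Value }

// Int64PrimaryKeys stands in for the typed batch container: all keys share
// one backing []int64, 8 bytes per key and no per-element header.
type Int64PrimaryKeys struct{ values []int64 }

func (pks *Int64PrimaryKeys) AppendRaw(vs ...int64) { pks.values = append(pks.values, vs...) }
func (pks *Int64PrimaryKeys) Len() int              { return len(pks.values) }

func main() {
	const n = 1_000_000

	boxed := make([]PrimaryKey, 0, n)
	for i := 0; i < n; i++ {
		boxed = append(boxed, &Int64PrimaryKey{Value: int64(i)})
	}
	// Interface headers alone: 16 bytes per element, before counting the
	// n separate 8-byte values they point at.
	fmt.Println(n * int(unsafe.Sizeof(boxed[0]))) // 16000000

	typed := &Int64PrimaryKeys{}
	for i := 0; i < n; i++ {
		typed.AppendRaw(int64(i))
	}
	// One contiguous backing array: 8 bytes per key.
	fmt.Println(typed.Len() * 8) // 8000000
}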
@@ -1509,8 +1509,18 @@ func (s *DelegatorDataSuite) TestSyncTargetVersion() {
 func (s *DelegatorDataSuite) TestLevel0Deletions() {
 	delegator := s.delegator
 	partitionID := int64(10)
-	partitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(1)}, []storage.Timestamp{100})
-	allPartitionDeleteData := storage.NewDeleteData([]storage.PrimaryKey{storage.NewInt64PrimaryKey(2)}, []storage.Timestamp{101})
+	partitionDelPks := storage.NewInt64PrimaryKeys(1)
+	partitionDelPks.AppendRaw(1)
+	allPartitionDelPks := storage.NewInt64PrimaryKeys(1)
+	allPartitionDelPks.AppendRaw(2)
+	partitionDeleteData := &storage.DeltaData{
+		DeletePks:        partitionDelPks,
+		DeleteTimestamps: []storage.Timestamp{100},
+	}
+	allPartitionDeleteData := &storage.DeltaData{
+		DeletePks:        allPartitionDelPks,
+		DeleteTimestamps: []storage.Timestamp{101},
+	}
 
 	schema := segments.GenTestCollectionSchema("test_stop", schemapb.DataType_Int64, true)
 	collection := segments.NewCollection(1, schema, nil, &querypb.LoadMetaInfo{
@@ -1539,29 +1549,29 @@ func (s *DelegatorDataSuite) TestLevel0Deletions() {
 	l0Global.LoadDeltaData(context.TODO(), allPartitionDeleteData)
 
 	pks, _ := delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
-	s.True(pks[0].EQ(partitionDeleteData.Pks[0]))
+	s.True(pks[0].EQ(partitionDeleteData.DeletePks.Get(0)))
 
 	pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
 	s.Empty(pks)
 
 	delegator.segmentManager.Put(context.TODO(), segments.SegmentTypeSealed, l0Global)
 	pks, _ = delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
-	s.ElementsMatch(pks, []storage.PrimaryKey{partitionDeleteData.Pks[0], allPartitionDeleteData.Pks[0]})
+	s.ElementsMatch(pks, []storage.PrimaryKey{partitionDeleteData.DeletePks.Get(0), allPartitionDeleteData.DeletePks.Get(0)})
 
 	bfs := pkoracle.NewBloomFilterSet(3, l0.Partition(), commonpb.SegmentState_Sealed)
-	bfs.UpdateBloomFilter(allPartitionDeleteData.Pks)
+	bfs.UpdateBloomFilter([]storage.PrimaryKey{allPartitionDeleteData.DeletePks.Get(0)})
 
 	pks, _ = delegator.GetLevel0Deletions(partitionID, bfs)
 	// bf filtered segment
 	s.Equal(len(pks), 1)
-	s.True(pks[0].EQ(allPartitionDeleteData.Pks[0]))
+	s.True(pks[0].EQ(allPartitionDeleteData.DeletePks.Get(0)))
 
 	delegator.segmentManager.Remove(context.TODO(), l0.ID(), querypb.DataScope_All)
 	pks, _ = delegator.GetLevel0Deletions(partitionID, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
-	s.True(pks[0].EQ(allPartitionDeleteData.Pks[0]))
+	s.True(pks[0].EQ(allPartitionDeleteData.DeletePks.Get(0)))
 
 	pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
-	s.True(pks[0].EQ(allPartitionDeleteData.Pks[0]))
+	s.True(pks[0].EQ(allPartitionDeleteData.DeletePks.Get(0)))
 
 	delegator.segmentManager.Remove(context.TODO(), l0Global.ID(), querypb.DataScope_All)
 	pks, _ = delegator.GetLevel0Deletions(partitionID+1, pkoracle.NewCandidateKey(l0.ID(), l0.Partition(), segments.SegmentTypeGrowing))
@@ -822,7 +822,7 @@ func (_c *MockSegment_Level_Call) RunAndReturn(run func() datapb.SegmentLevel) *
 }
 
 // LoadDeltaData provides a mock function with given fields: ctx, deltaData
-func (_m *MockSegment) LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error {
+func (_m *MockSegment) LoadDeltaData(ctx context.Context, deltaData *storage.DeltaData) error {
 	ret := _m.Called(ctx, deltaData)
 
 	if len(ret) == 0 {
@@ -830,7 +830,7 @@ func (_m *MockSegment) LoadDeltaData(ctx context.Context, deltaData *storage.Del
 	}
 
 	var r0 error
-	if rf, ok := ret.Get(0).(func(context.Context, *storage.DeleteData) error); ok {
+	if rf, ok := ret.Get(0).(func(context.Context, *storage.DeltaData) error); ok {
 		r0 = rf(ctx, deltaData)
 	} else {
 		r0 = ret.Error(0)
@@ -846,14 +846,14 @@ type MockSegment_LoadDeltaData_Call struct {
 
 // LoadDeltaData is a helper method to define mock.On call
 //   - ctx context.Context
-//   - deltaData *storage.DeleteData
+//   - deltaData *storage.DeltaData
 func (_e *MockSegment_Expecter) LoadDeltaData(ctx interface{}, deltaData interface{}) *MockSegment_LoadDeltaData_Call {
 	return &MockSegment_LoadDeltaData_Call{Call: _e.mock.On("LoadDeltaData", ctx, deltaData)}
 }
 
-func (_c *MockSegment_LoadDeltaData_Call) Run(run func(ctx context.Context, deltaData *storage.DeleteData)) *MockSegment_LoadDeltaData_Call {
+func (_c *MockSegment_LoadDeltaData_Call) Run(run func(ctx context.Context, deltaData *storage.DeltaData)) *MockSegment_LoadDeltaData_Call {
 	_c.Call.Run(func(args mock.Arguments) {
-		run(args[0].(context.Context), args[1].(*storage.DeleteData))
+		run(args[0].(context.Context), args[1].(*storage.DeltaData))
 	})
 	return _c
 }
 
@@ -863,7 +863,7 @@ func (_c *MockSegment_LoadDeltaData_Call) Return(_a0 error) *MockSegment_LoadDel
 	return _c
 }
 
-func (_c *MockSegment_LoadDeltaData_Call) RunAndReturn(run func(context.Context, *storage.DeleteData) error) *MockSegment_LoadDeltaData_Call {
+func (_c *MockSegment_LoadDeltaData_Call) RunAndReturn(run func(context.Context, *storage.DeltaData) error) *MockSegment_LoadDeltaData_Call {
 	_c.Call.Return(run)
 	return _c
 }
@@ -1018,9 +1018,9 @@ func (s *LocalSegment) AddFieldDataInfo(ctx context.Context, rowCount int64, fie
 	return nil
 }
 
-func (s *LocalSegment) LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error {
-	pks, tss := deltaData.Pks, deltaData.Tss
-	rowNum := deltaData.RowCount
+func (s *LocalSegment) LoadDeltaData(ctx context.Context, deltaData *storage.DeltaData) error {
+	pks, tss := deltaData.DeletePks, deltaData.DeleteTimestamps
+	rowNum := deltaData.DelRowCount
 
 	if !s.ptrLock.RLockIf(state.IsNotReleased) {
 		return merr.WrapErrSegmentNotLoaded(s.ID(), "segment released")
@@ -1033,31 +1033,9 @@ func (s *LocalSegment) LoadDeltaData(ctx context.Context, deltaData *storage.Del
 		zap.Int64("segmentID", s.ID()),
 	)
 
-	pkType := pks[0].Type()
-	ids := &schemapb.IDs{}
-	switch pkType {
-	case schemapb.DataType_Int64:
-		int64Pks := make([]int64, len(pks))
-		for index, pk := range pks {
-			int64Pks[index] = pk.(*storage.Int64PrimaryKey).Value
-		}
-		ids.IdField = &schemapb.IDs_IntId{
-			IntId: &schemapb.LongArray{
-				Data: int64Pks,
-			},
-		}
-	case schemapb.DataType_VarChar:
-		varCharPks := make([]string, len(pks))
-		for index, pk := range pks {
-			varCharPks[index] = pk.(*storage.VarCharPrimaryKey).Value
-		}
-		ids.IdField = &schemapb.IDs_StrId{
-			StrId: &schemapb.StringArray{
-				Data: varCharPks,
-			},
-		}
-	default:
-		return fmt.Errorf("invalid data type of primary keys")
+	ids, err := storage.ParsePrimaryKeysBatch2IDs(pks)
+	if err != nil {
+		return err
 	}
 
 	idsBlob, err := proto.Marshal(ids)
@@ -78,7 +78,7 @@ type Segment interface {
 	// Modification related
 	Insert(ctx context.Context, rowIDs []int64, timestamps []typeutil.Timestamp, record *segcorepb.InsertRecord) error
 	Delete(ctx context.Context, primaryKeys []storage.PrimaryKey, timestamps []typeutil.Timestamp) error
-	LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error
+	LoadDeltaData(ctx context.Context, deltaData *storage.DeltaData) error
 	LastDeltaTimestamp() uint64
 	Release(ctx context.Context, opts ...releaseOption)
@@ -151,12 +151,14 @@ func (s *L0Segment) Delete(ctx context.Context, primaryKeys []storage.PrimaryKey
 	return merr.WrapErrIoFailedReason("delete not supported for L0 segment")
 }
 
-func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.DeleteData) error {
+func (s *L0Segment) LoadDeltaData(ctx context.Context, deltaData *storage.DeltaData) error {
 	s.dataGuard.Lock()
 	defer s.dataGuard.Unlock()
 
-	s.pks = append(s.pks, deltaData.Pks...)
-	s.tss = append(s.tss, deltaData.Tss...)
+	for i := 0; i < deltaData.DeletePks.Len(); i++ {
+		s.pks = append(s.pks, deltaData.DeletePks.Get(i))
+	}
+	s.tss = append(s.tss, deltaData.DeleteTimestamps...)
 	return nil
 }
 
@@ -1207,9 +1207,23 @@ func (loader *segmentLoader) LoadDeltaLogs(ctx context.Context, segment Segment,
 	rowNums := lo.SumBy(blobs, func(blob *storage.Blob) int64 {
 		return blob.RowNum
 	})
-	deltaData := &storage.DeleteData{
-		Pks: make([]storage.PrimaryKey, 0, rowNums),
-		Tss: make([]uint64, 0, rowNums),
+	var deltaData *storage.DeltaData
+	collection := loader.manager.Collection.Get(segment.Collection())
+
+	helper, _ := typeutil.CreateSchemaHelper(collection.Schema())
+	pkField, _ := helper.GetPrimaryKeyField()
+	switch pkField.DataType {
+	case schemapb.DataType_Int64:
+		deltaData = &storage.DeltaData{
+			DeletePks:        storage.NewInt64PrimaryKeys(int(rowNums)),
+			DeleteTimestamps: make([]uint64, 0, rowNums),
+		}
+	case schemapb.DataType_VarChar:
+		deltaData = &storage.DeltaData{
+			DeletePks:        storage.NewVarcharPrimaryKeys(int(rowNums)),
+			DeleteTimestamps: make([]uint64, 0, rowNums),
+		}
 	}
 
 	reader, err := storage.CreateDeltalogReader(blobs)
@@ -1226,7 +1240,9 @@
 			return err
 		}
 		dl := reader.Value()
-		deltaData.Append(dl.Pk, dl.Ts)
+		deltaData.DeletePks.MustAppend(dl.Pk)
+		deltaData.DeleteTimestamps = append(deltaData.DeleteTimestamps, dl.Ts)
+		deltaData.DelRowCount++
 	}
 
 	err = segment.LoadDeltaData(ctx, deltaData)
@@ -1234,7 +1250,7 @@
 		return err
 	}
 
-	log.Info("load delta logs done", zap.Int64("deleteCount", deltaData.RowCount))
+	log.Info("load delta logs done", zap.Int64("deleteCount", deltaData.DelRowCount))
 	return nil
 }
 
@@ -34,14 +34,14 @@ var parserPool = &fastjson.ParserPool{}
 // DeltaData stores delta data
 // currently only delete tuples are stored
 type DeltaData struct {
-	pkType schemapb.DataType
+	PkType schemapb.DataType
 	// delete tuples
-	delPks PrimaryKeys
-	delTss []Timestamp
+	DeletePks        PrimaryKeys
+	DeleteTimestamps []Timestamp
 
 	// stats
-	delRowCount int64
-	memSize     int64
+	DelRowCount int64
+	MemSize     int64
 }
 
 type DeleteLog struct {
@@ -23,6 +23,7 @@ import (
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
 	"github.com/milvus-io/milvus/pkg/log"
+	"github.com/milvus-io/milvus/pkg/util/merr"
 )
 
 type PrimaryKey interface {
@@ -350,6 +351,33 @@ func ParseIDs2PrimaryKeys(ids *schemapb.IDs) []PrimaryKey {
 	return ret
 }
 
+func ParsePrimaryKeysBatch2IDs(pks PrimaryKeys) (*schemapb.IDs, error) {
+	ret := &schemapb.IDs{}
+	if pks.Len() == 0 {
+		return ret, nil
+	}
+	switch pks.Type() {
+	case schemapb.DataType_Int64:
+		int64Pks := pks.(*Int64PrimaryKeys)
+		ret.IdField = &schemapb.IDs_IntId{
+			IntId: &schemapb.LongArray{
+				Data: int64Pks.values,
+			},
+		}
+	case schemapb.DataType_VarChar:
+		varcharPks := pks.(*VarcharPrimaryKeys)
+		ret.IdField = &schemapb.IDs_StrId{
+			StrId: &schemapb.StringArray{
+				Data: varcharPks.values,
+			},
+		}
+	default:
+		return nil, merr.WrapErrServiceInternal("parsing unsupported pk type", pks.Type().String())
+	}
+
+	return ret, nil
+}
+
 func ParsePrimaryKeys2IDs(pks []PrimaryKey) *schemapb.IDs {
 	ret := &schemapb.IDs{}
 	if len(pks) == 0 {
@@ -177,3 +177,37 @@ func TestParsePrimaryKeysAndIDs(t *testing.T) {
 		assert.ElementsMatch(t, c.pks, testPks)
 	}
 }
+
+type badPks struct {
+	PrimaryKeys
+}
+
+func (pks *badPks) Type() schemapb.DataType {
+	return schemapb.DataType_None
+}
+
+func TestParsePrimaryKeysBatch2IDs(t *testing.T) {
+	t.Run("success_cases", func(t *testing.T) {
+		intPks := NewInt64PrimaryKeys(3)
+		intPks.AppendRaw(1, 2, 3)
+
+		ids, err := ParsePrimaryKeysBatch2IDs(intPks)
+		assert.NoError(t, err)
+		assert.ElementsMatch(t, []int64{1, 2, 3}, ids.GetIntId().GetData())
+
+		strPks := NewVarcharPrimaryKeys(3)
+		strPks.AppendRaw("1", "2", "3")
+
+		ids, err = ParsePrimaryKeysBatch2IDs(strPks)
+		assert.NoError(t, err)
+		assert.ElementsMatch(t, []string{"1", "2", "3"}, ids.GetStrId().GetData())
+	})
+
+	t.Run("unsupport_type", func(t *testing.T) {
+		intPks := NewInt64PrimaryKeys(3)
+		intPks.AppendRaw(1, 2, 3)
+
+		_, err := ParsePrimaryKeysBatch2IDs(&badPks{PrimaryKeys: intPks})
+		assert.Error(t, err)
+	})
+}
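
Taken together, the changes compose as sketched below: build the typed container, wrap it in the now-exported `DeltaData` fields, and convert the whole batch with the new helper. This is an illustrative sketch rather than code from the patch, and since `internal/storage` is an internal Go package it is importable only from within the milvus module.

package main

import (
	"fmt"

	"github.com/milvus-io/milvus/internal/storage"
)

func main() {
	// Typed container introduced by this PR: one backing []int64.
	pks := storage.NewInt64PrimaryKeys(3)
	pks.AppendRaw(1, 2, 3)

	// DeltaData now exposes its delete tuple fields directly.
	deltaData := &storage.DeltaData{
		DeletePks:        pks,
		DeleteTimestamps: []storage.Timestamp{100, 101, 102},
		DelRowCount:      3,
	}

	// Batch conversion that replaces the per-element switch formerly
	// inlined in LocalSegment.LoadDeltaData.
	ids, err := storage.ParsePrimaryKeysBatch2IDs(deltaData.DeletePks)
	if err != nil {
		panic(err)
	}
	fmt.Println(ids.GetIntId().GetData()) // [1 2 3]
}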