mirror of https://github.com/milvus-io/milvus.git
enhance: Add memory size for binlog (#33025)
issue: #33005

1. Add a `MemorySize` field for insert binlogs.
2. `LogSize` is the size of the serialized binlog file in object storage.
3. `MemorySize` is the size of the data once loaded into memory.

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Signed-off-by: cai.zhang <cai.zhang@zilliz.com>

pull/33076/head
parent d4837307b3
commit 6ea7633bd5
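For orientation, a minimal sketch of the distinction this commit introduces (the field values and path below are illustrative, not taken from the patch):

	// After this change a datapb.Binlog entry carries two sizes.
	// LogSize is what the serialized binlog file occupies in object storage;
	// MemorySize is what the same data occupies once decoded and loaded.
	// The hunks below switch size-based decisions (compaction triggers,
	// load-resource estimation) from LogSize to MemorySize.
	binlog := &datapb.Binlog{
		EntriesNum: 5,
		LogPath:    "insert_log/1/2/3/100/1", // hypothetical object-storage key
		LogSize:    100,                      // serialized bytes in storage
		MemorySize: 160,                      // decoded bytes in memory (assumed value)
	}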
@@ -813,7 +813,7 @@ func (t *compactionTrigger) ShouldDoSingleCompaction(segment *SegmentInfo, compa
 					zap.Uint64("binlogTimestampTo", l.TimestampTo),
 					zap.Uint64("compactExpireTime", compactTime.expireTime))
 				totalExpiredRows += int(l.GetEntriesNum())
-				totalExpiredSize += l.GetLogSize()
+				totalExpiredSize += l.GetMemorySize()
 			}
 		}
 	}
@@ -831,7 +831,7 @@ func (t *compactionTrigger) ShouldDoSingleCompaction(segment *SegmentInfo, compa
 	for _, deltaLogs := range segment.GetDeltalogs() {
 		for _, l := range deltaLogs.GetBinlogs() {
 			totalDeletedRows += int(l.GetEntriesNum())
-			totalDeleteLogSize += l.GetLogSize()
+			totalDeleteLogSize += l.GetMemorySize()
 		}
 	}
 
@@ -893,7 +893,7 @@ func Test_compactionTrigger_noplan(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: 5, LogPath: "log1", LogSize: 100},
+								{EntriesNum: 5, LogPath: "log1", LogSize: 100, MemorySize: 100},
 							},
 						},
 					},
@@ -913,7 +913,7 @@ func Test_compactionTrigger_noplan(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: 5, LogPath: "log2", LogSize: Params.DataCoordCfg.SegmentMaxSize.GetAsInt64()*1024*1024 - 1},
+								{EntriesNum: 5, LogPath: "log2", LogSize: Params.DataCoordCfg.SegmentMaxSize.GetAsInt64()*1024*1024 - 1, MemorySize: Params.DataCoordCfg.SegmentMaxSize.GetAsInt64()*1024*1024 - 1},
 							},
 						},
 					},
@@ -1015,7 +1015,7 @@ func Test_compactionTrigger_PrioritizedCandi(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: numRows, LogPath: "log1", LogSize: 100},
+								{EntriesNum: numRows, LogPath: "log1", LogSize: 100, MemorySize: 100},
 							},
 						},
 					},
@@ -1207,7 +1207,7 @@ func Test_compactionTrigger_SmallCandi(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: 5, LogPath: "log1", LogSize: numRows * 1024 * 1024},
+								{EntriesNum: 5, LogPath: "log1", LogSize: numRows * 1024 * 1024, MemorySize: numRows * 1024 * 1024},
 							},
 						},
 					},
@@ -1400,7 +1400,7 @@ func Test_compactionTrigger_SqueezeNonPlannedSegs(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: 5, LogPath: "log1", LogSize: numRows * 1024 * 1024},
+								{EntriesNum: 5, LogPath: "log1", LogSize: numRows * 1024 * 1024, MemorySize: numRows * 1024 * 1024},
 							},
 						},
 					},
@@ -1627,7 +1627,7 @@ func Test_compactionTrigger_noplan_random_size(t *testing.T) {
 					Binlogs: []*datapb.FieldBinlog{
 						{
 							Binlogs: []*datapb.Binlog{
-								{EntriesNum: 5, LogPath: "log1", LogSize: size[i] * 2 * 1024 * 1024},
+								{EntriesNum: 5, LogPath: "log1", LogSize: size[i] * 2 * 1024 * 1024, MemorySize: size[i] * 2 * 1024 * 1024},
 							},
 						},
 					},
@@ -1766,7 +1766,7 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
 	for i := UniqueID(0); i < 1000; i++ {
 		binlogs = append(binlogs, &datapb.FieldBinlog{
 			Binlogs: []*datapb.Binlog{
-				{EntriesNum: 5, LogPath: "log1", LogSize: 100},
+				{EntriesNum: 5, LogPath: "log1", LogSize: 100, MemorySize: 100},
 			},
 		})
 	}
@@ -1810,7 +1810,7 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
 	for i := UniqueID(0); i < 100; i++ {
 		binlogs2 = append(binlogs2, &datapb.FieldBinlog{
 			Binlogs: []*datapb.Binlog{
-				{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500},
+				{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500, MemorySize: 100000},
 			},
 		})
 	}
@@ -1818,7 +1818,7 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
 	for i := UniqueID(0); i < 100; i++ {
 		binlogs2 = append(binlogs2, &datapb.FieldBinlog{
 			Binlogs: []*datapb.Binlog{
-				{EntriesNum: 5, LogPath: "log1", LogSize: 1000000, TimestampFrom: 300, TimestampTo: 1000},
+				{EntriesNum: 5, LogPath: "log1", LogSize: 1000000, TimestampFrom: 300, TimestampTo: 1000, MemorySize: 1000000},
 			},
 		})
 	}
@@ -1853,7 +1853,7 @@ func Test_compactionTrigger_shouldDoSingleCompaction(t *testing.T) {
 	for i := UniqueID(0); i < 100; i++ {
 		binlogs3 = append(binlogs2, &datapb.FieldBinlog{
 			Binlogs: []*datapb.Binlog{
-				{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500},
+				{EntriesNum: 5, LogPath: "log1", LogSize: 100000, TimestampFrom: 300, TimestampTo: 500, MemorySize: 100000},
 			},
 		})
 	}
@@ -2162,7 +2162,7 @@ func (s *CompactionTriggerSuite) genSeg(segID, numRows int64) *datapb.SegmentInf
 		Binlogs: []*datapb.FieldBinlog{
 			{
 				Binlogs: []*datapb.Binlog{
-					{EntriesNum: 5, LogPath: "log1", LogSize: 100},
+					{EntriesNum: 5, LogPath: "log1", LogSize: 100, MemorySize: 100},
 				},
 			},
 		},

@@ -4,12 +4,10 @@ import (
 	"fmt"
 
 	"github.com/samber/lo"
-	"go.uber.org/zap"
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
-	"github.com/milvus-io/milvus/pkg/log"
 )
 
 type CompactionView interface {
@@ -166,29 +164,11 @@ func GetBinlogCount(fieldBinlogs []*datapb.FieldBinlog) int {
 	return num
 }
 
-func GetExpiredSizeAsBytes(expireTime Timestamp, fieldBinlogs []*datapb.FieldBinlog) float64 {
-	var expireSize float64
-	for _, binlogs := range fieldBinlogs {
-		for _, l := range binlogs.GetBinlogs() {
-			// TODO, we should probably estimate expired log entries by total rows
-			// in binlog and the ralationship of timeTo, timeFrom and expire time
-			if l.TimestampTo < expireTime {
-				log.Info("mark binlog as expired",
-					zap.Int64("binlogID", l.GetLogID()),
-					zap.Uint64("binlogTimestampTo", l.TimestampTo),
-					zap.Uint64("compactExpireTime", expireTime))
-				expireSize += float64(l.GetLogSize())
-			}
-		}
-	}
-	return expireSize
-}
-
 func GetBinlogSizeAsBytes(deltaBinlogs []*datapb.FieldBinlog) float64 {
 	var deltaSize float64
 	for _, deltaLogs := range deltaBinlogs {
 		for _, l := range deltaLogs.GetBinlogs() {
-			deltaSize += float64(l.GetLogSize())
+			deltaSize += float64(l.GetMemorySize())
 		}
 	}
 	return deltaSize
@@ -330,6 +330,7 @@ func genTestDeltalogs(logCount int, logSize int64) []*datapb.FieldBinlog {
 	for i := 0; i < logCount; i++ {
 		binlog := &datapb.Binlog{
 			LogSize: logSize,
+			MemorySize: logSize,
 		}
 		binlogs = append(binlogs, binlog)
 	}
@@ -414,19 +414,19 @@ func (s *SegmentInfo) getSegmentSize() int64 {
 	var size int64
 	for _, binlogs := range s.GetBinlogs() {
 		for _, l := range binlogs.GetBinlogs() {
-			size += l.GetLogSize()
+			size += l.GetMemorySize()
 		}
 	}
 
 	for _, deltaLogs := range s.GetDeltalogs() {
 		for _, l := range deltaLogs.GetBinlogs() {
-			size += l.GetLogSize()
+			size += l.GetMemorySize()
 		}
 	}
 
 	for _, statsLogs := range s.GetStatslogs() {
 		for _, l := range statsLogs.GetBinlogs() {
-			size += l.GetLogSize()
+			size += l.GetMemorySize()
 		}
 	}
 	if size > 0 {
@@ -148,19 +148,19 @@ func getCompactedSegmentSize(s *datapb.CompactionSegment) int64 {
 	if s != nil {
 		for _, binlogs := range s.GetInsertLogs() {
 			for _, l := range binlogs.GetBinlogs() {
-				segmentSize += l.GetLogSize()
+				segmentSize += l.GetMemorySize()
 			}
 		}
 
 		for _, deltaLogs := range s.GetDeltalogs() {
 			for _, l := range deltaLogs.GetBinlogs() {
-				segmentSize += l.GetLogSize()
+				segmentSize += l.GetMemorySize()
 			}
 		}
 
-		for _, statsLogs := range s.GetDeltalogs() {
+		for _, statsLogs := range s.GetField2StatslogPaths() {
 			for _, l := range statsLogs.GetBinlogs() {
-				segmentSize += l.GetLogSize()
+				segmentSize += l.GetMemorySize()
 			}
 		}
 	}
@@ -232,7 +232,7 @@ func calculateL0SegmentSize(fields []*datapb.FieldBinlog) float64 {
 	size := int64(0)
 	for _, field := range fields {
 		for _, binlog := range field.GetBinlogs() {
-			size += binlog.GetLogSize()
+			size += binlog.GetMemorySize()
 		}
 	}
 	return float64(size)
@@ -194,7 +194,7 @@ func (suite *UtilSuite) TestCalculateL0SegmentSize() {
 	logsize := int64(100)
 	fields := []*datapb.FieldBinlog{{
 		FieldID: 102,
-		Binlogs: []*datapb.Binlog{{LogSize: logsize}},
+		Binlogs: []*datapb.Binlog{{LogSize: logsize, MemorySize: logsize}},
 	}}
 
 	suite.Equal(calculateL0SegmentSize(fields), float64(logsize))
@@ -106,7 +106,7 @@ func genInsertBlobs(b io.BinlogIO, allocator allocator.Allocator, data *InsertDa
 		kvs[key] = value
 		inpaths[fID] = &datapb.FieldBinlog{
 			FieldID: fID,
-			Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: blob.RowNum}},
+			Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: blob.RowNum, MemorySize: blob.GetMemorySize()}},
 		}
 	}
 
@@ -135,7 +135,7 @@ func genStatBlobs(b io.BinlogIO, allocator allocator.Allocator, stats *storage.P
 
 	statPaths[fID] = &datapb.FieldBinlog{
 		FieldID: fID,
-		Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: totRows}},
+		Binlogs: []*datapb.Binlog{{LogSize: int64(fileLen), LogPath: key, EntriesNum: totRows, MemorySize: int64(fileLen)}},
 	}
 	return statPaths, nil
 }
@@ -238,6 +238,7 @@ func uploadDeltaLog(
 				EntriesNum: dData.RowCount,
 				LogPath:    k,
 				LogSize:    int64(len(v)),
+				MemorySize: dData.Size(),
 			}},
 		})
 	} else {
@@ -154,7 +154,7 @@ func (t *levelZeroCompactionTask) compact() (*datapb.CompactionPlanResult, error
 		for _, d := range s.GetDeltalogs() {
 			for _, l := range d.GetBinlogs() {
 				paths = append(paths, l.GetLogPath())
-				totalSize += l.GetLogSize()
+				totalSize += l.GetMemorySize()
 			}
 		}
 		if len(paths) > 0 {
@@ -349,6 +349,7 @@ func (t *levelZeroCompactionTask) composeDeltalog(segmentID int64, dData *storag
 		LogID:         logID,
 		TimestampFrom: minTs,
 		TimestampTo:   maxTs,
+		MemorySize:    dData.Size(),
 	}
 
 	return uploadKv, deltalog, nil
@@ -259,7 +259,8 @@ func (t *SyncTask) processInsertBlobs() {
 			TimestampFrom: t.tsFrom,
 			TimestampTo:   t.tsTo,
 			LogPath:       key,
-			LogSize:       t.binlogMemsize[fieldID],
+			LogSize:       int64(len(blob.GetValue())),
+			MemorySize:    t.binlogMemsize[fieldID],
 		})
 	}
 }
@@ -288,6 +289,7 @@ func (t *SyncTask) processDeltaBlob() {
 		data.TimestampFrom = t.tsFrom
 		data.TimestampTo = t.tsTo
 		data.EntriesNum = t.deltaRowCount
+		data.MemorySize = t.deltaBlob.GetMemorySize()
 		t.appendDeltalog(data)
 	}
 }
@@ -304,6 +306,7 @@ func (t *SyncTask) convertBlob2StatsBinlog(blob *storage.Blob, fieldID, logID in
 		TimestampTo: t.tsTo,
 		LogPath:     key,
 		LogSize:     int64(len(value)),
+		MemorySize:  int64(len(value)),
 	})
 }
 
@@ -193,6 +193,13 @@ func (kc *Catalog) listBinlogs(binlogType storage.BinlogType) (map[typeutil.Uniq
 			return fmt.Errorf("prefix:%s, %w", path.Join(kc.metaRootpath, logPathPrefix), err)
 		}
 
+		// set log size to memory size if memory size is zero for old segment before v2.4.3
+		for i, b := range fieldBinlog.GetBinlogs() {
+			if b.GetMemorySize() == 0 {
+				fieldBinlog.Binlogs[i].MemorySize = b.GetLogSize()
+			}
+		}
+
 		// no need to set log path and only store log id
 		ret[segmentID] = append(ret[segmentID], fieldBinlog)
 		return nil
@@ -405,6 +405,10 @@ message Binlog {
   string log_path = 4;
   int64 log_size = 5;
   int64 logID = 6;
+  // memory_size represents the size occupied by loading data into memory.
+  // log_size represents the size after data serialized.
+  // for stats_log, the memory_size always equal log_size.
+  int64 memory_size = 7;
 }
 
 message GetRecoveryInfoResponse {
@@ -124,6 +124,22 @@ func (broker *CoordinatorBroker) GetRecoveryInfo(ctx context.Context, collection
 		return nil, nil, err
 	}
 
+	// fallback binlog memory size to log size when it is zero
+	fallbackBinlogMemorySize := func(binlogs []*datapb.FieldBinlog) {
+		for _, insertBinlogs := range binlogs {
+			for _, b := range insertBinlogs.GetBinlogs() {
+				if b.GetMemorySize() == 0 {
+					b.MemorySize = b.GetLogSize()
+				}
+			}
+		}
+	}
+	for _, segBinlogs := range recoveryInfo.GetBinlogs() {
+		fallbackBinlogMemorySize(segBinlogs.GetFieldBinlogs())
+		fallbackBinlogMemorySize(segBinlogs.GetStatslogs())
+		fallbackBinlogMemorySize(segBinlogs.GetDeltalogs())
+	}
+
 	return recoveryInfo.Channels, recoveryInfo.Binlogs, nil
 }
 
@@ -68,7 +68,7 @@ func (c *IndexAttrCache) GetIndexResourceUsage(indexInfo *querypb.FieldIndexInfo
 	if indexType == indexparamcheck.IndexINVERTED {
 		neededMemSize := 0
 		// we will mmap the binlog if the index type is inverted index.
-		neededDiskSize := indexInfo.IndexSize + getBinlogDataSize(fieldBinlog)
+		neededDiskSize := indexInfo.IndexSize + getBinlogDataDiskSize(fieldBinlog)
 		return uint64(neededMemSize), uint64(neededDiskSize), nil
 	}
 
@@ -1422,7 +1422,7 @@ func (s *LocalSegment) UpdateFieldRawDataSize(ctx context.Context, numRows int64
 	fieldID := fieldBinlog.FieldID
 	fieldDataSize := int64(0)
 	for _, binlog := range fieldBinlog.GetBinlogs() {
-		fieldDataSize += binlog.LogSize
+		fieldDataSize += binlog.GetMemorySize()
 	}
 	GetDynamicPool().Submit(func() (any, error) {
 		status = C.UpdateFieldRawDataSize(s.ptr, C.int64_t(fieldID), C.int64_t(numRows), C.int64_t(fieldDataSize))
@@ -1612,10 +1612,10 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn
 		} else {
 			mmapEnabled = common.IsFieldMmapEnabled(schema, fieldID) ||
 				(!common.FieldHasMmapKey(schema, fieldID) && params.Params.QueryNodeCfg.MmapEnabled.GetAsBool())
-			binlogSize := uint64(getBinlogDataSize(fieldBinlog))
+			binlogSize := uint64(getBinlogDataMemorySize(fieldBinlog))
 			segmentMemorySize += binlogSize
 			if mmapEnabled {
-				segmentDiskSize += binlogSize
+				segmentDiskSize += uint64(getBinlogDataDiskSize(fieldBinlog))
 			} else {
 				if multiplyFactor.enableTempSegmentIndex {
 					segmentMemorySize += uint64(float64(binlogSize) * multiplyFactor.tempSegmentIndexFactor)
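The renamed helpers make the mmap accounting explicit: the memory estimate always uses the decoded size, while a memory-mapped field is charged disk at the serialized size. A hedged condensation of the hunk above (the wrapper function is hypothetical; getBinlogDataMemorySize and getBinlogDataDiskSize are the helpers added later in this commit):

	// estimateFieldUsage condenses the accounting shown in the hunk above.
	func estimateFieldUsage(fieldBinlog *datapb.FieldBinlog, mmapEnabled bool) (mem, disk uint64) {
		mem = uint64(getBinlogDataMemorySize(fieldBinlog)) // decoded bytes, always counted
		if mmapEnabled {
			disk = uint64(getBinlogDataDiskSize(fieldBinlog)) // serialized bytes on disk
		}
		return mem, disk
	}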
@@ -1629,7 +1629,7 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn
 
 	// get size of stats data
 	for _, fieldBinlog := range loadInfo.Statslogs {
-		segmentMemorySize += uint64(getBinlogDataSize(fieldBinlog))
+		segmentMemorySize += uint64(getBinlogDataMemorySize(fieldBinlog))
 	}
 
 	// binlog & statslog use general load factor
@@ -1637,7 +1637,7 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn
 
 	// get size of delete data
 	for _, fieldBinlog := range loadInfo.Deltalogs {
-		segmentMemorySize += uint64(float64(getBinlogDataSize(fieldBinlog)) * multiplyFactor.deltaDataExpansionFactor)
+		segmentMemorySize += uint64(float64(getBinlogDataMemorySize(fieldBinlog)) * multiplyFactor.deltaDataExpansionFactor)
 	}
 	return &ResourceUsage{
 		MemorySize: segmentMemorySize,
|
||||||
return loader.waitSegmentLoadDone(ctx, commonpb.SegmentState_SegmentStateNone, []int64{loadInfo.GetSegmentID()}, version)
|
return loader.waitSegmentLoadDone(ctx, commonpb.SegmentState_SegmentStateNone, []int64{loadInfo.GetSegmentID()}, version)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getBinlogDataSize(fieldBinlog *datapb.FieldBinlog) int64 {
|
func getBinlogDataDiskSize(fieldBinlog *datapb.FieldBinlog) int64 {
|
||||||
fieldSize := int64(0)
|
fieldSize := int64(0)
|
||||||
for _, binlog := range fieldBinlog.Binlogs {
|
for _, binlog := range fieldBinlog.Binlogs {
|
||||||
fieldSize += binlog.LogSize
|
fieldSize += binlog.GetLogSize()
|
||||||
|
}
|
||||||
|
|
||||||
|
return fieldSize
|
||||||
|
}
|
||||||
|
|
||||||
|
func getBinlogDataMemorySize(fieldBinlog *datapb.FieldBinlog) int64 {
|
||||||
|
fieldSize := int64(0)
|
||||||
|
for _, binlog := range fieldBinlog.Binlogs {
|
||||||
|
fieldSize += binlog.GetMemorySize()
|
||||||
}
|
}
|
||||||
|
|
||||||
return fieldSize
|
return fieldSize
|
||||||
|
|
|
@@ -518,6 +518,7 @@ func (suite *SegmentLoaderSuite) TestLoadIndexWithLimitedResource() {
 				Binlogs: []*datapb.Binlog{
 					{
 						LogSize: 1000000000,
+						MemorySize: 1000000000,
 					},
 				},
 			},
@@ -860,8 +861,8 @@ func (suite *SegmentLoaderDetailSuite) TestRequestResource() {
 		Deltalogs: []*datapb.FieldBinlog{
 			{
 				Binlogs: []*datapb.Binlog{
-					{LogSize: 10000},
-					{LogSize: 12000},
+					{LogSize: 10000, MemorySize: 10000},
+					{LogSize: 12000, MemorySize: 12000},
 				},
 			},
 		},
@@ -79,6 +79,7 @@ func (suite *SegmentSuite) SetupTest() {
 		Binlogs: []*datapb.Binlog{
 			{
 				LogSize: 10086,
+				MemorySize: 10086,
 			},
 		},
 	},
@@ -422,6 +422,22 @@ func (node *QueryNode) LoadSegments(ctx context.Context, req *querypb.LoadSegmen
 		return merr.Status(err), nil
 	}
 
+	// fallback binlog memory size to log size when it is zero
+	fallbackBinlogMemorySize := func(binlogs []*datapb.FieldBinlog) {
+		for _, insertBinlogs := range binlogs {
+			for _, b := range insertBinlogs.GetBinlogs() {
+				if b.GetMemorySize() == 0 {
+					b.MemorySize = b.GetLogSize()
+				}
+			}
+		}
+	}
+	for _, s := range req.GetInfos() {
+		fallbackBinlogMemorySize(s.GetBinlogPaths())
+		fallbackBinlogMemorySize(s.GetStatslogs())
+		fallbackBinlogMemorySize(s.GetDeltalogs())
+	}
+
 	// Delegates request to workers
 	if req.GetNeedTransfer() {
 		delegator, ok := node.delegators.Get(segment.GetInsertChannel())
@@ -70,7 +70,7 @@ const InvalidUniqueID = UniqueID(-1)
 type Blob struct {
 	Key        string
 	Value      []byte
-	Size       int64
+	MemorySize int64
 	RowNum     int64
 }
 
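With the struct field renamed from Size to MemorySize, producers record the in-memory footprint at serialization time and readers fetch it through the accessor added in the next hunk. A sketch of the producer side (variable names follow the InsertCodec hunk further below):

	// The Blob tracks the decoded footprint of the data it serializes;
	// this value later flows into datapb.Binlog.MemorySize, while
	// len(buffer) remains the LogSize side of the pair.
	blob := &Blob{
		Key:        blobKey,
		Value:      buffer,
		RowNum:     rowNum,
		MemorySize: int64(singleData.GetMemorySize()),
	}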
@@ -110,6 +110,11 @@ func (b Blob) GetValue() []byte {
 	return b.Value
 }
 
+// GetMemorySize returns the memory size of blob
+func (b Blob) GetMemorySize() int64 {
+	return b.MemorySize
+}
+
 // InsertCodec serializes and deserializes the insert data
 // Blob key example:
 // ${tenant}/insert_log/${collection_id}/${partition_id}/${segment_id}/${field_id}/${log_idx}
@@ -421,6 +426,7 @@ func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID Unique
 			Key:    blobKey,
 			Value:  buffer,
 			RowNum: rowNum,
+			MemorySize: int64(singleData.GetMemorySize()),
 		})
 		eventWriter.Close()
 		writer.Close()
@@ -1019,6 +1025,7 @@ func (deleteCodec *DeleteCodec) Serialize(collectionID UniqueID, partitionID Uni
 	}
 	blob := &Blob{
 		Value: buffer,
+		MemorySize: data.Size(),
 	}
 	return blob, nil
 }
@@ -122,17 +122,17 @@ func TestIndexCodec(t *testing.T) {
 		{
 			Key:   "12345",
 			Value: []byte{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7},
-			Size:  14,
+			MemorySize: 14,
 		},
 		{
 			Key:   "6666",
 			Value: []byte{6, 6, 6, 6, 6, 1, 2, 3, 4, 5, 6, 7},
-			Size:  12,
+			MemorySize: 12,
 		},
 		{
 			Key:   "8885",
 			Value: []byte{8, 8, 8, 8, 8, 8, 8, 8, 2, 3, 4, 5, 6, 7},
-			Size:  14,
+			MemorySize: 14,
 		},
 	}
 	indexParams := map[string]string{
@@ -286,7 +286,7 @@ func (index *CgoIndex) Serialize() ([]*Blob, error) {
 		blob := &Blob{
 			Key:   key,
 			Value: value,
-			Size:  size,
+			MemorySize: size,
 		}
 		ret = append(ret, blob)
 	}