mirror of https://github.com/milvus-io/milvus.git
172 lines
4.1 KiB
Go
172 lines
4.1 KiB
Go
package writebuffer
|
|
|
|
import (
|
|
"math"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/mq/msgstream"
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
)
|
|
|
|
// noLimit is the sentinel stored in a limit field to disable that limit.
const noLimit int64 = -1
|
|
|
|
type BufferBase struct {
|
|
rows int64
|
|
rowLimit int64
|
|
size int64
|
|
sizeLimit int64
|
|
|
|
TimestampFrom typeutil.Timestamp
|
|
TimestampTo typeutil.Timestamp
|
|
|
|
startPos *msgpb.MsgPosition
|
|
endPos *msgpb.MsgPosition
|
|
}
|
|
|
|
func (b *BufferBase) UpdateStatistics(entryNum, size int64, tr TimeRange, startPos, endPos *msgpb.MsgPosition) {
|
|
b.rows += entryNum
|
|
b.size += size
|
|
|
|
if tr.timestampMin < b.TimestampFrom {
|
|
b.TimestampFrom = tr.timestampMin
|
|
}
|
|
if tr.timestampMax > b.TimestampTo {
|
|
b.TimestampTo = tr.timestampMax
|
|
}
|
|
|
|
if b.startPos == nil || startPos.Timestamp < b.startPos.Timestamp {
|
|
b.startPos = startPos
|
|
}
|
|
if b.endPos == nil || endPos.Timestamp > b.endPos.Timestamp {
|
|
b.endPos = endPos
|
|
}
|
|
}
|
|
|
|
func (b *BufferBase) IsFull() bool {
|
|
return (b.rowLimit != noLimit && b.rows >= b.rowLimit) ||
|
|
(b.sizeLimit != noLimit && b.size >= b.sizeLimit)
|
|
}
|
|
|
|
func (b *BufferBase) IsEmpty() bool {
|
|
return b.rows == 0
|
|
}
|
|
|
|
func (b *BufferBase) MinTimestamp() typeutil.Timestamp {
|
|
if b.startPos == nil {
|
|
return math.MaxUint64
|
|
}
|
|
return b.startPos.GetTimestamp()
|
|
}
|
|
|
|
type InsertBuffer struct {
|
|
BufferBase
|
|
collSchema *schemapb.CollectionSchema
|
|
|
|
buffer *storage.InsertData
|
|
}
|
|
|
|
func NewInsertBuffer(sch *schemapb.CollectionSchema) (*InsertBuffer, error) {
|
|
size, err := typeutil.EstimateSizePerRecord(sch)
|
|
if err != nil {
|
|
log.Warn("failed to estimate size per record", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
|
|
if size == 0 {
|
|
return nil, errors.New("Invalid schema")
|
|
}
|
|
buffer, err := storage.NewInsertData(sch)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
limit := paramtable.Get().DataNodeCfg.FlushInsertBufferSize.GetAsInt64() / int64(size)
|
|
if paramtable.Get().DataNodeCfg.FlushInsertBufferSize.GetAsInt64()%int64(size) != 0 {
|
|
limit++
|
|
}
|
|
|
|
return &InsertBuffer{
|
|
BufferBase: BufferBase{
|
|
rowLimit: limit,
|
|
sizeLimit: noLimit,
|
|
TimestampFrom: math.MaxUint64,
|
|
TimestampTo: 0,
|
|
},
|
|
collSchema: sch,
|
|
buffer: buffer,
|
|
}, nil
|
|
}
|
|
|
|
func (ib *InsertBuffer) Renew() *storage.InsertData {
|
|
if ib.IsEmpty() {
|
|
return nil
|
|
}
|
|
result := ib.buffer
|
|
|
|
// no error since validated in constructor
|
|
ib.buffer, _ = storage.NewInsertData(ib.collSchema)
|
|
ib.BufferBase.rows = 0
|
|
ib.BufferBase.TimestampFrom = math.MaxUint64
|
|
ib.BufferBase.TimestampTo = 0
|
|
|
|
return result
|
|
}
|
|
|
|
func (ib *InsertBuffer) Buffer(msgs []*msgstream.InsertMsg, startPos, endPos *msgpb.MsgPosition) ([]storage.FieldData, error) {
|
|
pkData := make([]storage.FieldData, 0, len(msgs))
|
|
for _, msg := range msgs {
|
|
tmpBuffer, err := storage.InsertMsgToInsertData(msg, ib.collSchema)
|
|
if err != nil {
|
|
log.Warn("failed to transfer insert msg to insert data", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
|
|
pkFieldData, err := storage.GetPkFromInsertData(ib.collSchema, tmpBuffer)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if pkFieldData.RowNum() != tmpBuffer.GetRowNum() {
|
|
return nil, merr.WrapErrServiceInternal("pk column row num not match")
|
|
}
|
|
pkData = append(pkData, pkFieldData)
|
|
|
|
storage.MergeInsertData(ib.buffer, tmpBuffer)
|
|
|
|
tsData, err := storage.GetTimestampFromInsertData(tmpBuffer)
|
|
if err != nil {
|
|
log.Warn("no timestamp field found in insert msg", zap.Error(err))
|
|
return nil, err
|
|
}
|
|
|
|
// update buffer size
|
|
ib.UpdateStatistics(int64(tmpBuffer.GetRowNum()), 0, ib.getTimestampRange(tsData), startPos, endPos)
|
|
}
|
|
return pkData, nil
|
|
}
|
|
|
|
func (ib *InsertBuffer) getTimestampRange(tsData *storage.Int64FieldData) TimeRange {
|
|
tr := TimeRange{
|
|
timestampMin: math.MaxUint64,
|
|
timestampMax: 0,
|
|
}
|
|
|
|
for _, data := range tsData.Data {
|
|
if uint64(data) < tr.timestampMin {
|
|
tr.timestampMin = typeutil.Timestamp(data)
|
|
}
|
|
if uint64(data) > tr.timestampMax {
|
|
tr.timestampMax = typeutil.Timestamp(data)
|
|
}
|
|
}
|
|
return tr
|
|
}
|