mirror of https://github.com/milvus-io/milvus.git
Reduce IndexCodec Load Memory (#20621)
Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>
pull/20704/head
parent 2390095232
commit 633a749880
@@ -29,6 +29,7 @@ import (
	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/util/funcutil"
	"github.com/milvus-io/milvus/internal/util/tsoutil"
	"github.com/milvus-io/milvus/internal/util/typeutil"
	"github.com/milvus-io/milvus/internal/util/uniquegenerator"

	"github.com/stretchr/testify/assert"
@@ -1034,9 +1035,10 @@ func TestIndexFileBinlog(t *testing.T) {
	key := funcutil.GenRandomStr()

	timestamp := Timestamp(time.Now().UnixNano())
-	payload := funcutil.GenRandomBytes()
+	payload := funcutil.GenRandomBytesWithLength(10000)

	w := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
+	w.PayloadDataType = schemapb.DataType_Int8

	e, err := w.NextIndexFileEventWriter()
	assert.Nil(t, err)
@@ -1150,6 +1152,135 @@ func TestIndexFileBinlog(t *testing.T) {
	assert.NotNil(t, err)
}

/* #nosec G103 */
func TestIndexFileBinlogV2(t *testing.T) {
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	key := funcutil.GenRandomStr()

	timestamp := Timestamp(time.Now().UnixNano())
	payload := funcutil.GenRandomBytes()

	w := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)

	e, err := w.NextIndexFileEventWriter()
	assert.Nil(t, err)
	err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
	assert.Nil(t, err)
	e.SetEventTimestamp(timestamp, timestamp)

	w.SetEventTimeStamp(timestamp, timestamp)

	sizeTotal := 2000000
	w.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))

	_, err = w.GetBuffer()
	assert.NotNil(t, err)
	err = w.Finish()
	assert.Nil(t, err)
	buf, err := w.GetBuffer()
	assert.Nil(t, err)

	w.Close()

	// magic number
	magicNum := UnsafeReadInt32(buf, 0)
	assert.Equal(t, magicNum, MagicNumber)
	pos := int(unsafe.Sizeof(MagicNumber))

	// descriptor header, timestamp
	ts := UnsafeReadInt64(buf, pos)
	assert.Greater(t, ts, int64(0))
	pos += int(unsafe.Sizeof(ts))

	// descriptor header, type code
	tc := UnsafeReadInt8(buf, pos)
	assert.Equal(t, EventTypeCode(tc), DescriptorEventType)
	pos += int(unsafe.Sizeof(tc))

	// descriptor header, event length
	descEventLen := UnsafeReadInt32(buf, pos)
	pos += int(unsafe.Sizeof(descEventLen))

	// descriptor header, next position
	descNxtPos := UnsafeReadInt32(buf, pos)
	assert.Equal(t, descEventLen+int32(unsafe.Sizeof(MagicNumber)), descNxtPos)
	pos += int(unsafe.Sizeof(descNxtPos))

	// descriptor data fix, collection id
	collID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, collID, collectionID)
	pos += int(unsafe.Sizeof(collID))

	// descriptor data fix, partition id
	partID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, partID, partitionID)
	pos += int(unsafe.Sizeof(partID))

	// descriptor data fix, segment id
	segID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, segID, segmentID)
	pos += int(unsafe.Sizeof(segID))

	// descriptor data fix, field id
	fID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, fID, fieldID)
	pos += int(unsafe.Sizeof(fID))

	// descriptor data fix, start time stamp
	startts := UnsafeReadInt64(buf, pos)
	assert.Equal(t, startts, int64(timestamp))
	pos += int(unsafe.Sizeof(startts))

	// descriptor data fix, end time stamp
	endts := UnsafeReadInt64(buf, pos)
	assert.Equal(t, endts, int64(timestamp))
	pos += int(unsafe.Sizeof(endts))

	// descriptor data fix, payload type
	colType := UnsafeReadInt32(buf, pos)
	assert.Equal(t, schemapb.DataType(colType), schemapb.DataType_String)
	pos += int(unsafe.Sizeof(colType))

	// descriptor data, post header lengths
	for i := DescriptorEventType; i < EventTypeEnd; i++ {
		size := getEventFixPartSize(i)
		assert.Equal(t, uint8(size), buf[pos])
		pos++
	}

	// descriptor data, extra length
	extraLength := UnsafeReadInt32(buf, pos)
	assert.Equal(t, extraLength, w.baseBinlogWriter.descriptorEventData.ExtraLength)
	pos += int(unsafe.Sizeof(extraLength))

	multiBytes := make([]byte, extraLength)
	for i := 0; i < int(extraLength); i++ {
		singleByte := UnsafeReadByte(buf, pos)
		multiBytes[i] = singleByte
		pos++
	}
	j := make(map[string]interface{})
	err = json.Unmarshal(multiBytes, &j)
	assert.Nil(t, err)
	assert.Equal(t, fmt.Sprintf("%v", indexBuildID), fmt.Sprintf("%v", j["indexBuildID"]))
	assert.Equal(t, fmt.Sprintf("%v", version), fmt.Sprintf("%v", j["version"]))
	assert.Equal(t, fmt.Sprintf("%v", indexName), fmt.Sprintf("%v", j["indexName"]))
	assert.Equal(t, fmt.Sprintf("%v", indexID), fmt.Sprintf("%v", j["indexID"]))
	assert.Equal(t, fmt.Sprintf("%v", key), fmt.Sprintf("%v", j["key"]))
	assert.Equal(t, fmt.Sprintf("%v", sizeTotal), fmt.Sprintf("%v", j[originalSizeKey]))

	// NextIndexFileEventWriter after close
	_, err = w.NextIndexFileEventWriter()
	assert.NotNil(t, err)
}

func TestNewBinlogReaderError(t *testing.T) {
	data := []byte{}
	reader, err := NewBinlogReader(data)
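The V2 test above walks the serialized buffer field by field with the UnsafeRead* helpers. A rough standalone sketch of that kind of walk follows; the magic value and field order are stand-ins for illustration, not Milvus's actual MagicNumber or descriptor layout, and little-endian encoding is assumed.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// Build a toy buffer: a 4-byte magic number followed by an 8-byte
	// timestamp, the same shape the test asserts on at offsets 0 and 4.
	const magic int32 = 0x1234abcd // hypothetical value, for illustration only
	var w bytes.Buffer
	binary.Write(&w, binary.LittleEndian, magic)
	binary.Write(&w, binary.LittleEndian, int64(449944326144))

	buf := w.Bytes()
	pos := 0

	// Equivalent of UnsafeReadInt32(buf, 0) in the test.
	gotMagic := int32(binary.LittleEndian.Uint32(buf[pos : pos+4]))
	pos += 4

	// Equivalent of UnsafeReadInt64(buf, pos).
	gotTs := int64(binary.LittleEndian.Uint64(buf[pos : pos+8]))
	pos += 8

	fmt.Println(gotMagic == magic, gotTs, pos) // true 449944326144 12
}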
@@ -261,7 +261,7 @@ func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEvent
	if writer.isClosed() {
		return nil, fmt.Errorf("binlog has closed")
	}
-	event, err := newIndexFileEventWriter()
+	event, err := newIndexFileEventWriter(writer.PayloadDataType)
	if err != nil {
		return nil, err
	}
@@ -326,38 +326,3 @@ func NewDDLBinlogWriter(dataType schemapb.DataType, collectionID int64) *DDLBinl
	}
	return w
}

// NewIndexFileBinlogWriter returns a new IndexFileBinlogWriter with provided parameters
func NewIndexFileBinlogWriter(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
) *IndexFileBinlogWriter {
	descriptorEvent := newDescriptorEvent()
	descriptorEvent.CollectionID = collectionID
	descriptorEvent.PartitionID = partitionID
	descriptorEvent.SegmentID = segmentID
	descriptorEvent.FieldID = fieldID
	descriptorEvent.PayloadDataType = schemapb.DataType_Int8
	descriptorEvent.AddExtra("indexBuildID", fmt.Sprintf("%d", indexBuildID))
	descriptorEvent.AddExtra("version", fmt.Sprintf("%d", version))
	descriptorEvent.AddExtra("indexName", indexName)
	descriptorEvent.AddExtra("indexID", fmt.Sprintf("%d", indexID))
	descriptorEvent.AddExtra("key", key)
	w := &IndexFileBinlogWriter{
		baseBinlogWriter: baseBinlogWriter{
			descriptorEvent: *descriptorEvent,
			magicNumber:     MagicNumber,
			binlogType:      IndexFileBinlog,
			eventWriters:    make([]EventWriter, 0),
			buffer:          nil,
		},
	}
	return w
}
@@ -19,19 +19,14 @@ package storage
import (
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"math"
	"sort"
	"strconv"
	"strings"
	"time"

	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/schemapb"
	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/etcdpb"
	"github.com/milvus-io/milvus/internal/util/typeutil"
)
@@ -1160,285 +1155,3 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [

	return resultTs, requestsStrings, nil
}

type IndexFileBinlogCodec struct {
}

// NewIndexFileBinlogCodec is constructor for IndexFileBinlogCodec
func NewIndexFileBinlogCodec() *IndexFileBinlogCodec {
	return &IndexFileBinlogCodec{}
}

func (codec *IndexFileBinlogCodec) serializeImpl(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
	value []byte,
	ts Timestamp,
) (*Blob, error) {
	writer := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
	defer writer.Close()

	eventWriter, err := writer.NextIndexFileEventWriter()
	if err != nil {
		return nil, err
	}
	defer eventWriter.Close()

	err = eventWriter.AddByteToPayload(value)
	if err != nil {
		return nil, err
	}

	eventWriter.SetEventTimestamp(ts, ts)

	writer.SetEventTimeStamp(ts, ts)

	// https://github.com/milvus-io/milvus/issues/9620
	// len(params) is also not accurate, indexParams is a map
	writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", len(value)))

	err = writer.Finish()
	if err != nil {
		return nil, err
	}
	buffer, err := writer.GetBuffer()
	if err != nil {
		return nil, err
	}

	return &Blob{
		Key: key,
		//Key:   strconv.Itoa(len(datas)),
		Value: buffer,
	}, nil
}

// SerializeIndexParams serializes index params as a blob.
func (codec *IndexFileBinlogCodec) SerializeIndexParams(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID) (*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// save index params.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	params, _ := json.Marshal(indexParams)
	indexParamBlob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, IndexParamsKey, params, ts)
	if err != nil {
		return nil, err
	}
	return indexParamBlob, nil
}

// Serialize serializes data as blobs.
func (codec *IndexFileBinlogCodec) Serialize(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
) ([]*Blob, error) {

	var err error

	var blobs []*Blob

	ts := Timestamp(time.Now().UnixNano())

	// save index params.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	indexParamBlob, err := codec.SerializeIndexParams(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID)
	if err != nil {
		return nil, err
	}
	blobs = append(blobs, indexParamBlob)

	for pos := range datas {
		blob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, datas[pos].Key, datas[pos].Value, ts)
		if err != nil {
			return nil, err
		}
		blobs = append(blobs, blob)
	}

	return blobs, nil
}

func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
	err error,
) {
	if len(blobs) == 0 {
		return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, errors.New("blobs is empty")
	}
	indexParams = make(map[string]string)
	datas = make([]*Blob, 0)

	for _, blob := range blobs {
		binlogReader, err := NewBinlogReader(blob.Value)
		if err != nil {
			log.Warn("failed to read binlog",
				zap.Error(err))
			return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		}
		dataType := binlogReader.PayloadDataType

		//desc, err := binlogReader.readDescriptorEvent()
		//if err != nil {
		//	log.Warn("failed to read descriptor event",
		//		zap.Error(err))
		//	return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		//}
		desc := binlogReader.descriptorEvent
		extraBytes := desc.ExtraBytes
		extra := make(map[string]interface{})
		_ = json.Unmarshal(extraBytes, &extra)

		value, _ := strconv.Atoi(extra["indexBuildID"].(string))
		indexBuildID = UniqueID(value)

		value, _ = strconv.Atoi(extra["version"].(string))
		version = int64(value)

		collectionID = desc.CollectionID
		partitionID = desc.PartitionID
		segmentID = desc.SegmentID
		fieldID = desc.FieldID

		indexName = extra["indexName"].(string)

		value, _ = strconv.Atoi(extra["indexID"].(string))
		indexID = UniqueID(value)

		key := extra["key"].(string)

		for {
			eventReader, err := binlogReader.NextEventReader()
			if err != nil {
				log.Warn("failed to get next event reader",
					zap.Error(err))
				binlogReader.Close()
				return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
			}
			if eventReader == nil {
				break
			}
			switch dataType {
			case schemapb.DataType_Int8:
				content, err := eventReader.GetByteFromPayload()
				if err != nil {
					log.Warn("failed to get byte from payload",
						zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}

				if key == IndexParamsKey {
					_ = json.Unmarshal(content, &indexParams)
				} else {
					blob := &Blob{Key: key}
					blob.Value = make([]byte, len(content))
					copy(blob.Value, content)
					datas = append(datas, blob)
				}
			}
			eventReader.Close()
		}
		binlogReader.Close()

	}

	return indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas, nil
}

func (codec *IndexFileBinlogCodec) Deserialize(blobs []*Blob) (
	datas []*Blob,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	err error,
) {
	_, _, _, _, _, _, indexParams, indexName, indexID, datas, err = codec.DeserializeImpl(blobs)
	return datas, indexParams, indexName, indexID, err
}

// IndexCodec can serialize and deserialize index
type IndexCodec struct {
}

// NewIndexCodec creates IndexCodec
func NewIndexCodec() *IndexCodec {
	return &IndexCodec{}
}

// Serialize serializes index
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
	paramsBytes, err := json.Marshal(struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{
		Params:    params,
		IndexName: indexName,
		IndexID:   indexID,
	})
	if err != nil {
		return nil, err
	}
	blobs = append(blobs, &Blob{Key: IndexParamsKey, Value: paramsBytes})
	return blobs, nil
}

// Deserialize deserializes index
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
	var file *Blob
	for i := 0; i < len(blobs); i++ {
		if blobs[i].Key != IndexParamsKey {
			continue
		}
		file = blobs[i]
		blobs = append(blobs[:i], blobs[i+1:]...)
		break
	}
	if file == nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("can not find params blob")
	}
	info := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{}
	if err := json.Unmarshal(file.Value, &info); err != nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("json unmarshal error: %s", err.Error())
	}

	return blobs, info.Params, info.IndexName, info.IndexID, nil
}
@@ -21,9 +21,6 @@ import (
	"fmt"
	"testing"

-	"github.com/milvus-io/milvus/internal/util/funcutil"
-	"github.com/milvus-io/milvus/internal/util/uniquegenerator"
-
	"github.com/milvus-io/milvus-proto/go-api/schemapb"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/etcdpb"
@@ -454,137 +451,6 @@ func TestDDCodec(t *testing.T) {
	assert.NotNil(t, err)
}

func TestIndexFileBinlogCodec(t *testing.T) {
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
		{
			Key:   "ivf2",
			Value: []byte{4, 5, 6},
		},
		{
			Key:   "large",
			Value: funcutil.RandomBytes(maxLengthPerRowOfIndexFile + 1),
		},
	}

	codec := NewIndexFileBinlogCodec()

	serializedBlobs, err := codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)

	idxBuildID, v, collID, parID, segID, fID, params, idxName, idxID, blobs, err := codec.DeserializeImpl(serializedBlobs)
	assert.Nil(t, err)
	assert.Equal(t, indexBuildID, idxBuildID)
	assert.Equal(t, version, v)
	assert.Equal(t, collectionID, collID)
	assert.Equal(t, partitionID, parID)
	assert.Equal(t, segmentID, segID)
	assert.Equal(t, fieldID, fID)
	assert.Equal(t, len(indexParams), len(params))
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)
	assert.ElementsMatch(t, datas, blobs)

	blobs, indexParams, indexName, indexID, err = codec.Deserialize(serializedBlobs)
	assert.Nil(t, err)
	assert.ElementsMatch(t, datas, blobs)
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)

	// empty
	_, _, _, _, _, _, _, _, _, _, err = codec.DeserializeImpl(nil)
	assert.NotNil(t, err)
}

func TestIndexFileBinlogCodecError(t *testing.T) {
	var err error

	// failed to read binlog
	codec := NewIndexFileBinlogCodec()
	_, _, _, _, err = codec.Deserialize([]*Blob{{Key: "key", Value: []byte("not in binlog format")}})
	assert.NotNil(t, err)

	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
	}

	_, err = codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)
}

func TestIndexCodec(t *testing.T) {
	indexCodec := NewIndexCodec()
	blobs := []*Blob{
		{
			"12345",
			[]byte{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7},
			14,
		},
		{
			"6666",
			[]byte{6, 6, 6, 6, 6, 1, 2, 3, 4, 5, 6, 7},
			12,
		},
		{
			"8885",
			[]byte{8, 8, 8, 8, 8, 8, 8, 8, 2, 3, 4, 5, 6, 7},
			14,
		},
	}
	indexParams := map[string]string{
		"k1": "v1", "k2": "v2",
	}
	blobsInput, err := indexCodec.Serialize(blobs, indexParams, "index_test_name", 1234)
	assert.Nil(t, err)
	assert.EqualValues(t, 4, len(blobsInput))
	assert.EqualValues(t, IndexParamsKey, blobsInput[3].Key)
	blobsOutput, indexParamsOutput, indexName, indexID, err := indexCodec.Deserialize(blobsInput)
	assert.Nil(t, err)
	assert.EqualValues(t, 3, len(blobsOutput))
	for i := 0; i < 3; i++ {
		assert.EqualValues(t, blobs[i], blobsOutput[i])
	}
	assert.EqualValues(t, indexParams, indexParamsOutput)
	assert.EqualValues(t, "index_test_name", indexName)
	assert.EqualValues(t, 1234, indexID)

	blobs = []*Blob{}
	_, _, _, _, err = indexCodec.Deserialize(blobs)
	assert.NotNil(t, err)
}

func TestTsError(t *testing.T) {
	insertData := &InsertData{}
	insertCodec := NewInsertCodec(nil)
@@ -28,6 +28,7 @@ import (
	"github.com/milvus-io/milvus/internal/common"
	"github.com/milvus-io/milvus/internal/util/funcutil"
	"github.com/milvus-io/milvus/internal/util/tsoutil"
	"github.com/milvus-io/milvus/internal/util/typeutil"
	"github.com/stretchr/testify/assert"
)
@@ -836,8 +837,45 @@ func TestDropPartitionEvent(t *testing.T) {

/* #nosec G103 */
func TestIndexFileEvent(t *testing.T) {
-	t.Run("index_file_timestamp", func(t *testing.T) {
-		w, err := newIndexFileEventWriter()
+	t.Run("index_file_string", func(t *testing.T) {
+		w, err := newIndexFileEventWriter(schemapb.DataType_String)
		assert.Nil(t, err)
		w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))

		payload := funcutil.GenRandomBytes()
		err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
		assert.Nil(t, err)

		err = w.Finish()
		assert.Nil(t, err)

		var buf bytes.Buffer
		err = w.Write(&buf)
		assert.Nil(t, err)
		w.Close()

		wBuf := buf.Bytes()
		st := UnsafeReadInt64(wBuf, binary.Size(eventHeader{}))
		assert.Equal(t, Timestamp(st), tsoutil.ComposeTS(10, 0))
		et := UnsafeReadInt64(wBuf, binary.Size(eventHeader{})+int(unsafe.Sizeof(st)))
		assert.Equal(t, Timestamp(et), tsoutil.ComposeTS(100, 0))

		payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
		pBuf := wBuf[payloadOffset:]
		pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
		assert.Nil(t, err)
		assert.Equal(t, pR.numRows, int64(1))
		value, err := pR.GetStringFromPayload()

		assert.Equal(t, len(value), 1)

		assert.Nil(t, err)
		assert.Equal(t, payload, typeutil.UnsafeStr2bytes(value[0]))
		pR.Close()
	})

	t.Run("index_file_int8", func(t *testing.T) {
		w, err := newIndexFileEventWriter(schemapb.DataType_Int8)
		assert.Nil(t, err)
		w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
@@ -862,6 +900,41 @@ func TestIndexFileEvent(t *testing.T) {
		payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
		pBuf := wBuf[payloadOffset:]
		pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
		assert.Equal(t, pR.numRows, int64(len(payload)))
		assert.Nil(t, err)
		value, err := pR.GetByteFromPayload()
		assert.Nil(t, err)
		assert.Equal(t, payload, value)
		pR.Close()
	})

	t.Run("index_file_int8_large", func(t *testing.T) {
		w, err := newIndexFileEventWriter(schemapb.DataType_Int8)
		assert.Nil(t, err)
		w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))

		payload := funcutil.GenRandomBytesWithLength(1000)
		err = w.AddByteToPayload(payload)
		assert.Nil(t, err)

		err = w.Finish()
		assert.Nil(t, err)

		var buf bytes.Buffer
		err = w.Write(&buf)
		assert.Nil(t, err)
		w.Close()

		wBuf := buf.Bytes()
		st := UnsafeReadInt64(wBuf, binary.Size(eventHeader{}))
		assert.Equal(t, Timestamp(st), tsoutil.ComposeTS(10, 0))
		et := UnsafeReadInt64(wBuf, binary.Size(eventHeader{})+int(unsafe.Sizeof(st)))
		assert.Equal(t, Timestamp(et), tsoutil.ComposeTS(100, 0))

		payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
		pBuf := wBuf[payloadOffset:]
		pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
		assert.Equal(t, pR.numRows, int64(len(payload)))
		assert.Nil(t, err)
		value, err := pR.GetByteFromPayload()
		assert.Nil(t, err)
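The payload-offset computations in these tests lean on encoding/binary: binary.Size only works for types whose encoded size is fixed, which is exactly why the tests can use it to skip past the event headers. A minimal standalone sketch follows; the two structs are stand-ins, since the real eventHeader and indexFileEventData live in the storage package and may have different fields.

package main

import (
	"encoding/binary"
	"fmt"
)

// Stand-ins for the fixed-size header structs the tests skip over.
type eventHeader struct {
	Timestamp    int64
	TypeCode     int8
	EventLength  int32
	NextPosition int32
}

type indexFileEventData struct {
	StartTimestamp int64
	EndTimestamp   int64
}

func main() {
	// binary.Size returns the packed (unpadded) encoding size, so the
	// payload starts right after these two headers in the written buffer.
	payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
	fmt.Println(payloadOffset) // 17 + 16 = 33 with the stand-in fields above
}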
@@ -368,8 +368,8 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
	return writer, nil
}

-func newIndexFileEventWriter() (*indexFileEventWriter, error) {
-	payloadWriter, err := NewPayloadWriter(schemapb.DataType_Int8)
+func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
+	payloadWriter, err := NewPayloadWriter(dataType)
	if err != nil {
		return nil, err
	}
@@ -0,0 +1,373 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package storage

import (
	"encoding/json"
	"errors"
	"fmt"
	"strconv"
	"time"

	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/schemapb"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/util/typeutil"
)

type IndexFileBinlogCodec struct {
}

// NewIndexFileBinlogCodec is constructor for IndexFileBinlogCodec
func NewIndexFileBinlogCodec() *IndexFileBinlogCodec {
	return &IndexFileBinlogCodec{}
}

func (codec *IndexFileBinlogCodec) serializeImpl(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
	value []byte,
	ts Timestamp,
) (*Blob, error) {
	writer := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
	defer writer.Close()

	eventWriter, err := writer.NextIndexFileEventWriter()
	if err != nil {
		return nil, err
	}
	defer eventWriter.Close()

	err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value))
	if err != nil {
		return nil, err
	}

	eventWriter.SetEventTimestamp(ts, ts)

	writer.SetEventTimeStamp(ts, ts)

	// https://github.com/milvus-io/milvus/issues/9620
	// len(params) is also not accurate, indexParams is a map
	writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", len(value)))

	err = writer.Finish()
	if err != nil {
		return nil, err
	}
	buffer, err := writer.GetBuffer()
	if err != nil {
		return nil, err
	}

	return &Blob{
		Key: key,
		//Key:   strconv.Itoa(len(datas)),
		Value: buffer,
	}, nil
}

// SerializeIndexParams serializes index params as a blob.
func (codec *IndexFileBinlogCodec) SerializeIndexParams(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID) (*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// save index params.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	params, _ := json.Marshal(indexParams)
	indexParamBlob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, IndexParamsKey, params, ts)
	if err != nil {
		return nil, err
	}
	return indexParamBlob, nil
}

// Serialize serializes data as blobs.
func (codec *IndexFileBinlogCodec) Serialize(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
) ([]*Blob, error) {

	var err error

	var blobs []*Blob

	ts := Timestamp(time.Now().UnixNano())

	// save index params.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	indexParamBlob, err := codec.SerializeIndexParams(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID)
	if err != nil {
		return nil, err
	}
	blobs = append(blobs, indexParamBlob)

	for pos := range datas {
		blob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, datas[pos].Key, datas[pos].Value, ts)
		if err != nil {
			return nil, err
		}
		blobs = append(blobs, blob)
	}

	return blobs, nil
}

func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
	err error,
) {
	if len(blobs) == 0 {
		return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, errors.New("blobs is empty")
	}
	indexParams = make(map[string]string)
	datas = make([]*Blob, 0)

	for _, blob := range blobs {
		binlogReader, err := NewBinlogReader(blob.Value)
		if err != nil {
			log.Warn("failed to read binlog",
				zap.Error(err))
			return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		}
		dataType := binlogReader.PayloadDataType

		//desc, err := binlogReader.readDescriptorEvent()
		//if err != nil {
		//	log.Warn("failed to read descriptor event",
		//		zap.Error(err))
		//	return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		//}
		desc := binlogReader.descriptorEvent
		extraBytes := desc.ExtraBytes
		extra := make(map[string]interface{})
		_ = json.Unmarshal(extraBytes, &extra)

		value, _ := strconv.Atoi(extra["indexBuildID"].(string))
		indexBuildID = UniqueID(value)

		value, _ = strconv.Atoi(extra["version"].(string))
		version = int64(value)

		collectionID = desc.CollectionID
		partitionID = desc.PartitionID
		segmentID = desc.SegmentID
		fieldID = desc.FieldID

		indexName = extra["indexName"].(string)

		value, _ = strconv.Atoi(extra["indexID"].(string))
		indexID = UniqueID(value)

		key := extra["key"].(string)

		for {
			eventReader, err := binlogReader.NextEventReader()
			if err != nil {
				log.Warn("failed to get next event reader",
					zap.Error(err))
				binlogReader.Close()
				return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
			}
			if eventReader == nil {
				break
			}
			switch dataType {
			// just for backward compatibility
			case schemapb.DataType_Int8:
				content, err := eventReader.GetByteFromPayload()
				if err != nil {
					log.Warn("failed to get byte from payload",
						zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}

				if key == IndexParamsKey {
					_ = json.Unmarshal(content, &indexParams)
				} else {
					blob := &Blob{Key: key}
					blob.Value = content
					datas = append(datas, blob)
				}

			case schemapb.DataType_String:
				content, err := eventReader.GetStringFromPayload()
				if err != nil {
					log.Warn("failed to get string from payload", zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}

				// make sure there is one string
				if len(content) != 1 {
					err := fmt.Errorf("failed to parse index event because content length is not one: %d", len(content))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}
				contentByte := typeutil.UnsafeStr2bytes(content[0])
				if key == IndexParamsKey {
					_ = json.Unmarshal(contentByte, &indexParams)
				} else {
					blob := &Blob{Key: key}
					blob.Value = contentByte
					datas = append(datas, blob)
				}
			}
			eventReader.Close()
		}
		binlogReader.Close()

	}

	return indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas, nil
}
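DeserializeImpl dispatches on the descriptor's PayloadDataType so that binlogs written before this change (one Int8 row per byte) and new ones (a single String row per file) both load. A standalone sketch of that dispatch pattern follows, with stand-in types rather than the real schemapb and event-reader APIs.

package main

import (
	"errors"
	"fmt"
)

// Stand-in for schemapb.DataType; only the two cases this codec handles.
type dataType int

const (
	dataTypeInt8 dataType = iota
	dataTypeString
)

// decodePayload mirrors the shape of the switch above: the legacy path
// copies many Int8 rows, the new path expects exactly one string row.
func decodePayload(dt dataType, int8Rows []byte, stringRows []string) ([]byte, error) {
	switch dt {
	case dataTypeInt8: // backward-compatibility path
		out := make([]byte, len(int8Rows))
		copy(out, int8Rows)
		return out, nil
	case dataTypeString:
		if len(stringRows) != 1 {
			return nil, fmt.Errorf("expected exactly one string row, got %d", len(stringRows))
		}
		return []byte(stringRows[0]), nil
	}
	return nil, errors.New("unsupported payload data type")
}

func main() {
	legacy, _ := decodePayload(dataTypeInt8, []byte{1, 2, 3}, nil)
	modern, _ := decodePayload(dataTypeString, nil, []string{"\x01\x02\x03"})
	fmt.Println(legacy, modern)
}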

func (codec *IndexFileBinlogCodec) Deserialize(blobs []*Blob) (
	datas []*Blob,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	err error,
) {
	_, _, _, _, _, _, indexParams, indexName, indexID, datas, err = codec.DeserializeImpl(blobs)
	return datas, indexParams, indexName, indexID, err
}

// IndexCodec can serialize and deserialize index
type IndexCodec struct {
}

// NewIndexCodec creates IndexCodec
func NewIndexCodec() *IndexCodec {
	return &IndexCodec{}
}

// Serialize serializes index
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
	paramsBytes, err := json.Marshal(struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{
		Params:    params,
		IndexName: indexName,
		IndexID:   indexID,
	})
	if err != nil {
		return nil, err
	}
	blobs = append(blobs, &Blob{Key: IndexParamsKey, Value: paramsBytes})
	return blobs, nil
}

// Deserialize deserializes index
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
	var file *Blob
	for i := 0; i < len(blobs); i++ {
		if blobs[i].Key != IndexParamsKey {
			continue
		}
		file = blobs[i]
		blobs = append(blobs[:i], blobs[i+1:]...)
		break
	}
	if file == nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("can not find params blob")
	}
	info := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{}
	if err := json.Unmarshal(file.Value, &info); err != nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("json unmarshal error: %s", err.Error())
	}

	return blobs, info.Params, info.IndexName, info.IndexID, nil
}

// NewIndexFileBinlogWriter returns a new IndexFileBinlogWriter with provided parameters
func NewIndexFileBinlogWriter(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
) *IndexFileBinlogWriter {
	descriptorEvent := newDescriptorEvent()
	descriptorEvent.CollectionID = collectionID
	descriptorEvent.PartitionID = partitionID
	descriptorEvent.SegmentID = segmentID
	descriptorEvent.FieldID = fieldID
	descriptorEvent.PayloadDataType = schemapb.DataType_String
	descriptorEvent.AddExtra("indexBuildID", fmt.Sprintf("%d", indexBuildID))
	descriptorEvent.AddExtra("version", fmt.Sprintf("%d", version))
	descriptorEvent.AddExtra("indexName", indexName)
	descriptorEvent.AddExtra("indexID", fmt.Sprintf("%d", indexID))
	descriptorEvent.AddExtra("key", key)
	w := &IndexFileBinlogWriter{
		baseBinlogWriter: baseBinlogWriter{
			descriptorEvent: *descriptorEvent,
			magicNumber:     MagicNumber,
			binlogType:      IndexFileBinlog,
			eventWriters:    make([]EventWriter, 0),
			buffer:          nil,
		},
	}
	return w
}
@@ -0,0 +1,156 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package storage

import (
	"testing"

	"github.com/milvus-io/milvus/internal/util/funcutil"
	"github.com/milvus-io/milvus/internal/util/uniquegenerator"
	"github.com/stretchr/testify/assert"
)

func TestIndexFileBinlogCodec(t *testing.T) {
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
		{
			Key:   "ivf2",
			Value: []byte{4, 5, 6},
		},
		{
			Key:   "large",
			Value: funcutil.RandomBytes(maxLengthPerRowOfIndexFile + 1),
		},
	}

	codec := NewIndexFileBinlogCodec()

	serializedBlobs, err := codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)

	idxBuildID, v, collID, parID, segID, fID, params, idxName, idxID, blobs, err := codec.DeserializeImpl(serializedBlobs)
	assert.Nil(t, err)
	assert.Equal(t, indexBuildID, idxBuildID)
	assert.Equal(t, version, v)
	assert.Equal(t, collectionID, collID)
	assert.Equal(t, partitionID, parID)
	assert.Equal(t, segmentID, segID)
	assert.Equal(t, fieldID, fID)
	assert.Equal(t, len(indexParams), len(params))
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)
	assert.ElementsMatch(t, datas, blobs)

	blobs, indexParams, indexName, indexID, err = codec.Deserialize(serializedBlobs)
	assert.Nil(t, err)
	assert.ElementsMatch(t, datas, blobs)
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)

	// empty
	_, _, _, _, _, _, _, _, _, _, err = codec.DeserializeImpl(nil)
	assert.NotNil(t, err)
}

func TestIndexFileBinlogCodecError(t *testing.T) {
	var err error

	// failed to read binlog
	codec := NewIndexFileBinlogCodec()
	_, _, _, _, err = codec.Deserialize([]*Blob{{Key: "key", Value: []byte("not in binlog format")}})
	assert.NotNil(t, err)

	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
	}

	_, err = codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)
}

func TestIndexCodec(t *testing.T) {
	indexCodec := NewIndexCodec()
	blobs := []*Blob{
		{
			"12345",
			[]byte{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7},
			14,
		},
		{
			"6666",
			[]byte{6, 6, 6, 6, 6, 1, 2, 3, 4, 5, 6, 7},
			12,
		},
		{
			"8885",
			[]byte{8, 8, 8, 8, 8, 8, 8, 8, 2, 3, 4, 5, 6, 7},
			14,
		},
	}
	indexParams := map[string]string{
		"k1": "v1", "k2": "v2",
	}
	blobsInput, err := indexCodec.Serialize(blobs, indexParams, "index_test_name", 1234)
	assert.Nil(t, err)
	assert.EqualValues(t, 4, len(blobsInput))
	assert.EqualValues(t, IndexParamsKey, blobsInput[3].Key)
	blobsOutput, indexParamsOutput, indexName, indexID, err := indexCodec.Deserialize(blobsInput)
	assert.Nil(t, err)
	assert.EqualValues(t, 3, len(blobsOutput))
	for i := 0; i < 3; i++ {
		assert.EqualValues(t, blobs[i], blobsOutput[i])
	}
	assert.EqualValues(t, indexParams, indexParamsOutput)
	assert.EqualValues(t, "index_test_name", indexName)
	assert.EqualValues(t, 1234, indexID)

	blobs = []*Blob{}
	_, _, _, _, err = indexCodec.Deserialize(blobs)
	assert.NotNil(t, err)
}
@@ -32,9 +32,10 @@ func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, er

// GetDataFromPayload returns data,length from payload, returns err if failed
// Return:
-//	`interface{}`: all types.
-//	`int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
-//	`error`: error.
+//
+//	`interface{}`: all types.
+//	`int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
+//	`error`: error.
func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) {
	switch r.colType {
	case schemapb.DataType_Bool:
@@ -5,6 +5,7 @@ import (
	"testing"

	"github.com/apache/arrow/go/v8/parquet/file"
+	"github.com/milvus-io/milvus-proto/go-api/schemapb"
	"github.com/stretchr/testify/suite"
)

@@ -20,6 +21,8 @@ type ReadDataFromAllRowGroupsSuite struct {
func (s *ReadDataFromAllRowGroupsSuite) SetupSuite() {
	w := NewIndexFileBinlogWriter(0, 0, 1, 2, 3, 100, "", 0, "test")
	defer w.Close()
+	// make sure it still writes int8 data
+	w.PayloadDataType = schemapb.DataType_Int8
	ew, err := w.NextIndexFileEventWriter()
	s.Require().NoError(err)
	defer ew.Close()

@@ -70,12 +73,6 @@ func (s *ReadDataFromAllRowGroupsSuite) TestNormalRun() {
	s.Assert().EqualValues(s.size, valuesRead)
}

-func (s *ReadDataFromAllRowGroupsSuite) TestColIdxOutOfRange() {
-	values := make([]int32, s.size)
-	_, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](s.reader.reader, values, 1, int64(s.size))
-	s.Assert().Error(err)
-}
-
func TestReadDataFromAllRowGroupsSuite(t *testing.T) {
	suite.Run(t, new(ReadDataFromAllRowGroupsSuite))
}
@@ -206,7 +206,7 @@ func printBinlogFile(filename string) error {
		physical, _ = tsoutil.ParseTS(evd.EndTimestamp)
		fmt.Printf("\tEndTimestamp: %v\n", physical)
		key := fmt.Sprintf("%v", extra["key"])
-		if err := printIndexFilePayloadValues(event.PayloadReaderInterface, key); err != nil {
+		if err := printIndexFilePayloadValues(event.PayloadReaderInterface, key, desc.PayloadDataType); err != nil {
			return err
		}
	default:
@@ -385,30 +385,57 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
}

// only print slice meta and index params
-func printIndexFilePayloadValues(reader PayloadReaderInterface, key string) error {
-	if key == IndexParamsKey {
-		content, err := reader.GetByteFromPayload()
-		if err != nil {
-			return err
-		}
-		fmt.Print("index params: \n")
-		fmt.Println(content)
-
-		return nil
-	}
-
-	if key == "SLICE_META" {
-		content, err := reader.GetByteFromPayload()
-		if err != nil {
-			return err
-		}
-		// content is a json string serialized by milvus::json,
-		// it's better to use milvus::json to parse the content also,
-		// fortunately, the json string is readable enough.
-		fmt.Print("index slice meta: \n")
-		fmt.Println(content)
-
-		return nil
-	}
-
-	return nil
+func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error {
+	if dataType == schemapb.DataType_Int8 {
+		if key == IndexParamsKey {
+			content, err := reader.GetByteFromPayload()
+			if err != nil {
+				return err
+			}
+			fmt.Print("index params: \n")
+			fmt.Println(content)
+
+			return nil
+		}
+
+		if key == "SLICE_META" {
+			content, err := reader.GetByteFromPayload()
+			if err != nil {
+				return err
+			}
+			// content is a json string serialized by milvus::json,
+			// it's better to use milvus::json to parse the content also,
+			// fortunately, the json string is readable enough.
+			fmt.Print("index slice meta: \n")
+			fmt.Println(content)
+
+			return nil
+		}
+	} else {
+		if key == IndexParamsKey {
+			content, err := reader.GetStringFromPayload()
+			if err != nil {
+				return err
+			}
+			fmt.Print("index params: \n")
+			fmt.Println(content[0])
+
+			return nil
+		}
+
+		if key == "SLICE_META" {
+			content, err := reader.GetStringFromPayload()
+			if err != nil {
+				return err
+			}
+			// content is a json string serialized by milvus::json,
+			// it's better to use milvus::json to parse the content also,
+			// fortunately, the json string is readable enough.
+			fmt.Print("index slice meta: \n")
+			fmt.Println(content[0])
+
+			return nil
+		}
+	}
+
+	return nil
@@ -54,6 +54,14 @@ func GenRandomBytes() []byte {
	return b
}

+func GenRandomBytesWithLength(length int64) []byte {
+	b := make([]byte, length)
+	if _, err := rand.Read(b); err != nil {
+		return nil
+	}
+	return b
+}
+
// GenRandomStr generates a random string.
func GenRandomStr() string {
	return fmt.Sprintf("%X", GenRandomBytes())
@@ -18,6 +18,7 @@ package typeutil

import (
	"strings"
+	"unsafe"
)

// AddOne add one to last byte in string, on empty string return empty

@@ -52,3 +53,15 @@ func After(str string, sub string) string {
func AfterN(str string, sub string, sep string) []string {
	return strings.Split(After(str, sub), sep)
}

+/* #nosec G103 */
+func UnsafeStr2bytes(s string) []byte {
+	x := (*[2]uintptr)(unsafe.Pointer(&s))
+	b := [3]uintptr{x[0], x[1], x[1]}
+	return *(*[]byte)(unsafe.Pointer(&b))
+}
+
+/* #nosec G103 */
+func UnsafeBytes2str(b []byte) string {
+	return *(*string)(unsafe.Pointer(&b))
+}
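UnsafeBytes2str and UnsafeStr2bytes reinterpret the slice and string headers in place, so converting a multi-megabyte index payload costs no allocation and no copy; that, plus writing the payload as one string row, is where the load-memory saving comes from. The usual /* #nosec G103 */ caveat applies: the result aliases the original backing array, as this runnable sketch (same shape as UnsafeBytes2str, not the typeutil code itself) shows.

package main

import (
	"fmt"
	"unsafe"
)

// unsafeBytes2str mirrors typeutil.UnsafeBytes2str: reinterpret the
// []byte header as a string header instead of copying the bytes.
func unsafeBytes2str(b []byte) string {
	return *(*string)(unsafe.Pointer(&b))
}

func main() {
	payload := []byte("abcdef")
	s := unsafeBytes2str(payload) // zero-copy view over payload

	fmt.Println(s) // abcdef

	// The "immutable" string aliases the slice, so later writes to the
	// slice show through it; safe in the codec only because the payload
	// is not mutated while the string view is alive.
	payload[0] = 'X'
	fmt.Println(s) // Xbcdef
}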
@@ -59,3 +59,19 @@ func TestAfterN(t *testing.T) {
	strs := AfterN("by-dev/meta/root-coord/credential/grantee-privileges/public/Global/*", "root-coord/credential/grantee-privileges/", "/")
	assert.Len(t, strs, 3)
}

+func TestStrByteConversion(t *testing.T) {
+	test := "test"
+	testByte := UnsafeStr2bytes(test)
+	assert.Equal(t, len(testByte), 4)
+	testResult := UnsafeBytes2str(testByte)
+	assert.Equal(t, testResult, test)
+
+	testByte = []byte{1, 2, 3, 4, 5}
+	testStr := UnsafeBytes2str(testByte)
+	assert.Equal(t, len(testStr), 5)
+	testByteResult := UnsafeStr2bytes(testStr)
+	for i, b := range testByteResult {
+		assert.Equal(t, testByte[i], b)
+	}
+}