Reduce IndexCodec Load Memory (#20621)

Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>

Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>
pull/20704/head
Xiaofan 2022-11-18 10:47:08 +08:00 committed by GitHub
parent 2390095232
commit 633a749880
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 830 additions and 491 deletions

View File

@ -29,6 +29,7 @@ import (
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/tsoutil"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/milvus-io/milvus/internal/util/uniquegenerator"
"github.com/stretchr/testify/assert"
@ -1034,9 +1035,10 @@ func TestIndexFileBinlog(t *testing.T) {
key := funcutil.GenRandomStr()
timestamp := Timestamp(time.Now().UnixNano())
payload := funcutil.GenRandomBytes()
payload := funcutil.GenRandomBytesWithLength(10000)
w := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
w.PayloadDataType = schemapb.DataType_Int8
e, err := w.NextIndexFileEventWriter()
assert.Nil(t, err)
@ -1150,6 +1152,135 @@ func TestIndexFileBinlog(t *testing.T) {
assert.NotNil(t, err)
}
/* #nosec G103 */
// TestIndexFileBinlogV2 verifies the string-payload (V2) index binlog layout.
// It writes one binlog whose payload is a single string event, then walks the
// raw buffer with unsafe reads to validate the magic number, the descriptor
// event header and fixed part, the per-event-type post-header lengths, and
// the JSON-encoded extra fields.
func TestIndexFileBinlogV2(t *testing.T) {
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	key := funcutil.GenRandomStr()
	timestamp := Timestamp(time.Now().UnixNano())
	payload := funcutil.GenRandomBytes()

	w := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
	e, err := w.NextIndexFileEventWriter()
	assert.Nil(t, err)
	// V2 stores the payload as one string event instead of int8 bytes.
	err = e.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
	assert.Nil(t, err)
	e.SetEventTimestamp(timestamp, timestamp)
	w.SetEventTimeStamp(timestamp, timestamp)
	sizeTotal := 2000000
	w.baseBinlogWriter.descriptorEventData.AddExtra(originalSizeKey, fmt.Sprintf("%v", sizeTotal))

	// GetBuffer must fail before Finish has been called.
	_, err = w.GetBuffer()
	assert.NotNil(t, err)
	err = w.Finish()
	assert.Nil(t, err)
	buf, err := w.GetBuffer()
	assert.Nil(t, err)
	w.Close()

	//magic number
	magicNum := UnsafeReadInt32(buf, 0)
	assert.Equal(t, magicNum, MagicNumber)
	pos := int(unsafe.Sizeof(MagicNumber))
	//descriptor header, timestamp
	ts := UnsafeReadInt64(buf, pos)
	assert.Greater(t, ts, int64(0))
	pos += int(unsafe.Sizeof(ts))
	//descriptor header, type code
	tc := UnsafeReadInt8(buf, pos)
	assert.Equal(t, EventTypeCode(tc), DescriptorEventType)
	pos += int(unsafe.Sizeof(tc))
	//descriptor header, event length
	descEventLen := UnsafeReadInt32(buf, pos)
	pos += int(unsafe.Sizeof(descEventLen))
	//descriptor header, next position
	descNxtPos := UnsafeReadInt32(buf, pos)
	assert.Equal(t, descEventLen+int32(unsafe.Sizeof(MagicNumber)), descNxtPos)
	pos += int(unsafe.Sizeof(descNxtPos))
	//descriptor data fix, collection id
	collID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, collID, collectionID)
	pos += int(unsafe.Sizeof(collID))
	//descriptor data fix, partition id
	partID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, partID, partitionID)
	pos += int(unsafe.Sizeof(partID))
	//descriptor data fix, segment id
	segID := UnsafeReadInt64(buf, pos)
	assert.Equal(t, segID, segmentID)
	pos += int(unsafe.Sizeof(segID))
	//descriptor data fix, field id
	fID := UnsafeReadInt64(buf, pos)
	// BUG FIX: previously asserted fieldID against itself, so the field id
	// read back from the buffer was never actually checked.
	assert.Equal(t, fID, fieldID)
	pos += int(unsafe.Sizeof(fID))
	//descriptor data fix, start time stamp
	startts := UnsafeReadInt64(buf, pos)
	assert.Equal(t, startts, int64(timestamp))
	pos += int(unsafe.Sizeof(startts))
	//descriptor data fix, end time stamp
	endts := UnsafeReadInt64(buf, pos)
	assert.Equal(t, endts, int64(timestamp))
	pos += int(unsafe.Sizeof(endts))
	//descriptor data fix, payload type (String for the V2 layout)
	colType := UnsafeReadInt32(buf, pos)
	assert.Equal(t, schemapb.DataType(colType), schemapb.DataType_String)
	pos += int(unsafe.Sizeof(colType))
	//descriptor data, post header lengths
	for i := DescriptorEventType; i < EventTypeEnd; i++ {
		size := getEventFixPartSize(i)
		assert.Equal(t, uint8(size), buf[pos])
		pos++
	}
	//descriptor data, extra length
	extraLength := UnsafeReadInt32(buf, pos)
	assert.Equal(t, extraLength, w.baseBinlogWriter.descriptorEventData.ExtraLength)
	pos += int(unsafe.Sizeof(extraLength))
	multiBytes := make([]byte, extraLength)
	for i := 0; i < int(extraLength); i++ {
		singleByte := UnsafeReadByte(buf, pos)
		multiBytes[i] = singleByte
		pos++
	}
	// Extra fields are serialized as JSON; compare via string formatting to
	// avoid JSON number-type mismatches.
	j := make(map[string]interface{})
	err = json.Unmarshal(multiBytes, &j)
	assert.Nil(t, err)
	assert.Equal(t, fmt.Sprintf("%v", indexBuildID), fmt.Sprintf("%v", j["indexBuildID"]))
	assert.Equal(t, fmt.Sprintf("%v", version), fmt.Sprintf("%v", j["version"]))
	assert.Equal(t, fmt.Sprintf("%v", indexName), fmt.Sprintf("%v", j["indexName"]))
	assert.Equal(t, fmt.Sprintf("%v", indexID), fmt.Sprintf("%v", j["indexID"]))
	assert.Equal(t, fmt.Sprintf("%v", key), fmt.Sprintf("%v", j["key"]))
	assert.Equal(t, fmt.Sprintf("%v", sizeTotal), fmt.Sprintf("%v", j[originalSizeKey]))

	// NextIndexFileEventWriter after close must fail.
	_, err = w.NextIndexFileEventWriter()
	assert.NotNil(t, err)
}
func TestNewBinlogReaderError(t *testing.T) {
data := []byte{}
reader, err := NewBinlogReader(data)

View File

@ -261,7 +261,7 @@ func (writer *IndexFileBinlogWriter) NextIndexFileEventWriter() (*indexFileEvent
if writer.isClosed() {
return nil, fmt.Errorf("binlog has closed")
}
event, err := newIndexFileEventWriter()
event, err := newIndexFileEventWriter(writer.PayloadDataType)
if err != nil {
return nil, err
}
@ -326,38 +326,3 @@ func NewDDLBinlogWriter(dataType schemapb.DataType, collectionID int64) *DDLBinl
}
return w
}
// NewIndexFileBinlogWriter returns a new IndexFileBinlogWriter with provided parameters.
// The index identity (build ID, version, name, ID, key) is carried in the
// descriptor event's extra map so readers can recover it without a schema.
func NewIndexFileBinlogWriter(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
) *IndexFileBinlogWriter {
	de := newDescriptorEvent()
	de.CollectionID = collectionID
	de.PartitionID = partitionID
	de.SegmentID = segmentID
	de.FieldID = fieldID
	// Legacy layout: index chunks are written as int8 payloads.
	de.PayloadDataType = schemapb.DataType_Int8
	de.AddExtra("indexBuildID", fmt.Sprintf("%d", indexBuildID))
	de.AddExtra("version", fmt.Sprintf("%d", version))
	de.AddExtra("indexName", indexName)
	de.AddExtra("indexID", fmt.Sprintf("%d", indexID))
	de.AddExtra("key", key)
	return &IndexFileBinlogWriter{
		baseBinlogWriter: baseBinlogWriter{
			descriptorEvent: *de,
			magicNumber:     MagicNumber,
			binlogType:      IndexFileBinlog,
			eventWriters:    make([]EventWriter, 0),
			buffer:          nil,
		},
	}
}

View File

@ -19,19 +19,14 @@ package storage
import (
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
@ -1160,285 +1155,3 @@ func (dataDefinitionCodec *DataDefinitionCodec) Deserialize(blobs []*Blob) (ts [
return resultTs, requestsStrings, nil
}
// IndexFileBinlogCodec serializes and deserializes index files to and from
// the binlog format (descriptor event plus index-file events). It is
// stateless, so a single instance may be shared.
type IndexFileBinlogCodec struct {
}

// NewIndexFileBinlogCodec is constructor for IndexFileBinlogCodec
func NewIndexFileBinlogCodec() *IndexFileBinlogCodec {
	return &IndexFileBinlogCodec{}
}
// serializeImpl writes a single key/value pair into an index-file binlog and
// wraps the resulting buffer in a Blob keyed by the original key.
func (codec *IndexFileBinlogCodec) serializeImpl(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
	value []byte,
	ts Timestamp,
) (*Blob, error) {
	w := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
	defer w.Close()

	ev, err := w.NextIndexFileEventWriter()
	if err != nil {
		return nil, err
	}
	defer ev.Close()

	// Legacy layout: the raw index chunk is stored as int8 bytes.
	if err = ev.AddByteToPayload(value); err != nil {
		return nil, err
	}

	ev.SetEventTimestamp(ts, ts)
	w.SetEventTimeStamp(ts, ts)

	// https://github.com/milvus-io/milvus/issues/9620
	// len(params) is also not accurate, indexParams is a map
	w.AddExtra(originalSizeKey, fmt.Sprintf("%v", len(value)))

	if err = w.Finish(); err != nil {
		return nil, err
	}
	buf, err := w.GetBuffer()
	if err != nil {
		return nil, err
	}
	return &Blob{Key: key, Value: buf}, nil
}
// SerializeIndexParams serializes the index params map as a single blob keyed
// by IndexParamsKey. QueryCoord parses index extra info from this binlog, so
// callers should keep this blob first in the serialized list.
func (codec *IndexFileBinlogCodec) SerializeIndexParams(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID) (*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// Propagate the marshal error instead of silently serializing a bogus
	// payload (the error was previously discarded with `_`).
	params, err := json.Marshal(indexParams)
	if err != nil {
		return nil, err
	}
	indexParamBlob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, IndexParamsKey, params, ts)
	if err != nil {
		return nil, err
	}
	return indexParamBlob, nil
}
// Serialize encodes the index params plus every data chunk as binlog blobs.
// The params blob is always first so downstream readers can locate it.
func (codec *IndexFileBinlogCodec) Serialize(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
) ([]*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// save index params first.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	indexParamBlob, err := codec.SerializeIndexParams(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID)
	if err != nil {
		return nil, err
	}

	blobs := make([]*Blob, 0, len(datas)+1)
	blobs = append(blobs, indexParamBlob)
	for _, data := range datas {
		blob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, data.Key, data.Value, ts)
		if err != nil {
			return nil, err
		}
		blobs = append(blobs, blob)
	}
	return blobs, nil
}
// DeserializeImpl decodes index-file binlog blobs back into their identity
// fields (build/collection/partition/segment/field/index IDs, version, name),
// the decoded index params map, and the raw data blobs. The identity fields
// are read from each blob's descriptor-event extra JSON; the last blob wins,
// which is fine because all blobs of one index share the same identity.
// Legacy layout only: payloads of type Int8 are consumed; other payload
// types are skipped by the switch.
func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
	err error,
) {
	if len(blobs) == 0 {
		return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, errors.New("blobs is empty")
	}
	indexParams = make(map[string]string)
	datas = make([]*Blob, 0)

	for _, blob := range blobs {
		binlogReader, err := NewBinlogReader(blob.Value)
		if err != nil {
			log.Warn("failed to read binlog",
				zap.Error(err))
			return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		}
		dataType := binlogReader.PayloadDataType

		// Identity fields live in the descriptor event's extra JSON; the
		// unmarshal/Atoi errors are intentionally ignored so that a blob with
		// missing extras degrades to zero values rather than failing the read.
		desc := binlogReader.descriptorEvent
		extraBytes := desc.ExtraBytes
		extra := make(map[string]interface{})
		_ = json.Unmarshal(extraBytes, &extra)

		value, _ := strconv.Atoi(extra["indexBuildID"].(string))
		indexBuildID = UniqueID(value)

		value, _ = strconv.Atoi(extra["version"].(string))
		version = int64(value)

		collectionID = desc.CollectionID
		partitionID = desc.PartitionID
		segmentID = desc.SegmentID
		fieldID = desc.FieldID

		indexName = extra["indexName"].(string)

		value, _ = strconv.Atoi(extra["indexID"].(string))
		indexID = UniqueID(value)

		key := extra["key"].(string)

		for {
			eventReader, err := binlogReader.NextEventReader()
			if err != nil {
				log.Warn("failed to get next event reader",
					zap.Error(err))
				binlogReader.Close()
				return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
			}
			if eventReader == nil {
				break
			}
			switch dataType {
			case schemapb.DataType_Int8:
				content, err := eventReader.GetByteFromPayload()
				if err != nil {
					// fix: this path reads bytes, not a string — the log
					// message previously said "string" and was misleading.
					log.Warn("failed to get byte from payload",
						zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}
				if key == IndexParamsKey {
					_ = json.Unmarshal(content, &indexParams)
				} else {
					// Copy out: content may alias the reader's buffer.
					blob := &Blob{Key: key}
					blob.Value = make([]byte, len(content))
					copy(blob.Value, content)
					datas = append(datas, blob)
				}
			}
			eventReader.Close()
		}
		binlogReader.Close()
	}

	return indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas, nil
}
// Deserialize is the short form of DeserializeImpl: it returns only the data
// blobs, index params, index name and index ID, discarding the identity IDs.
func (codec *IndexFileBinlogCodec) Deserialize(blobs []*Blob) (
	datas []*Blob,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	err error,
) {
	_, _, _, _, _, _, indexParams, indexName, indexID, datas, err = codec.DeserializeImpl(blobs)
	return
}
// IndexCodec can serialize and deserialize index blobs together with their
// params, index name and index ID. It is stateless.
type IndexCodec struct {
}

// NewIndexCodec creates IndexCodec
func NewIndexCodec() *IndexCodec {
	return &IndexCodec{}
}
// Serialize appends one extra blob, keyed by IndexParamsKey, that carries the
// JSON-encoded params, index name and index ID alongside the given blobs.
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
	meta := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{
		Params:    params,
		IndexName: indexName,
		IndexID:   indexID,
	}
	paramsBytes, err := json.Marshal(meta)
	if err != nil {
		return nil, err
	}
	return append(blobs, &Blob{Key: IndexParamsKey, Value: paramsBytes}), nil
}
// Deserialize splits the IndexParamsKey blob off of blobs and decodes it,
// returning the remaining data blobs plus the params, index name and index ID.
// Fails if no params blob is present or its JSON is malformed.
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
	var file *Blob
	for i, blob := range blobs {
		if blob.Key == IndexParamsKey {
			file = blob
			// Remove the params blob in place; order of the rest is kept.
			blobs = append(blobs[:i], blobs[i+1:]...)
			break
		}
	}
	if file == nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("can not find params blob")
	}
	info := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{}
	if err := json.Unmarshal(file.Value, &info); err != nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("json unmarshal error: %s", err.Error())
	}
	return blobs, info.Params, info.IndexName, info.IndexID, nil
}

View File

@ -21,9 +21,6 @@ import (
"fmt"
"testing"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/uniquegenerator"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
@ -454,137 +451,6 @@ func TestDDCodec(t *testing.T) {
assert.NotNil(t, err)
}
// TestIndexFileBinlogCodec round-trips index data through IndexFileBinlogCodec
// and checks that the identity fields, params map and data blobs all survive
// Serialize -> DeserializeImpl/Deserialize intact.
func TestIndexFileBinlogCodec(t *testing.T) {
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
		{
			Key:   "ivf2",
			Value: []byte{4, 5, 6},
		},
		{
			// oversized chunk: exercises payloads larger than one row
			Key:   "large",
			Value: funcutil.RandomBytes(maxLengthPerRowOfIndexFile + 1),
		},
	}

	codec := NewIndexFileBinlogCodec()

	serializedBlobs, err := codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)

	// full-form deserialization returns every identity field
	idxBuildID, v, collID, parID, segID, fID, params, idxName, idxID, blobs, err := codec.DeserializeImpl(serializedBlobs)
	assert.Nil(t, err)
	assert.Equal(t, indexBuildID, idxBuildID)
	assert.Equal(t, version, v)
	assert.Equal(t, collectionID, collID)
	assert.Equal(t, partitionID, parID)
	assert.Equal(t, segmentID, segID)
	assert.Equal(t, fieldID, fID)
	assert.Equal(t, len(indexParams), len(params))
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)
	assert.ElementsMatch(t, datas, blobs)

	// the short-form Deserialize must agree with DeserializeImpl
	blobs, indexParams, indexName, indexID, err = codec.Deserialize(serializedBlobs)
	assert.Nil(t, err)
	assert.ElementsMatch(t, datas, blobs)
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)

	// empty input is rejected
	_, _, _, _, _, _, _, _, _, _, err = codec.DeserializeImpl(nil)
	assert.NotNil(t, err)
}
// TestIndexFileBinlogCodecError exercises the codec's failure path: a buffer
// that is not in binlog format must be rejected by Deserialize, while
// serialization of a well-formed input still succeeds.
func TestIndexFileBinlogCodecError(t *testing.T) {
	var err error

	// failed to read binlog: value is not a binlog buffer
	codec := NewIndexFileBinlogCodec()
	_, _, _, _, err = codec.Deserialize([]*Blob{{Key: "key", Value: []byte("not in binlog format")}})
	assert.NotNil(t, err)

	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
	}

	// serialization of valid input should not error
	_, err = codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)
}
// TestIndexCodec round-trips index blobs plus params through IndexCodec:
// Serialize appends one params blob keyed by IndexParamsKey; Deserialize
// strips it off again and decodes the params, index name and index ID.
func TestIndexCodec(t *testing.T) {
	indexCodec := NewIndexCodec()
	blobs := []*Blob{
		{
			"12345",
			[]byte{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7},
			14,
		},
		{
			"6666",
			[]byte{6, 6, 6, 6, 6, 1, 2, 3, 4, 5, 6, 7},
			12,
		},
		{
			"8885",
			[]byte{8, 8, 8, 8, 8, 8, 8, 8, 2, 3, 4, 5, 6, 7},
			14,
		},
	}
	indexParams := map[string]string{
		"k1": "v1", "k2": "v2",
	}
	// Serialize appends exactly one extra blob holding the params JSON.
	blobsInput, err := indexCodec.Serialize(blobs, indexParams, "index_test_name", 1234)
	assert.Nil(t, err)
	assert.EqualValues(t, 4, len(blobsInput))
	assert.EqualValues(t, IndexParamsKey, blobsInput[3].Key)
	blobsOutput, indexParamsOutput, indexName, indexID, err := indexCodec.Deserialize(blobsInput)
	assert.Nil(t, err)
	assert.EqualValues(t, 3, len(blobsOutput))
	for i := 0; i < 3; i++ {
		assert.EqualValues(t, blobs[i], blobsOutput[i])
	}
	assert.EqualValues(t, indexParams, indexParamsOutput)
	assert.EqualValues(t, "index_test_name", indexName)
	assert.EqualValues(t, 1234, indexID)

	// an empty blob list has no params blob and must fail
	blobs = []*Blob{}
	_, _, _, _, err = indexCodec.Deserialize(blobs)
	assert.NotNil(t, err)
}
func TestTsError(t *testing.T) {
insertData := &InsertData{}
insertCodec := NewInsertCodec(nil)

View File

@ -28,6 +28,7 @@ import (
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/tsoutil"
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/stretchr/testify/assert"
)
@ -836,8 +837,45 @@ func TestDropPartitionEvent(t *testing.T) {
/* #nosec G103 */
func TestIndexFileEvent(t *testing.T) {
t.Run("index_file_timestamp", func(t *testing.T) {
w, err := newIndexFileEventWriter()
t.Run("index_file_string", func(t *testing.T) {
w, err := newIndexFileEventWriter(schemapb.DataType_String)
assert.Nil(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytes()
err = w.AddOneStringToPayload(typeutil.UnsafeBytes2str(payload))
assert.Nil(t, err)
err = w.Finish()
assert.Nil(t, err)
var buf bytes.Buffer
err = w.Write(&buf)
assert.Nil(t, err)
w.Close()
wBuf := buf.Bytes()
st := UnsafeReadInt64(wBuf, binary.Size(eventHeader{}))
assert.Equal(t, Timestamp(st), tsoutil.ComposeTS(10, 0))
et := UnsafeReadInt64(wBuf, binary.Size(eventHeader{})+int(unsafe.Sizeof(st)))
assert.Equal(t, Timestamp(et), tsoutil.ComposeTS(100, 0))
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_String, pBuf)
assert.Nil(t, err)
assert.Equal(t, pR.numRows, int64(1))
value, err := pR.GetStringFromPayload()
assert.Equal(t, len(value), 1)
assert.Nil(t, err)
assert.Equal(t, payload, typeutil.UnsafeStr2bytes(value[0]))
pR.Close()
})
t.Run("index_file_int8", func(t *testing.T) {
w, err := newIndexFileEventWriter(schemapb.DataType_Int8)
assert.Nil(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
@ -862,6 +900,41 @@ func TestIndexFileEvent(t *testing.T) {
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
assert.Equal(t, pR.numRows, int64(len(payload)))
assert.Nil(t, err)
value, err := pR.GetByteFromPayload()
assert.Nil(t, err)
assert.Equal(t, payload, value)
pR.Close()
})
t.Run("index_file_int8_large", func(t *testing.T) {
w, err := newIndexFileEventWriter(schemapb.DataType_Int8)
assert.Nil(t, err)
w.SetEventTimestamp(tsoutil.ComposeTS(10, 0), tsoutil.ComposeTS(100, 0))
payload := funcutil.GenRandomBytesWithLength(1000)
err = w.AddByteToPayload(payload)
assert.Nil(t, err)
err = w.Finish()
assert.Nil(t, err)
var buf bytes.Buffer
err = w.Write(&buf)
assert.Nil(t, err)
w.Close()
wBuf := buf.Bytes()
st := UnsafeReadInt64(wBuf, binary.Size(eventHeader{}))
assert.Equal(t, Timestamp(st), tsoutil.ComposeTS(10, 0))
et := UnsafeReadInt64(wBuf, binary.Size(eventHeader{})+int(unsafe.Sizeof(st)))
assert.Equal(t, Timestamp(et), tsoutil.ComposeTS(100, 0))
payloadOffset := binary.Size(eventHeader{}) + binary.Size(indexFileEventData{})
pBuf := wBuf[payloadOffset:]
pR, err := NewPayloadReader(schemapb.DataType_Int8, pBuf)
assert.Equal(t, pR.numRows, int64(len(payload)))
assert.Nil(t, err)
value, err := pR.GetByteFromPayload()
assert.Nil(t, err)

View File

@ -368,8 +368,8 @@ func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEven
return writer, nil
}
func newIndexFileEventWriter() (*indexFileEventWriter, error) {
payloadWriter, err := NewPayloadWriter(schemapb.DataType_Int8)
func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
payloadWriter, err := NewPayloadWriter(dataType)
if err != nil {
return nil, err
}

View File

@ -0,0 +1,373 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"encoding/json"
"errors"
"fmt"
"strconv"
"time"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
// IndexFileBinlogCodec serializes and deserializes index files to and from
// the binlog format. Writing uses the string-payload (V2) layout; reading
// also accepts the legacy int8-payload layout for backward compatibility.
type IndexFileBinlogCodec struct {
}

// NewIndexFileBinlogCodec is constructor for IndexFileBinlogCodec
func NewIndexFileBinlogCodec() *IndexFileBinlogCodec {
	return &IndexFileBinlogCodec{}
}
// serializeImpl writes one key/value pair into an index-file binlog and wraps
// the resulting buffer in a Blob keyed by key. The value is stored as a
// single string event (V2 layout); DeserializeImpl performs the inverse.
func (codec *IndexFileBinlogCodec) serializeImpl(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
	value []byte,
	ts Timestamp,
) (*Blob, error) {
	writer := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
	defer writer.Close()
	eventWriter, err := writer.NextIndexFileEventWriter()
	if err != nil {
		return nil, err
	}
	defer eventWriter.Close()

	// Zero-copy conversion: the string shares value's backing array to avoid
	// duplicating a potentially large index chunk. NOTE(review): assumes the
	// caller does not mutate value while the writer holds it — TODO confirm.
	err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value))
	if err != nil {
		return nil, err
	}

	eventWriter.SetEventTimestamp(ts, ts)
	writer.SetEventTimeStamp(ts, ts)

	// https://github.com/milvus-io/milvus/issues/9620
	// len(params) is also not accurate, indexParams is a map
	writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", len(value)))

	err = writer.Finish()
	if err != nil {
		return nil, err
	}
	buffer, err := writer.GetBuffer()
	if err != nil {
		return nil, err
	}

	return &Blob{
		Key: key,
		//Key: strconv.Itoa(len(datas)),
		Value: buffer,
	}, nil
}
// SerializeIndexParams serializes the index params map as a single blob keyed
// by IndexParamsKey. QueryCoord parses index extra info from this binlog, so
// callers should keep this blob first in the serialized list.
func (codec *IndexFileBinlogCodec) SerializeIndexParams(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID) (*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// Propagate the marshal error instead of silently serializing a bogus
	// payload (the error was previously discarded with `_`).
	params, err := json.Marshal(indexParams)
	if err != nil {
		return nil, err
	}
	indexParamBlob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, IndexParamsKey, params, ts)
	if err != nil {
		return nil, err
	}
	return indexParamBlob, nil
}
// Serialize encodes the index params plus every data chunk as binlog blobs.
// The params blob is always first so downstream readers can locate it.
func (codec *IndexFileBinlogCodec) Serialize(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
) ([]*Blob, error) {
	ts := Timestamp(time.Now().UnixNano())

	// save index params first.
	// querycoord will parse index extra info from binlog, better to let this key appear first.
	indexParamBlob, err := codec.SerializeIndexParams(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID)
	if err != nil {
		return nil, err
	}

	blobs := make([]*Blob, 0, len(datas)+1)
	blobs = append(blobs, indexParamBlob)
	for _, data := range datas {
		blob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, data.Key, data.Value, ts)
		if err != nil {
			return nil, err
		}
		blobs = append(blobs, blob)
	}
	return blobs, nil
}
// DeserializeImpl decodes index-file binlog blobs back into their identity
// fields (build/collection/partition/segment/field/index IDs, version, name),
// the decoded index params map, and the raw data blobs. Identity fields come
// from each blob's descriptor-event extra JSON; the last blob wins, which is
// fine because all blobs of one index share the same identity. Payloads may
// be Int8 (legacy layout) or String (current layout); any other payload type
// is silently skipped by the switch.
func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	datas []*Blob,
	err error,
) {
	if len(blobs) == 0 {
		return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, errors.New("blobs is empty")
	}
	indexParams = make(map[string]string)
	datas = make([]*Blob, 0)

	for _, blob := range blobs {
		binlogReader, err := NewBinlogReader(blob.Value)
		if err != nil {
			log.Warn("failed to read binlog",
				zap.Error(err))
			return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		}
		// The payload type decides which branch of the switch below runs.
		dataType := binlogReader.PayloadDataType
		//desc, err := binlogReader.readDescriptorEvent()
		//if err != nil {
		//	log.Warn("failed to read descriptor event",
		//		zap.Error(err))
		//	return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
		//}
		// Identity fields live in the descriptor event's extra JSON; the
		// unmarshal/Atoi errors are intentionally ignored so that missing
		// extras degrade to zero values instead of failing the read.
		desc := binlogReader.descriptorEvent
		extraBytes := desc.ExtraBytes
		extra := make(map[string]interface{})
		_ = json.Unmarshal(extraBytes, &extra)

		value, _ := strconv.Atoi(extra["indexBuildID"].(string))
		indexBuildID = UniqueID(value)

		value, _ = strconv.Atoi(extra["version"].(string))
		version = int64(value)

		collectionID = desc.CollectionID
		partitionID = desc.PartitionID
		segmentID = desc.SegmentID
		fieldID = desc.FieldID

		indexName = extra["indexName"].(string)

		value, _ = strconv.Atoi(extra["indexID"].(string))
		indexID = UniqueID(value)

		key := extra["key"].(string)

		for {
			eventReader, err := binlogReader.NextEventReader()
			if err != nil {
				log.Warn("failed to get next event reader",
					zap.Error(err))
				binlogReader.Close()
				return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
			}
			if eventReader == nil {
				break
			}
			switch dataType {
			// just for backward compatibility
			case schemapb.DataType_Int8:
				content, err := eventReader.GetByteFromPayload()
				if err != nil {
					log.Warn("failed to get byte from payload",
						zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}
				if key == IndexParamsKey {
					_ = json.Unmarshal(content, &indexParams)
				} else {
					blob := &Blob{Key: key}
					blob.Value = content
					datas = append(datas, blob)
				}
			case schemapb.DataType_String:
				content, err := eventReader.GetStringFromPayload()
				if err != nil {
					log.Warn("failed to get string from payload", zap.Error(err))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}
				// make sure there is one string: the writer emits exactly one
				// string event per blob (see serializeImpl).
				if len(content) != 1 {
					err := fmt.Errorf("failed to parse index event because content length is not one %d", len(content))
					eventReader.Close()
					binlogReader.Close()
					return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
				}
				// Zero-copy back to bytes; the blob shares the payload string's
				// memory instead of duplicating it (the memory-reduction goal).
				contentByte := typeutil.UnsafeStr2bytes(content[0])
				if key == IndexParamsKey {
					_ = json.Unmarshal(contentByte, &indexParams)
				} else {
					blob := &Blob{Key: key}
					blob.Value = contentByte
					datas = append(datas, blob)
				}
			}
			eventReader.Close()
		}
		binlogReader.Close()
	}

	return indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas, nil
}
// Deserialize is the short form of DeserializeImpl: it returns only the data
// blobs, index params, index name and index ID, discarding the identity IDs.
func (codec *IndexFileBinlogCodec) Deserialize(blobs []*Blob) (
	datas []*Blob,
	indexParams map[string]string,
	indexName string,
	indexID UniqueID,
	err error,
) {
	_, _, _, _, _, _, indexParams, indexName, indexID, datas, err = codec.DeserializeImpl(blobs)
	return
}
// IndexCodec can serialize and deserialize index blobs together with their
// params, index name and index ID. It is stateless.
type IndexCodec struct {
}

// NewIndexCodec creates IndexCodec
func NewIndexCodec() *IndexCodec {
	return &IndexCodec{}
}
// Serialize appends one extra blob, keyed by IndexParamsKey, that carries the
// JSON-encoded params, index name and index ID alongside the given blobs.
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
	meta := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{
		Params:    params,
		IndexName: indexName,
		IndexID:   indexID,
	}
	paramsBytes, err := json.Marshal(meta)
	if err != nil {
		return nil, err
	}
	return append(blobs, &Blob{Key: IndexParamsKey, Value: paramsBytes}), nil
}
// Deserialize splits the IndexParamsKey blob off of blobs and decodes it,
// returning the remaining data blobs plus the params, index name and index ID.
// Fails if no params blob is present or its JSON is malformed.
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
	var file *Blob
	for i, blob := range blobs {
		if blob.Key == IndexParamsKey {
			file = blob
			// Remove the params blob in place; order of the rest is kept.
			blobs = append(blobs[:i], blobs[i+1:]...)
			break
		}
	}
	if file == nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("can not find params blob")
	}
	info := struct {
		Params    map[string]string
		IndexName string
		IndexID   UniqueID
	}{}
	if err := json.Unmarshal(file.Value, &info); err != nil {
		return nil, nil, "", InvalidUniqueID, fmt.Errorf("json unmarshal error: %s", err.Error())
	}
	return blobs, info.Params, info.IndexName, info.IndexID, nil
}
// NewIndexFileBinlogWriter returns a new IndexFileBinlogWriter with provided parameters.
// The index identity (build ID, version, name, ID, key) is carried in the
// descriptor event's extra map so readers can recover it without a schema.
func NewIndexFileBinlogWriter(
	indexBuildID UniqueID,
	version int64,
	collectionID UniqueID,
	partitionID UniqueID,
	segmentID UniqueID,
	fieldID UniqueID,
	indexName string,
	indexID UniqueID,
	key string,
) *IndexFileBinlogWriter {
	de := newDescriptorEvent()
	de.CollectionID = collectionID
	de.PartitionID = partitionID
	de.SegmentID = segmentID
	de.FieldID = fieldID
	// V2 layout: index chunks are written as a single string payload.
	de.PayloadDataType = schemapb.DataType_String
	de.AddExtra("indexBuildID", fmt.Sprintf("%d", indexBuildID))
	de.AddExtra("version", fmt.Sprintf("%d", version))
	de.AddExtra("indexName", indexName)
	de.AddExtra("indexID", fmt.Sprintf("%d", indexID))
	de.AddExtra("key", key)
	return &IndexFileBinlogWriter{
		baseBinlogWriter: baseBinlogWriter{
			descriptorEvent: *de,
			magicNumber:     MagicNumber,
			binlogType:      IndexFileBinlog,
			eventWriters:    make([]EventWriter, 0),
			buffer:          nil,
		},
	}
}

View File

@ -0,0 +1,156 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"testing"
"github.com/milvus-io/milvus/internal/util/funcutil"
"github.com/milvus-io/milvus/internal/util/uniquegenerator"
"github.com/stretchr/testify/assert"
)
// TestIndexFileBinlogCodec round-trips a set of index file blobs through
// IndexFileBinlogCodec and verifies that every descriptor field, the index
// params and the payloads survive both DeserializeImpl and Deserialize.
func TestIndexFileBinlogCodec(t *testing.T) {
	// Randomized identifiers so a passing run cannot depend on fixed values.
	indexBuildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	version := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collectionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partitionID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segmentID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fieldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexName := funcutil.GenRandomStr()
	indexID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	indexParams := make(map[string]string)
	indexParams["index_type"] = "IVF_FLAT"
	// Payloads: two small blobs plus one longer than maxLengthPerRowOfIndexFile,
	// presumably forcing the codec to split it across rows — confirm against
	// the Serialize implementation.
	datas := []*Blob{
		{
			Key:   "ivf1",
			Value: []byte{1, 2, 3},
		},
		{
			Key:   "ivf2",
			Value: []byte{4, 5, 6},
		},
		{
			Key:   "large",
			Value: funcutil.RandomBytes(maxLengthPerRowOfIndexFile + 1),
		},
	}
	codec := NewIndexFileBinlogCodec()
	serializedBlobs, err := codec.Serialize(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas)
	assert.Nil(t, err)
	// Full deserialization: every descriptor field must round-trip.
	idxBuildID, v, collID, parID, segID, fID, params, idxName, idxID, blobs, err := codec.DeserializeImpl(serializedBlobs)
	assert.Nil(t, err)
	assert.Equal(t, indexBuildID, idxBuildID)
	assert.Equal(t, version, v)
	assert.Equal(t, collectionID, collID)
	assert.Equal(t, partitionID, parID)
	assert.Equal(t, segmentID, segID)
	assert.Equal(t, fieldID, fID)
	assert.Equal(t, len(indexParams), len(params))
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)
	assert.ElementsMatch(t, datas, blobs)
	// NOTE(review): this reassigns indexParams/indexName/indexID, so the
	// checks below compare Deserialize's output against DeserializeImpl's
	// output (params/idxName/idxID), not against the original inputs.
	blobs, indexParams, indexName, indexID, err = codec.Deserialize(serializedBlobs)
	assert.Nil(t, err)
	assert.ElementsMatch(t, datas, blobs)
	for key, value := range indexParams {
		assert.Equal(t, value, params[key])
	}
	assert.Equal(t, indexName, idxName)
	assert.Equal(t, indexID, idxID)
	// empty input must be rejected
	_, _, _, _, _, _, _, _, _, _, err = codec.DeserializeImpl(nil)
	assert.NotNil(t, err)
}
// TestIndexFileBinlogCodecError exercises the codec's edge paths: bytes that
// are not in binlog format must fail to deserialize, while a well-formed
// payload set must serialize without error.
func TestIndexFileBinlogCodecError(t *testing.T) {
	codec := NewIndexFileBinlogCodec()

	// Garbage input cannot be parsed as a binlog.
	_, _, _, _, err := codec.Deserialize([]*Blob{{Key: "key", Value: []byte("not in binlog format")}})
	assert.NotNil(t, err)

	// Random descriptor fields plus a minimal payload serialize cleanly.
	buildID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	ver := int64(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	collID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	partID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	segID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	fldID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	name := funcutil.GenRandomStr()
	idxID := UniqueID(uniquegenerator.GetUniqueIntGeneratorIns().GetInt())
	params := map[string]string{"index_type": "IVF_FLAT"}
	payload := []*Blob{{Key: "ivf1", Value: []byte{1, 2, 3}}}

	_, err = codec.Serialize(buildID, ver, collID, partID, segID, fldID, params, name, idxID, payload)
	assert.Nil(t, err)
}
// TestIndexCodec verifies that IndexCodec.Serialize appends a trailing params
// blob and that Deserialize strips it again, returning the original blobs and
// the decoded params, index name and index ID.
func TestIndexCodec(t *testing.T) {
	indexCodec := NewIndexCodec()
	// Positional Blob literals: Key, Value, and a third field that matches
	// len(Value) — presumably the payload size; confirm against the Blob
	// struct definition.
	blobs := []*Blob{
		{
			"12345",
			[]byte{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7},
			14,
		},
		{
			"6666",
			[]byte{6, 6, 6, 6, 6, 1, 2, 3, 4, 5, 6, 7},
			12,
		},
		{
			"8885",
			[]byte{8, 8, 8, 8, 8, 8, 8, 8, 2, 3, 4, 5, 6, 7},
			14,
		},
	}
	indexParams := map[string]string{
		"k1": "v1", "k2": "v2",
	}
	// Serialize must append exactly one extra blob, keyed IndexParamsKey,
	// after the three payload blobs.
	blobsInput, err := indexCodec.Serialize(blobs, indexParams, "index_test_name", 1234)
	assert.Nil(t, err)
	assert.EqualValues(t, 4, len(blobsInput))
	assert.EqualValues(t, IndexParamsKey, blobsInput[3].Key)
	// Deserialize strips the params blob and returns the payload blobs
	// unchanged and in order.
	blobsOutput, indexParamsOutput, indexName, indexID, err := indexCodec.Deserialize(blobsInput)
	assert.Nil(t, err)
	assert.EqualValues(t, 3, len(blobsOutput))
	for i := 0; i < 3; i++ {
		assert.EqualValues(t, blobs[i], blobsOutput[i])
	}
	assert.EqualValues(t, indexParams, indexParamsOutput)
	assert.EqualValues(t, "index_test_name", indexName)
	assert.EqualValues(t, 1234, indexID)
	// A blob set without a params blob must be rejected.
	blobs = []*Blob{}
	_, _, _, _, err = indexCodec.Deserialize(blobs)
	assert.NotNil(t, err)
}

View File

@ -32,9 +32,10 @@ func NewPayloadReader(colType schemapb.DataType, buf []byte) (*PayloadReader, er
// GetDataFromPayload returns data,length from payload, returns err if failed
// Return:
// `interface{}`: all types.
// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
// `error`: error.
//
// `interface{}`: all types.
// `int`: dim, only meaningful to FLOAT/BINARY VECTOR type.
// `error`: error.
func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) {
switch r.colType {
case schemapb.DataType_Bool:

View File

@ -5,6 +5,7 @@ import (
"testing"
"github.com/apache/arrow/go/v8/parquet/file"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/stretchr/testify/suite"
)
@ -20,6 +21,8 @@ type ReadDataFromAllRowGroupsSuite struct {
func (s *ReadDataFromAllRowGroupsSuite) SetupSuite() {
w := NewIndexFileBinlogWriter(0, 0, 1, 2, 3, 100, "", 0, "test")
defer w.Close()
// make sure it's still written int8 data
w.PayloadDataType = schemapb.DataType_Int8
ew, err := w.NextIndexFileEventWriter()
s.Require().NoError(err)
defer ew.Close()
@ -70,12 +73,6 @@ func (s *ReadDataFromAllRowGroupsSuite) TestNormalRun() {
s.Assert().EqualValues(s.size, valuesRead)
}
func (s *ReadDataFromAllRowGroupsSuite) TestColIdxOutOfRange() {
values := make([]int32, s.size)
_, err := ReadDataFromAllRowGroups[int32, *file.Int32ColumnChunkReader](s.reader.reader, values, 1, int64(s.size))
s.Assert().Error(err)
}
func TestReadDataFromAllRowGroupsSuite(t *testing.T) {
suite.Run(t, new(ReadDataFromAllRowGroupsSuite))
}

View File

@ -206,7 +206,7 @@ func printBinlogFile(filename string) error {
physical, _ = tsoutil.ParseTS(evd.EndTimestamp)
fmt.Printf("\tEndTimestamp: %v\n", physical)
key := fmt.Sprintf("%v", extra["key"])
if err := printIndexFilePayloadValues(event.PayloadReaderInterface, key); err != nil {
if err := printIndexFilePayloadValues(event.PayloadReaderInterface, key, desc.PayloadDataType); err != nil {
return err
}
default:
@ -385,30 +385,57 @@ func printDDLPayloadValues(eventType EventTypeCode, colType schemapb.DataType, r
}
// only print slice meta and index params
func printIndexFilePayloadValues(reader PayloadReaderInterface, key string) error {
if key == IndexParamsKey {
content, err := reader.GetByteFromPayload()
if err != nil {
return err
func printIndexFilePayloadValues(reader PayloadReaderInterface, key string, dataType schemapb.DataType) error {
if dataType == schemapb.DataType_Int8 {
if key == IndexParamsKey {
content, err := reader.GetByteFromPayload()
if err != nil {
return err
}
fmt.Print("index params: \n")
fmt.Println(content)
return nil
}
fmt.Print("index params: \n")
fmt.Println(content)
return nil
}
if key == "SLICE_META" {
content, err := reader.GetByteFromPayload()
if err != nil {
return err
}
// content is a json string serialized by milvus::json,
// it's better to use milvus::json to parse the content also,
// fortunately, the json string is readable enough.
fmt.Print("index slice meta: \n")
fmt.Println(content)
if key == "SLICE_META" {
content, err := reader.GetByteFromPayload()
if err != nil {
return err
return nil
}
// content is a json string serialized by milvus::json,
// it's better to use milvus::json to parse the content also,
// fortunately, the json string is readable enough.
fmt.Print("index slice meta: \n")
fmt.Println(content)
} else {
if key == IndexParamsKey {
content, err := reader.GetStringFromPayload()
if err != nil {
return err
}
fmt.Print("index params: \n")
fmt.Println(content[0])
return nil
return nil
}
if key == "SLICE_META" {
content, err := reader.GetStringFromPayload()
if err != nil {
return err
}
// content is a json string serialized by milvus::json,
// it's better to use milvus::json to parse the content also,
// fortunately, the json string is readable enough.
fmt.Print("index slice meta: \n")
fmt.Println(content[0])
return nil
}
}
return nil

View File

@ -54,6 +54,14 @@ func GenRandomBytes() []byte {
return b
}
// GenRandomBytesWithLength generates a random byte slice of exactly the given
// length. It returns nil if the underlying random source fails.
func GenRandomBytesWithLength(length int64) []byte {
	buf := make([]byte, length)
	_, err := rand.Read(buf)
	if err != nil {
		return nil
	}
	return buf
}
// GenRandomStr generates a random string.
func GenRandomStr() string {
return fmt.Sprintf("%X", GenRandomBytes())

View File

@ -18,6 +18,7 @@ package typeutil
import (
"strings"
"unsafe"
)
// AddOne add one to last byte in string, on empty string return empty
@ -52,3 +53,15 @@ func After(str string, sub string) string {
func AfterN(str string, sub string, sep string) []string {
return strings.Split(After(str, sub), sep)
}
/* #nosec G103 */
// UnsafeStr2bytes reinterprets the string's backing storage as a []byte
// without copying. The returned slice MUST be treated as read-only: writing
// to it would mutate an (immutable) string. The slice is only valid while the
// source string remains reachable.
func UnsafeStr2bytes(s string) []byte {
	// Local mirrors of the runtime string/slice headers, with the data word
	// kept as unsafe.Pointer throughout. The previous implementation copied
	// the header through a [3]uintptr, storing the data pointer as a plain
	// uintptr in a local variable — a violation of the unsafe.Pointer rules,
	// since the GC does not treat a uintptr as a reference and could in
	// principle collect or move the backing data between the conversions.
	type stringHeader struct {
		data unsafe.Pointer
		len  int
	}
	type sliceHeader struct {
		data unsafe.Pointer
		len  int
		cap  int
	}
	sh := (*stringHeader)(unsafe.Pointer(&s))
	// cap == len: the string owns exactly len bytes.
	bh := sliceHeader{data: sh.data, len: sh.len, cap: sh.len}
	return *(*[]byte)(unsafe.Pointer(&bh))
}
/* #nosec G103 */
// UnsafeBytes2str reinterprets b's backing array as a string without copying.
// Safe only if b is never mutated afterwards: the returned string aliases b's
// memory, and mutating it breaks Go's string-immutability guarantee. Relies
// on the slice header's first two words (data, len) matching the string
// header layout.
func UnsafeBytes2str(b []byte) string {
	return *(*string)(unsafe.Pointer(&b))
}

View File

@ -59,3 +59,19 @@ func TestAfterN(t *testing.T) {
strs := AfterN("by-dev/meta/root-coord/credential/grantee-privileges/public/Global/*", "root-coord/credential/grantee-privileges/", "/")
assert.Len(t, strs, 3)
}
// TestStrByteConversion round-trips data through the unsafe string/byte
// conversion helpers in both directions and checks lengths and contents.
func TestStrByteConversion(t *testing.T) {
	// string -> []byte -> string
	src := "test"
	asBytes := UnsafeStr2bytes(src)
	assert.Equal(t, len(asBytes), 4)
	roundTripped := UnsafeBytes2str(asBytes)
	assert.Equal(t, roundTripped, src)

	// []byte -> string -> []byte
	raw := []byte{1, 2, 3, 4, 5}
	asStr := UnsafeBytes2str(raw)
	assert.Equal(t, len(asStr), 5)
	back := UnsafeStr2bytes(asStr)
	for i, c := range back {
		assert.Equal(t, raw[i], c)
	}
}