2022-11-18 02:47:08 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
2023-02-26 03:31:49 +00:00
2022-11-18 02:47:08 +00:00
2023-06-08 17:28:37 +00:00
2023-04-06 11:14:32 +00:00
2022-11-18 02:47:08 +00:00
2023-09-21 01:45:27 +00:00
// IndexFileBinlogCodec groups the methods that encode index params and index
// data into index-file binlog blobs and decode them back. It carries no state.
type IndexFileBinlogCodec struct{}
2022-11-18 02:47:08 +00:00
// NewIndexFileBinlogCodec is constructor for IndexFileBinlogCodec
func NewIndexFileBinlogCodec() *IndexFileBinlogCodec {
return &IndexFileBinlogCodec{}
func (codec *IndexFileBinlogCodec) serializeImpl(
indexBuildID UniqueID,
version int64,
collectionID UniqueID,
partitionID UniqueID,
segmentID UniqueID,
fieldID UniqueID,
indexName string,
indexID UniqueID,
key string,
value []byte,
ts Timestamp,
) (*Blob, error) {
writer := NewIndexFileBinlogWriter(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, key)
defer writer.Close()
eventWriter, err := writer.NextIndexFileEventWriter()
if err != nil {
return nil, err
defer eventWriter.Close()
2024-06-19 09:08:00 +00:00
err = eventWriter.AddOneStringToPayload(typeutil.UnsafeBytes2str(value), true)
2022-11-18 02:47:08 +00:00
if err != nil {
return nil, err
eventWriter.SetEventTimestamp(ts, ts)
writer.SetEventTimeStamp(ts, ts)
// https://github.com/milvus-io/milvus/issues/9620
// len(params) is also not accurate, indexParams is a map
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", len(value)))
err = writer.Finish()
if err != nil {
return nil, err
buffer, err := writer.GetBuffer()
if err != nil {
return nil, err
return &Blob{
Key: key,
2023-09-21 01:45:27 +00:00
// Key: strconv.Itoa(len(datas)),
2022-11-18 02:47:08 +00:00
Value: buffer,
}, nil
// SerializeIndexParams serilizes index params as blob.
func (codec *IndexFileBinlogCodec) SerializeIndexParams(
indexBuildID UniqueID,
version int64,
collectionID UniqueID,
partitionID UniqueID,
segmentID UniqueID,
fieldID UniqueID,
indexParams map[string]string,
indexName string,
2023-09-21 01:45:27 +00:00
indexID UniqueID,
) (*Blob, error) {
2022-11-18 02:47:08 +00:00
ts := Timestamp(time.Now().UnixNano())
// save index params.
// querycoord will parse index extra info from binlog, better to let this key appear first.
params, _ := json.Marshal(indexParams)
indexParamBlob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, IndexParamsKey, params, ts)
if err != nil {
return nil, err
return indexParamBlob, nil
// Serialize serilizes data as blobs.
func (codec *IndexFileBinlogCodec) Serialize(
indexBuildID UniqueID,
version int64,
collectionID UniqueID,
partitionID UniqueID,
segmentID UniqueID,
fieldID UniqueID,
indexParams map[string]string,
indexName string,
indexID UniqueID,
datas []*Blob,
) ([]*Blob, error) {
var err error
var blobs []*Blob
ts := Timestamp(time.Now().UnixNano())
// save index params.
// querycoord will parse index extra info from binlog, better to let this key appear first.
indexParamBlob, err := codec.SerializeIndexParams(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID)
if err != nil {
return nil, err
blobs = append(blobs, indexParamBlob)
for pos := range datas {
blob, err := codec.serializeImpl(indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexName, indexID, datas[pos].Key, datas[pos].Value, ts)
if err != nil {
return nil, err
blobs = append(blobs, blob)
return blobs, nil
func (codec *IndexFileBinlogCodec) DeserializeImpl(blobs []*Blob) (
indexBuildID UniqueID,
version int64,
collectionID UniqueID,
partitionID UniqueID,
segmentID UniqueID,
fieldID UniqueID,
indexParams map[string]string,
indexName string,
indexID UniqueID,
datas []*Blob,
err error,
) {
if len(blobs) == 0 {
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, errors.New("blobs is empty")
indexParams = make(map[string]string)
datas = make([]*Blob, 0)
for _, blob := range blobs {
binlogReader, err := NewBinlogReader(blob.Value)
if err != nil {
log.Warn("failed to read binlog",
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
dataType := binlogReader.PayloadDataType
//desc, err := binlogReader.readDescriptorEvent()
//if err != nil {
// log.Warn("failed to read descriptor event",
// zap.Error(err))
// return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
desc := binlogReader.descriptorEvent
extraBytes := desc.ExtraBytes
extra := make(map[string]interface{})
_ = json.Unmarshal(extraBytes, &extra)
value, _ := strconv.Atoi(extra["indexBuildID"].(string))
indexBuildID = UniqueID(value)
value, _ = strconv.Atoi(extra["version"].(string))
version = int64(value)
collectionID = desc.CollectionID
partitionID = desc.PartitionID
segmentID = desc.SegmentID
fieldID = desc.FieldID
indexName = extra["indexName"].(string)
value, _ = strconv.Atoi(extra["indexID"].(string))
indexID = UniqueID(value)
key := extra["key"].(string)
for {
eventReader, err := binlogReader.NextEventReader()
if err != nil {
log.Warn("failed to get next event reader",
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
if eventReader == nil {
switch dataType {
// just for backward compatibility
case schemapb.DataType_Int8:
2024-07-23 08:07:51 +00:00
// todo: valid_data may need to check when create index
2024-06-19 09:08:00 +00:00
content, _, err := eventReader.GetByteFromPayload()
2022-11-18 02:47:08 +00:00
if err != nil {
log.Warn("failed to get byte from payload",
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
if key == IndexParamsKey {
_ = json.Unmarshal(content, &indexParams)
} else {
blob := &Blob{Key: key}
blob.Value = content
datas = append(datas, blob)
case schemapb.DataType_String:
2024-06-19 09:08:00 +00:00
content, _, err := eventReader.GetStringFromPayload()
2022-11-18 02:47:08 +00:00
if err != nil {
log.Warn("failed to get string from payload", zap.Error(err))
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
// make sure there is one string
if len(content) != 1 {
err := fmt.Errorf("failed to parse index event because content length is not one %d", len(content))
return 0, 0, 0, 0, 0, 0, nil, "", 0, nil, err
contentByte := typeutil.UnsafeStr2bytes(content[0])
if key == IndexParamsKey {
_ = json.Unmarshal(contentByte, &indexParams)
} else {
blob := &Blob{Key: key}
blob.Value = contentByte
datas = append(datas, blob)
return indexBuildID, version, collectionID, partitionID, segmentID, fieldID, indexParams, indexName, indexID, datas, nil
func (codec *IndexFileBinlogCodec) Deserialize(blobs []*Blob) (
datas []*Blob,
indexParams map[string]string,
indexName string,
indexID UniqueID,
err error,
) {
_, _, _, _, _, _, indexParams, indexName, indexID, datas, err = codec.DeserializeImpl(blobs)
return datas, indexParams, indexName, indexID, err
// IndexCodec serializes and deserializes an index: its blobs together with the
// index params, name and ID. It carries no state.
type IndexCodec struct{}
2022-11-18 02:47:08 +00:00
// NewIndexCodec creates IndexCodec
func NewIndexCodec() *IndexCodec {
return &IndexCodec{}
// Serialize serializes index
func (indexCodec *IndexCodec) Serialize(blobs []*Blob, params map[string]string, indexName string, indexID UniqueID) ([]*Blob, error) {
paramsBytes, err := json.Marshal(struct {
Params map[string]string
IndexName string
IndexID UniqueID
Params: params,
IndexName: indexName,
IndexID: indexID,
if err != nil {
return nil, err
blobs = append(blobs, &Blob{Key: IndexParamsKey, Value: paramsBytes})
return blobs, nil
// Deserialize deserializes index
func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, map[string]string, string, UniqueID, error) {
var file *Blob
for i := 0; i < len(blobs); i++ {
if blobs[i].Key != IndexParamsKey {
file = blobs[i]
blobs = append(blobs[:i], blobs[i+1:]...)
if file == nil {
return nil, nil, "", InvalidUniqueID, fmt.Errorf("can not find params blob")
info := struct {
Params map[string]string
IndexName string
IndexID UniqueID
if err := json.Unmarshal(file.Value, &info); err != nil {
return nil, nil, "", InvalidUniqueID, fmt.Errorf("json unmarshal error: %s", err.Error())
return blobs, info.Params, info.IndexName, info.IndexID, nil
// NewIndexFileBinlogWriter returns a new IndexFileBinlogWriter with provided parameters
func NewIndexFileBinlogWriter(
indexBuildID UniqueID,
version int64,
collectionID UniqueID,
partitionID UniqueID,
segmentID UniqueID,
fieldID UniqueID,
indexName string,
indexID UniqueID,
key string,
) *IndexFileBinlogWriter {
descriptorEvent := newDescriptorEvent()
descriptorEvent.CollectionID = collectionID
descriptorEvent.PartitionID = partitionID
descriptorEvent.SegmentID = segmentID
descriptorEvent.FieldID = fieldID
descriptorEvent.PayloadDataType = schemapb.DataType_String
descriptorEvent.AddExtra("indexBuildID", fmt.Sprintf("%d", indexBuildID))
descriptorEvent.AddExtra("version", fmt.Sprintf("%d", version))
descriptorEvent.AddExtra("indexName", indexName)
descriptorEvent.AddExtra("indexID", fmt.Sprintf("%d", indexID))
descriptorEvent.AddExtra("key", key)
w := &IndexFileBinlogWriter{
baseBinlogWriter: baseBinlogWriter{
descriptorEvent: *descriptorEvent,
magicNumber: MagicNumber,
binlogType: IndexFileBinlog,
eventWriters: make([]EventWriter, 0),
buffer: nil,
return w