Save index meta to meta table

Signed-off-by: sunby <bingyi.sun@zilliz.com>
pull/4973/head^2
sunby 2020-12-23 15:13:45 +08:00 committed by yefu.chen
parent e52b130ba6
commit e7ebfcb05a
6 changed files with 320 additions and 160 deletions

View File

@@ -5,6 +5,8 @@ import (
 	"log"
 	"time"
 
+	"github.com/zilliztech/milvus-distributed/internal/proto/etcdpb"
+
 	"github.com/zilliztech/milvus-distributed/internal/errors"
 )
@@ -77,7 +79,21 @@ func (scheduler *FlushScheduler) describe() error {
 				return err
 			}
 		}
-		//TODO: Save data to meta table
+		// Save data to meta table
+		segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
+		if err != nil {
+			return err
+		}
+		segMeta.BinlogFilePaths = make([]*etcdpb.FieldBinlogFiles, 0)
+		for k, v := range mapData {
+			segMeta.BinlogFilePaths = append(segMeta.BinlogFilePaths, &etcdpb.FieldBinlogFiles{
+				FieldID:     k,
+				BinlogFiles: v,
+			})
+		}
+		if err = scheduler.metaTable.UpdateSegment(segMeta); err != nil {
+			return err
+		}
 		log.Printf("flush segment %d finished", singleSegmentID)
 		break
 	}
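With this change the flush path records each segment's binlog locations in the segment meta, so any component holding the meta table can recover the field-to-binlog mapping later. A minimal sketch of that inverse lookup, using only the GetSegmentByID accessor and the BinlogFilePaths field shown above (the helper itself is hypothetical and not part of this commit):

// readBinlogPaths rebuilds the fieldID -> binlog files map that describe() persists.
func readBinlogPaths(mt *metaTable, segID UniqueID) (map[UniqueID][]string, error) {
	segMeta, err := mt.GetSegmentByID(segID)
	if err != nil {
		return nil, err
	}
	paths := make(map[UniqueID][]string)
	for _, files := range segMeta.BinlogFilePaths {
		paths[files.FieldID] = files.BinlogFiles
	}
	return paths, nil
}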

View File

@@ -5,6 +5,10 @@ import (
 	"log"
 	"time"
 
+	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
+	"github.com/zilliztech/milvus-distributed/internal/proto/etcdpb"
+
 	"github.com/zilliztech/milvus-distributed/internal/errors"
 	"github.com/zilliztech/milvus-distributed/internal/proto/indexbuilderpb"
 )
@@ -15,8 +19,9 @@ type IndexBuildInfo struct {
 	binlogFilePath []string
 }
 
 type IndexBuildChannelInfo struct {
 	id          UniqueID
 	info        *IndexBuildInfo
+	indexParams []*commonpb.KeyValuePair
 }
 
 type IndexBuildScheduler struct {
@@ -47,14 +52,43 @@ func NewIndexBuildScheduler(ctx context.Context, client BuildIndexClient, metaTa
 
 func (scheduler *IndexBuildScheduler) schedule(info interface{}) error {
 	indexBuildInfo := info.(*IndexBuildInfo)
-	indexID, err := scheduler.client.BuildIndexWithoutID(indexBuildInfo.binlogFilePath, nil, nil)
+	segMeta, err := scheduler.metaTable.GetSegmentByID(indexBuildInfo.segmentID)
+	if err != nil {
+		return err
+	}
+
+	// parse index params
+	indexParams, err := scheduler.metaTable.GetFieldIndexParams(segMeta.CollectionID, indexBuildInfo.fieldID)
+	if err != nil {
+		return err
+	}
+	indexParamsMap := make(map[string]string)
+	for _, kv := range indexParams {
+		indexParamsMap[kv.Key] = kv.Value
+	}
+
+	indexID, err := scheduler.client.BuildIndexWithoutID(indexBuildInfo.binlogFilePath, nil, indexParamsMap)
 	log.Printf("build index for segment %d field %d", indexBuildInfo.segmentID, indexBuildInfo.fieldID)
 	if err != nil {
 		return err
 	}
+
+	err = scheduler.metaTable.AddFieldIndexMeta(&etcdpb.FieldIndexMeta{
+		SegmentID:   indexBuildInfo.segmentID,
+		FieldID:     indexBuildInfo.fieldID,
+		IndexID:     indexID,
+		IndexParams: indexParams,
+		Status:      indexbuilderpb.IndexStatus_NONE,
+	})
+	if err != nil {
+		log.Printf("WARNING: " + err.Error())
+		//return err
+	}
+
 	scheduler.indexDescribe <- &IndexBuildChannelInfo{
-		id:   indexID,
-		info: indexBuildInfo,
+		id:          indexID,
+		info:        indexBuildInfo,
+		indexParams: indexParams,
 	}
 	return nil
 }
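The substantive change in schedule() is that index parameters are no longer passed as nil: they are looked up from the collection schema through the meta table and flattened into the map that BuildIndexWithoutID accepts. A minimal sketch of that flattening step as a standalone helper (the helper name is an assumption, not part of this commit):

// kvPairsToMap flattens []*commonpb.KeyValuePair index params into the
// map[string]string form passed to BuildIndexWithoutID.
func kvPairsToMap(params []*commonpb.KeyValuePair) map[string]string {
	m := make(map[string]string, len(params))
	for _, kv := range params {
		m[kv.Key] = kv.Value
	}
	return m
}

With such a helper, the inline loop above reduces to indexParamsMap := kvPairsToMap(indexParams).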
@@ -100,7 +134,18 @@ func (scheduler *IndexBuildScheduler) describe() error {
 				fieldName:      fieldName,
 				indexFilePaths: filePaths,
 			}
-			//TODO: Save data to meta table
+			// Save data to meta table
+			err = scheduler.metaTable.UpdateFieldIndexMeta(&etcdpb.FieldIndexMeta{
+				SegmentID:      indexBuildInfo.segmentID,
+				FieldID:        indexBuildInfo.fieldID,
+				IndexID:        indexID,
+				IndexParams:    channelInfo.indexParams,
+				Status:         indexbuilderpb.IndexStatus_FINISHED,
+				IndexFilePaths: filePaths,
+			})
+			if err != nil {
+				return err
+			}
+
 			err = scheduler.indexLoadSch.Enqueue(info)
 			log.Printf("build index for segment %d field %d enqueue load index", indexBuildInfo.segmentID, indexBuildInfo.fieldID)
@@ -108,6 +153,18 @@ func (scheduler *IndexBuildScheduler) describe() error {
 				return err
 			}
 			log.Printf("build index for segment %d field %d finished", indexBuildInfo.segmentID, indexBuildInfo.fieldID)
+		} else {
+			// save status to meta table
+			err = scheduler.metaTable.UpdateFieldIndexMeta(&etcdpb.FieldIndexMeta{
+				SegmentID:   indexBuildInfo.segmentID,
+				FieldID:     indexBuildInfo.fieldID,
+				IndexID:     indexID,
+				IndexParams: channelInfo.indexParams,
+				Status:      description.Status,
+			})
+			if err != nil {
+				return err
+			}
 		}
 		time.Sleep(1 * time.Second)
 	}
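describe() now persists a FieldIndexMeta record on every poll: FINISHED together with the index file paths once the build completes, or just the latest status otherwise. A hedged sketch of how a reader of that record might decide whether a field already has a usable index, assuming a GetFieldIndexMeta accessor on the meta table (no such getter appears in this diff):

// hasUsableIndex is a sketch only; GetFieldIndexMeta is an assumed accessor.
func hasUsableIndex(mt *metaTable, segID UniqueID, fieldID UniqueID) bool {
	indexMeta, err := mt.GetFieldIndexMeta(segID, fieldID)
	if err != nil {
		return false
	}
	return indexMeta.Status == indexbuilderpb.IndexStatus_FINISHED && len(indexMeta.IndexFilePaths) > 0
}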

View File

@@ -619,3 +619,19 @@ func (mt *metaTable) removeSegmentIndexMeta(segID UniqueID) error {
 	return nil
 }
+
+func (mt *metaTable) GetFieldIndexParams(collID UniqueID, fieldID UniqueID) ([]*commonpb.KeyValuePair, error) {
+	mt.ddLock.RLock()
+	defer mt.ddLock.RUnlock()
+
+	if _, ok := mt.collID2Meta[collID]; !ok {
+		return nil, fmt.Errorf("can not find collection with id %d", collID)
+	}
+	for _, fieldSchema := range mt.collID2Meta[collID].Schema.Fields {
+		if fieldSchema.FieldID == fieldID {
+			return fieldSchema.IndexParams, nil
+		}
+	}
+	return nil, fmt.Errorf("can not find field %d in collection %d", fieldID, collID)
+}
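GetFieldIndexParams reports a missing collection or field as an error rather than returning an empty slice, so callers should branch on the error. A minimal usage sketch (mt, collectionID, and fieldID are illustrative placeholders):

params, err := mt.GetFieldIndexParams(collectionID, fieldID)
if err != nil {
	log.Printf("no index params: %v", err) // unknown collection, or field not in its schema
} else {
	for _, kv := range params {
		log.Printf("index param %s = %s", kv.Key, kv.Value)
	}
}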

View File

@@ -5,6 +5,9 @@ import (
 	"testing"
 	"time"
 
+	"github.com/zilliztech/milvus-distributed/internal/proto/etcdpb"
+	"github.com/zilliztech/milvus-distributed/internal/proto/schemapb"
+
 	"github.com/stretchr/testify/assert"
 
 	etcdkv "github.com/zilliztech/milvus-distributed/internal/kv/etcd"
 	"github.com/zilliztech/milvus-distributed/internal/proto/indexbuilderpb"
@@ -34,6 +37,23 @@ func TestPersistenceScheduler(t *testing.T) {
 	assert.Nil(t, err)
 	defer meta.client.Close()
 
+	err = meta.AddCollection(&etcdpb.CollectionMeta{
+		ID: 1,
+		Schema: &schemapb.CollectionSchema{
+			Name: "testcoll",
+			Fields: []*schemapb.FieldSchema{
+				{FieldID: 1},
+				{FieldID: 100},
+			},
+		},
+	})
+	assert.Nil(t, err)
+
+	err = meta.AddSegment(&etcdpb.SegmentMeta{
+		SegmentID:    1,
+		CollectionID: 1,
+	})
+	assert.Nil(t, err)
+
 	//Init scheduler
 	indexLoadSch := NewIndexLoadScheduler(ctx, loadIndexClient, meta)
 	indexBuildSch := NewIndexBuildScheduler(ctx, buildIndexClient, meta, indexLoadSch)
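The test now seeds the meta table with a collection (fields 1 and 100) and a segment so that the build scheduler's GetSegmentByID and GetFieldIndexParams lookups can succeed. A small additional assertion along the same lines could check the seeded schema directly (a sketch, not part of this commit):

params, err := meta.GetFieldIndexParams(1, 100)
assert.Nil(t, err)
assert.Equal(t, 0, len(params)) // field 100 was seeded without index params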

View File

@@ -13,11 +13,6 @@ import (
 	"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
 )
 
-const (
-	Ts  = "ts"
-	DDL = "ddl"
-)
-
 type (
 	UniqueID = typeutil.UniqueID
 	FieldID  = typeutil.UniqueID
@@ -59,6 +54,13 @@ func (b Blob) GetValue() []byte {
 	return b.Value
 }
 
+type Base struct {
+	Version  int
+	CommitID int
+	TenantID UniqueID
+	Schema   *etcdpb.CollectionMeta
+}
+
 type FieldData interface{}
 
 type BoolFieldData struct {
@@ -120,14 +122,10 @@ type InsertData struct {
 // Blob key example:
 // ${tenant}/insert_log/${collection_id}/${partition_id}/${segment_id}/${field_id}/${log_idx}
 type InsertCodec struct {
-	Schema          *etcdpb.CollectionMeta
+	Base
 	readerCloseFunc []func() error
 }
 
-func NewInsertCodec(schema *etcdpb.CollectionMeta) *InsertCodec {
-	return &InsertCodec{Schema: schema}
-}
-
 func (insertCodec *InsertCodec) Serialize(partitionID UniqueID, segmentID UniqueID, data *InsertData) ([]*Blob, error) {
 	var blobs []*Blob
 	var writer *InsertBinlogWriter
@@ -427,59 +425,20 @@ func (insertCodec *InsertCodec) Close() error {
 }
 
 // Blob key example:
-// ${tenant}/data_definition_log/${collection_id}/ts/${log_idx}
-// ${tenant}/data_definition_log/${collection_id}/ddl/${log_idx}
+// ${tenant}/data_definition_log/${collection_id}/${field_type}/${log_idx}
 type DataDefinitionCodec struct {
-	collectionID    int64
+	Base
 	readerCloseFunc []func() error
 }
 
-func NewDataDefinitionCodec(collectionID int64) *DataDefinitionCodec {
-	return &DataDefinitionCodec{collectionID: collectionID}
-}
-
 func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequests []string, eventTypes []EventTypeCode) ([]*Blob, error) {
-	writer, err := NewDDLBinlogWriter(schemapb.DataType_INT64, dataDefinitionCodec.collectionID)
+	writer, err := NewDDLBinlogWriter(schemapb.DataType_STRING, dataDefinitionCodec.Schema.ID)
 	if err != nil {
 		return nil, err
 	}
 	var blobs []*Blob
 
-	eventWriter, err := writer.NextCreateCollectionEventWriter()
-	if err != nil {
-		return nil, err
-	}
-	var int64Ts []int64
-	for _, singleTs := range ts {
-		int64Ts = append(int64Ts, int64(singleTs))
-	}
-	err = eventWriter.AddInt64ToPayload(int64Ts)
-	if err != nil {
-		return nil, err
-	}
-	eventWriter.SetStartTimestamp(ts[0])
-	eventWriter.SetEndTimestamp(ts[len(ts)-1])
-	writer.SetStartTimeStamp(ts[0])
-	writer.SetEndTimeStamp(ts[len(ts)-1])
-	err = writer.Close()
-	if err != nil {
-		return nil, err
-	}
-	buffer, err := writer.GetBuffer()
-	if err != nil {
-		return nil, err
-	}
-	blobs = append(blobs, &Blob{
-		Key:   Ts,
-		Value: buffer,
-	})
-
-	writer, err = NewDDLBinlogWriter(schemapb.DataType_STRING, dataDefinitionCodec.collectionID)
-	if err != nil {
-		return nil, err
-	}
-
 	for pos, req := range ddRequests {
 		switch eventTypes[pos] {
 		case CreateCollectionEventType:
@@ -534,12 +493,46 @@ func (dataDefinitionCodec *DataDefinitionCodec) Serialize(ts []Timestamp, ddRequ
 	if err != nil {
 		return nil, err
 	}
+
+	buffer, err := writer.GetBuffer()
+	if err != nil {
+		return nil, err
+	}
+	blobs = append(blobs, &Blob{
+		Key:   "",
+		Value: buffer,
+	})
+
+	writer, err = NewDDLBinlogWriter(schemapb.DataType_INT64, dataDefinitionCodec.Schema.ID)
+	if err != nil {
+		return nil, err
+	}
+	eventWriter, err := writer.NextCreateCollectionEventWriter()
+	if err != nil {
+		return nil, err
+	}
+	var int64Ts []int64
+	for _, singleTs := range ts {
+		int64Ts = append(int64Ts, int64(singleTs))
+	}
+	err = eventWriter.AddInt64ToPayload(int64Ts)
+	if err != nil {
+		return nil, err
+	}
+	eventWriter.SetStartTimestamp(ts[0])
+	eventWriter.SetEndTimestamp(ts[len(ts)-1])
+	writer.SetStartTimeStamp(ts[0])
+	writer.SetEndTimeStamp(ts[len(ts)-1])
+	err = writer.Close()
+	if err != nil {
+		return nil, err
+	}
+
 	buffer, err = writer.GetBuffer()
 	if err != nil {
 		return nil, err
 	}
 	blobs = append(blobs, &Blob{
-		Key:   DDL,
+		Key:   "",
 		Value: buffer,
 	})
@@ -627,10 +620,7 @@ func (dataDefinitionCodec *DataDefinitionCodec) Close() error {
 //func (indexCodec *IndexCodec) Deserialize(blobs []*Blob) ([]*Blob, error) {}
 
 type IndexCodec struct {
-}
-
-func NewIndexCodec() *IndexCodec {
-	return &IndexCodec{}
+	Base
 }
 
 func (indexCodec *IndexCodec) Serialize(blobs []*Blob) ([]*Blob, error) {
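With the NewInsertCodec, NewDataDefinitionCodec, and NewIndexCodec constructors removed, codecs are now built by filling in the embedded Base directly, as the updated tests in this commit do. A minimal sketch (collMeta stands for an *etcdpb.CollectionMeta obtained elsewhere):

base := Base{
	Version:  1,
	CommitID: 1,
	TenantID: 1,
	Schema:   collMeta, // assumed *etcdpb.CollectionMeta provided by the caller
}
insertCodec := &InsertCodec{base, make([]func() error, 0)}
ddCodec := &DataDefinitionCodec{base, make([]func() error, 0)}
indexCodec := &IndexCodec{base}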

View File

@@ -10,104 +10,112 @@ import (
 )
 
 func TestInsertCodec(t *testing.T) {
-	Schema := &etcdpb.CollectionMeta{
+	base := Base{
+		Version:  1,
+		CommitID: 1,
+		TenantID: 1,
+		Schema: &etcdpb.CollectionMeta{
+			ID:            1,
+			CreateTime:    1,
+			SegmentIDs:    []int64{0, 1},
+			PartitionTags: []string{"partition_0", "partition_1"},
+			Schema: &schemapb.CollectionSchema{
+				Name:        "schema",
+				Description: "schema",
+				AutoID:      true,
+				Fields: []*schemapb.FieldSchema{
+					{
+						FieldID:      0,
+						Name:         "row_id",
+						IsPrimaryKey: false,
+						Description:  "row_id",
+						DataType:     schemapb.DataType_INT64,
+					},
+					{
+						FieldID:      1,
+						Name:         "Ts",
+						IsPrimaryKey: false,
+						Description:  "Ts",
+						DataType:     schemapb.DataType_INT64,
+					},
+					{
+						FieldID:      100,
+						Name:         "field_bool",
+						IsPrimaryKey: false,
+						Description:  "description_2",
+						DataType:     schemapb.DataType_BOOL,
+					},
+					{
+						FieldID:      101,
+						Name:         "field_int8",
+						IsPrimaryKey: false,
+						Description:  "description_3",
+						DataType:     schemapb.DataType_INT8,
+					},
+					{
+						FieldID:      102,
+						Name:         "field_int16",
+						IsPrimaryKey: false,
+						Description:  "description_4",
+						DataType:     schemapb.DataType_INT16,
+					},
+					{
+						FieldID:      103,
+						Name:         "field_int32",
+						IsPrimaryKey: false,
+						Description:  "description_5",
+						DataType:     schemapb.DataType_INT32,
+					},
+					{
+						FieldID:      104,
+						Name:         "field_int64",
+						IsPrimaryKey: false,
+						Description:  "description_6",
+						DataType:     schemapb.DataType_INT64,
+					},
+					{
+						FieldID:      105,
+						Name:         "field_float",
+						IsPrimaryKey: false,
+						Description:  "description_7",
+						DataType:     schemapb.DataType_FLOAT,
+					},
+					{
+						FieldID:      106,
+						Name:         "field_double",
+						IsPrimaryKey: false,
+						Description:  "description_8",
+						DataType:     schemapb.DataType_DOUBLE,
+					},
+					{
+						FieldID:      107,
+						Name:         "field_string",
+						IsPrimaryKey: false,
+						Description:  "description_9",
+						DataType:     schemapb.DataType_STRING,
+					},
+					{
+						FieldID:      108,
+						Name:         "field_binary_vector",
+						IsPrimaryKey: false,
+						Description:  "description_10",
+						DataType:     schemapb.DataType_VECTOR_BINARY,
+					},
+					{
+						FieldID:      109,
+						Name:         "field_float_vector",
+						IsPrimaryKey: false,
+						Description:  "description_11",
+						DataType:     schemapb.DataType_VECTOR_FLOAT,
+					},
+				},
+			},
+		},
+	}
-	insertCodec := NewInsertCodec(Schema)
+	insertCodec := &InsertCodec{
+		base,
+		make([]func() error, 0),
+	}
 	insertDataFirst := &InsertData{
 		Data: map[int64]FieldData{
 			0: &Int64FieldData{
@@ -260,7 +268,58 @@ func TestInsertCodec(t *testing.T) {
 	assert.Nil(t, insertCodec.Close())
 }
 
 func TestDDCodec(t *testing.T) {
-	dataDefinitionCodec := NewDataDefinitionCodec(int64(1))
+	base := Base{
+		Version:  1,
+		CommitID: 1,
+		TenantID: 1,
+		Schema: &etcdpb.CollectionMeta{
+			ID:            1,
+			CreateTime:    1,
+			SegmentIDs:    []int64{0, 1},
+			PartitionTags: []string{"partition_0", "partition_1"},
+			Schema: &schemapb.CollectionSchema{
+				Name:        "schema",
+				Description: "schema",
+				AutoID:      true,
+				Fields: []*schemapb.FieldSchema{
+					{
+						Name:         "field_1",
+						IsPrimaryKey: false,
+						Description:  "description_1",
+						DataType:     schemapb.DataType_INT32,
+					},
+					{
+						Name:         "field_2",
+						IsPrimaryKey: false,
+						Description:  "description_1",
+						DataType:     schemapb.DataType_INT64,
+					},
+					{
+						Name:         "field_3",
+						IsPrimaryKey: false,
+						Description:  "description_1",
+						DataType:     schemapb.DataType_STRING,
+					},
+					{
+						Name:         "field_3",
+						IsPrimaryKey: false,
+						Description:  "description_1",
+						DataType:     schemapb.DataType_STRING,
+					},
+					{
+						Name:         "field_3",
+						IsPrimaryKey: false,
+						Description:  "description_1",
+						DataType:     schemapb.DataType_STRING,
+					},
+				},
+			},
+		},
+	}
+	dataDefinitionCodec := &DataDefinitionCodec{
+		base,
+		make([]func() error, 0),
+	}
 	ts := []Timestamp{
 		1,
 		2,
@@ -292,7 +351,9 @@ func TestDDCodec(t *testing.T) {
 }
 
 func TestIndexCodec(t *testing.T) {
-	indexCodec := NewIndexCodec()
+	indexCodec := &IndexCodec{
+		Base{},
+	}
 	blobs := []*Blob{
 		{
 			"12345",