mirror of https://github.com/milvus-io/milvus.git

Refactor flush scheduler

Signed-off-by: sunby <bingyi.sun@zilliz.com>

Branch: pull/4973/head^2
parent d5d9fa03ea
commit 95b162ccfd
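Note: the core of this commit is the rewrite of FlushScheduler.describe() below. The old loop blocked on one segment at a time (an inner for with time.Sleep); the new one keeps a set of pending segments, wakes on a 100 ms tick, polls every pending segment, and drains the finished ones, so new segment IDs are accepted without blocking the poller. A minimal sketch of that pattern, with a hypothetical poll helper standing in for the DescribeSegment-and-flush handling in the diff:

	package scheduler

	import (
		"context"
		"time"
	)

	// Sketch of the scheduling pattern this commit introduces; poll is a
	// hypothetical stand-in for DescribeSegment plus the flush handling below.
	func describeLoop(ctx context.Context, tasks <-chan int64, poll func(int64) bool) {
		timeTick := time.Tick(100 * time.Millisecond)
		pending := make(map[int64]bool) // segment ID set, as descTasks in the diff
		closable := make([]int64, 0)
		for {
			select {
			case <-ctx.Done():
				return
			case <-timeTick:
				for segID := range pending {
					if poll(segID) { // segment closed and flushed
						closable = append(closable, segID)
					}
				}
				for _, segID := range closable { // drop finished segments
					delete(pending, segID)
				}
				closable = closable[:0]
			case segID := <-tasks: // new segment to watch; never blocks the poller
				pending[segID] = false
			}
		}
	}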
@@ -68,6 +68,18 @@ func (kv *EtcdKV) Load(key string) (string, error) {
 	return string(resp.Kvs[0].Value), nil
 }
 
+func (kv *EtcdKV) GetCount(key string) (int64, error) {
+	key = path.Join(kv.rootPath, key)
+	ctx, cancel := context.WithTimeout(context.TODO(), RequestTimeout)
+	defer cancel()
+	resp, err := kv.client.Get(ctx, key)
+	if err != nil {
+		return -1, err
+	}
+
+	return resp.Count, nil
+}
+
 func (kv *EtcdKV) MultiLoad(keys []string) ([]string, error) {
 	ops := make([]clientv3.Op, 0, len(keys))
 	for _, keyLoad := range keys {
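Note: GetCount issues a plain Get and reads only resp.Count, so etcd still returns the matching key-value pairs. If only the count is wanted, clientv3 can be told to skip the values. A sketch under the same EtcdKV fields, not part of this patch:

	// Sketch only: same semantics as GetCount above, but clientv3.WithCountOnly()
	// asks etcd to return just the count, never the values themselves.
	func (kv *EtcdKV) getCountOnly(key string) (int64, error) {
		ctx, cancel := context.WithTimeout(context.TODO(), RequestTimeout)
		defer cancel()
		resp, err := kv.client.Get(ctx, path.Join(kv.rootPath, key), clientv3.WithCountOnly())
		if err != nil {
			return -1, err
		}
		return resp.Count, nil
	}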
@@ -90,12 +90,12 @@ func (m *MockBuildIndexClient) GetIndexFilePaths(indexID UniqueID) ([]string, er
 }
 
 type LoadIndexClient interface {
-	LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string) error
+	LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string, indexParams map[string]string) error
 }
 
 type MockLoadIndexClient struct {
 }
 
-func (m *MockLoadIndexClient) LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string) error {
+func (m *MockLoadIndexClient) LoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string, indexParams map[string]string) error {
 	return nil
 }
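Note: callers of the widened interface now supply the index parameters as a flat map. A usage sketch against the mock above; every argument value here is illustrative, not taken from the patch:

	// Sketch: how a caller passes indexParams through the new signature.
	func loadIndexExample() error {
		var c LoadIndexClient = &MockLoadIndexClient{}
		return c.LoadIndex(
			[]string{"path/to/index/file"}, // indexPaths (hypothetical)
			1, 100, "vec",                  // segmentID, fieldID, fieldName (hypothetical)
			map[string]string{"index_type": "IVF_FLAT", "nlist": "1024"},
		)
	}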
@@ -48,6 +48,9 @@ func (scheduler *FlushScheduler) schedule(id interface{}) error {
 	return nil
 }
 func (scheduler *FlushScheduler) describe() error {
+	timeTick := time.Tick(100 * time.Millisecond)
+	descTasks := make(map[UniqueID]bool)
+	closable := make([]UniqueID, 0)
 	for {
 		select {
 		case <-scheduler.ctx.Done():
@@ -55,62 +58,72 @@ func (scheduler *FlushScheduler) describe() error {
 				log.Printf("broadcast context done, exit")
 				return errors.New("broadcast done exit")
 			}
-		case singleSegmentID := <-scheduler.segmentDescribeChan:
-			for {
+		case <-timeTick:
+			for singleSegmentID := range descTasks {
 				description, err := scheduler.client.DescribeSegment(singleSegmentID)
 				if err != nil {
+					log.Printf("describe segment %d err %s", singleSegmentID, err.Error())
+					continue
+				}
+				if !description.IsClosed {
+					continue
+				}
+
+				log.Printf("flush segment %d is closed", singleSegmentID)
+				mapData, err := scheduler.client.GetInsertBinlogPaths(singleSegmentID)
+				if err != nil {
+					log.Printf("get insert binlog paths err, segID: %d, err: %s", singleSegmentID, err.Error())
+					continue
+				}
+				segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
+				if err != nil {
+					log.Printf("get segment from metable failed, segID: %d, err: %s", singleSegmentID, err.Error())
+					continue
+				}
+				for fieldID, data := range mapData {
+					// check field indexable
+					indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID)
+					if err != nil {
+						log.Printf("check field indexable from meta table failed, collID: %d, fieldID: %d, err %s", segMeta.CollectionID, fieldID, err.Error())
+						continue
+					}
+					if !indexable {
+						continue
+					}
+					info := &IndexBuildInfo{
+						segmentID:      singleSegmentID,
+						fieldID:        fieldID,
+						binlogFilePath: data,
+					}
+					err = scheduler.indexBuilderSch.Enqueue(info)
+					log.Printf("segment %d field %d enqueue build index scheduler", singleSegmentID, fieldID)
+					if err != nil {
+						log.Printf("index build enqueue failed, %s", err.Error())
+						continue
+					}
+				}
+				// Save data to meta table
+				segMeta.BinlogFilePaths = make([]*etcdpb.FieldBinlogFiles, 0)
+				for k, v := range mapData {
+					segMeta.BinlogFilePaths = append(segMeta.BinlogFilePaths, &etcdpb.FieldBinlogFiles{
+						FieldID:     k,
+						BinlogFiles: v,
+					})
+				}
+				if err = scheduler.metaTable.UpdateSegment(segMeta); err != nil {
 					return err
 				}
-				if description.IsClosed {
-					log.Printf("flush segment %d is closed", singleSegmentID)
-					mapData, err := scheduler.client.GetInsertBinlogPaths(singleSegmentID)
-					if err != nil {
-						return err
-					}
-					for fieldID, data := range mapData {
-						// check field indexable
-						segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
-						if err != nil {
-							return err
-						}
-						indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID)
-						if err != nil {
-							return err
-						}
-						if !indexable {
-							continue
-						}
-						info := &IndexBuildInfo{
-							segmentID:      singleSegmentID,
-							fieldID:        fieldID,
-							binlogFilePath: data,
-						}
-						err = scheduler.indexBuilderSch.Enqueue(info)
-						log.Printf("segment %d field %d enqueue build index scheduler", singleSegmentID, fieldID)
-						if err != nil {
-							return err
-						}
-					}
-					// Save data to meta table
-					segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID)
-					if err != nil {
-						return err
-					}
-					segMeta.BinlogFilePaths = make([]*etcdpb.FieldBinlogFiles, 0)
-					for k, v := range mapData {
-						segMeta.BinlogFilePaths = append(segMeta.BinlogFilePaths, &etcdpb.FieldBinlogFiles{
-							FieldID:     k,
-							BinlogFiles: v,
-						})
-					}
-					if err = scheduler.metaTable.UpdateSegment(segMeta); err != nil {
-						return err
-					}
-					log.Printf("flush segment %d finished", singleSegmentID)
-					break
-				}
-				time.Sleep(1 * time.Second)
+				log.Printf("flush segment %d finished", singleSegmentID)
+				closable = append(closable, singleSegmentID)
 			}
+
+			// remove closed segment and clear closable
+			for _, segID := range closable {
+				delete(descTasks, segID)
+			}
+			closable = closable[:0]
+		case segID := <-scheduler.segmentDescribeChan:
+			descTasks[segID] = false
 		}
 	}
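Note on the rewritten describe() above: time.Tick is convenient, but its underlying Ticker is never stopped and cannot be garbage-collected, and describe() does return when the context is cancelled. A minimal leak-free variant under the same scheduler fields, shown as a sketch only (the third select case and the poll body are elided):

	// Sketch: time.NewTicker with a deferred Stop releases the ticker when
	// describe returns, which time.Tick cannot do.
	func (scheduler *FlushScheduler) describeWithTicker() error {
		ticker := time.NewTicker(100 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-scheduler.ctx.Done():
				return errors.New("broadcast done exit")
			case <-ticker.C:
				// poll descTasks exactly as in the hunk above
			}
		}
	}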
@@ -133,6 +133,7 @@ func (scheduler *IndexBuildScheduler) describe() error {
 				fieldID:        indexBuildInfo.fieldID,
 				fieldName:      fieldName,
 				indexFilePaths: filePaths,
+				indexParams:    channelInfo.indexParams,
 			}
 			// Save data to meta table
 			err = scheduler.metaTable.UpdateFieldIndexMeta(&etcdpb.FieldIndexMeta{
@@ -3,12 +3,15 @@ package master
 
 import (
 	"context"
 	"log"
+
+	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
 )
 
 type IndexLoadInfo struct {
 	segmentID      UniqueID
 	fieldID        UniqueID
 	fieldName      string
+	indexParams    []*commonpb.KeyValuePair
 	indexFilePaths []string
 }
@@ -36,7 +39,11 @@ func NewIndexLoadScheduler(ctx context.Context, client LoadIndexClient, metaTabl
 
 func (scheduler *IndexLoadScheduler) schedule(info interface{}) error {
 	indexLoadInfo := info.(*IndexLoadInfo)
-	err := scheduler.client.LoadIndex(indexLoadInfo.indexFilePaths, indexLoadInfo.segmentID, indexLoadInfo.fieldID, indexLoadInfo.fieldName)
+	indexParams := make(map[string]string)
+	for _, kv := range indexLoadInfo.indexParams {
+		indexParams[kv.Key] = kv.Value
+	}
+	err := scheduler.client.LoadIndex(indexLoadInfo.indexFilePaths, indexLoadInfo.segmentID, indexLoadInfo.fieldID, indexLoadInfo.fieldName, indexParams)
 	//TODO: Save data to meta table
 	if err != nil {
 		return err
@@ -68,6 +68,7 @@ func (task *createIndexTask) Execute() error {
 		fieldID:        fieldID,
 		fieldName:      task.req.FieldName,
 		indexFilePaths: indexMeta.IndexFilePaths,
+		indexParams:    indexMeta.IndexParams,
 	})
 	if err != nil {
 		return err
@@ -10,6 +10,12 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/zilliztech/milvus-distributed/internal/querynode/client"
+
+	indexbuilderclient "github.com/zilliztech/milvus-distributed/internal/indexbuilder/client"
+
+	writerclient "github.com/zilliztech/milvus-distributed/internal/writenode/client"
+
 	etcdkv "github.com/zilliztech/milvus-distributed/internal/kv/etcd"
 	ms "github.com/zilliztech/milvus-distributed/internal/msgstream"
 	"github.com/zilliztech/milvus-distributed/internal/proto/masterpb"
@@ -175,9 +181,15 @@ func CreateServer(ctx context.Context) (*Master, error) {
 	m.scheduler.SetDDMsgStream(pulsarDDStream)
 	m.scheduler.SetIDAllocator(func() (UniqueID, error) { return m.idAllocator.AllocOne() })
 
-	flushClient := &MockWriteNodeClient{}
-	buildIndexClient := &MockBuildIndexClient{}
-	loadIndexClient := &MockLoadIndexClient{}
+	flushClient, err := writerclient.NewWriterClient(Params.EtcdAddress, kvRootPath, Params.WriteNodeSegKvSubPath, pulsarDDStream)
+	if err != nil {
+		return nil, err
+	}
+	buildIndexClient, err := indexbuilderclient.NewBuildIndexClient(ctx, Params.IndexBuilderAddress)
+	if err != nil {
+		return nil, err
+	}
+	loadIndexClient := client.NewLoadIndexClient(ctx, Params.PulsarAddress, Params.LoadIndexChannelNames)
 
 	m.indexLoadSch = NewIndexLoadScheduler(ctx, loadIndexClient, m.metaTable)
 	m.indexBuildSch = NewIndexBuildScheduler(ctx, buildIndexClient, m.metaTable, m.indexLoadSch)
@@ -50,6 +50,8 @@ type ParamTable struct {
 
 	MaxPartitionNum     int64
 	DefaultPartitionTag string
+
+	LoadIndexChannelNames []string
 }
 
 var Params ParamTable
@@ -97,6 +99,8 @@ func (p *ParamTable) Init() {
 	p.initMsgChannelSubName()
 	p.initMaxPartitionNum()
 	p.initDefaultPartitionTag()
+
+	p.initLoadIndexChannelNames()
 }
 
 func (p *ParamTable) initAddress() {
@@ -356,3 +360,11 @@ func (p *ParamTable) initDefaultPartitionTag() {
 
 	p.DefaultPartitionTag = defaultTag
 }
+
+func (p *ParamTable) initLoadIndexChannelNames() {
+	loadIndexChannelName, err := p.Load("msgChannel.chanNamePrefix.cmd")
+	if err != nil {
+		panic(err)
+	}
+	p.LoadIndexChannelNames = []string{loadIndexChannelName}
+}
@@ -236,7 +236,7 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
 			return err
 		}
 		for i, v := range val {
-			fmt.Printf("\t\t%d : %f\n", i, v)
+			fmt.Printf("\t\t%d : %v\n", i, v)
 		}
 	case schemapb.DataType_STRING:
 		rows, err := reader.GetPayloadLengthFromReader()
@@ -6,6 +6,7 @@ import (
 	"github.com/golang/protobuf/proto"
 	"go.etcd.io/etcd/clientv3"
 
+	"github.com/zilliztech/milvus-distributed/internal/errors"
 	"github.com/zilliztech/milvus-distributed/internal/kv"
 	etcdkv "github.com/zilliztech/milvus-distributed/internal/kv/etcd"
 	"github.com/zilliztech/milvus-distributed/internal/msgstream"
@@ -79,6 +80,21 @@ func (c *Client) DescribeSegment(segmentID UniqueID) (*SegmentDescription, error
 	}
 
 	key := c.kvPrefix + strconv.FormatInt(segmentID, 10)
+
+	etcdKV, ok := c.kvClient.(*etcdkv.EtcdKV)
+	if !ok {
+		return nil, errors.New("type assertion failed for etcd kv")
+	}
+	count, err := etcdKV.GetCount(key)
+	if err != nil {
+		return nil, err
+	}
+
+	if count <= 0 {
+		ret.IsClosed = false
+		return ret, nil
+	}
+
 	value, err := c.kvClient.Load(key)
 	if err != nil {
 		return ret, err