Init params and add segment start/end position

Signed-off-by: neza2017 <yefu.chen@zilliz.com>
pull/4973/head^2
neza2017 2021-01-26 09:43:41 +08:00 committed by yefu.chen
parent f940cc455a
commit 5af23cf018
6 changed files with 250 additions and 109 deletions

View File

@@ -29,6 +29,10 @@ msgChannel:
queryNodeStats: "query-node-stats"
# cmd for loadIndex, flush, etc...
cmd: "cmd"
dataServiceInsertChannel: "insert-channel-"
dataServiceStatistic: "dataservice-statistics-channel"
dataServiceTimeTick: "dataservice-timetick-channel"
dataServiceSegmentInfo: "segment-info-channel"
# sub name generation rule: ${subNamePrefix}-${NodeID}
subNamePrefix:
@@ -37,6 +41,7 @@ msgChannel:
queryNodeSubNamePrefix: "queryNode"
writeNodeSubNamePrefix: "writeNode" # GOOSE TODO: remove this
dataNodeSubNamePrefix: "dataNode"
dataServiceSubNamePrefix: "dataService"
# default channel range [0, 1)
channelRange:
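For illustration (not part of this diff): the sub-name rule quoted above joins a prefix from subNamePrefix with the node's ID. A minimal Go sketch of that rule, using the dataService prefix added here and the nodeID from data_service.yaml below:

package main

import "fmt"

func main() {
    // sub name generation rule: ${subNamePrefix}-${NodeID}
    subNamePrefix := "dataService" // msgChannel.subNamePrefix.dataServiceSubNamePrefix
    nodeID := int64(14040)         // dataservice.nodeID in data_service.yaml
    fmt.Printf("%s-%d\n", subNamePrefix, nodeID) // dataService-14040
}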

View File

@@ -0,0 +1,13 @@
dataservice:
nodeID: 14040
address: "127.0.0.1"
port: 13333
segment:
# old name: segmentThreshold: 536870912
size: 512 # MB
sizeFactor: 0.75
defaultSizePerRecord: 1024
# old name: segmentExpireDuration: 2000
IDAssignExpiration: 2000 # ms
insertChannelNumPerCollection: 4
dataNodeNum: 2
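A quick worked example of how these knobs interact, assuming sizeFactor is the fill ratio at which the allocator stops assigning rows to a segment (a reading of the config, not something this commit states):

package main

import "fmt"

func main() {
    const (
        segmentSizeMB        = 512.0  // dataservice.segment.size
        sizeFactor           = 0.75   // dataservice.segment.sizeFactor
        defaultSizePerRecord = 1024.0 // bytes, dataservice.segment.defaultSizePerRecord
    )
    // 512 MB matches the old segmentThreshold of 536870912 bytes noted above.
    maxRows := int64(segmentSizeMB * 1024 * 1024 * sizeFactor / defaultSizePerRecord)
    fmt.Println(maxRows) // 393216 rows per segment, under these assumptions
}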

View File

@@ -1,6 +1,8 @@
package dataservice
import (
"strconv"
"github.com/zilliztech/milvus-distributed/internal/util/paramtable"
)
@@ -11,8 +13,6 @@ type ParamTable struct {
Port int
NodeID int64
MasterAddress string
EtcdAddress string
MetaRootPath string
KvRootPath string
@@ -31,6 +31,7 @@ type ParamTable struct {
DataNodeNum int
SegmentInfoChannelName string
DataServiceSubscriptionName string
K2SChannelNames []string
}
var Params ParamTable
@@ -39,15 +40,14 @@ func (p *ParamTable) Init() {
// load yaml
p.BaseTable.Init()
err := p.LoadYaml("advanced/master.yaml")
if err != nil {
if err := p.LoadYaml("advanced/data_service.yaml"); err != nil {
panic(err)
}
// set members
p.initAddress()
p.initPort()
p.NodeID = 1 // todo
p.initNodeID()
p.initEtcdAddress()
p.initMetaRootPath()
@@ -68,15 +68,19 @@ func (p *ParamTable) Init() {
}
func (p *ParamTable) initAddress() {
masterAddress, err := p.Load("master.address")
dataserviceAddress, err := p.Load("dataservice.address")
if err != nil {
panic(err)
}
p.Address = masterAddress
p.Address = dataserviceAddress
}
func (p *ParamTable) initPort() {
p.Port = p.ParseInt("master.port")
p.Port = p.ParseInt("dataservice.port")
}
func (p *ParamTable) initNodeID() {
p.NodeID = p.ParseInt64("dataservice.nodeID")
}
func (p *ParamTable) initEtcdAddress() {
@@ -119,46 +123,83 @@ func (p *ParamTable) initKvRootPath() {
p.KvRootPath = rootPath + "/" + subPath
}
func (p *ParamTable) initSegmentSize() {
p.SegmentSize = p.ParseFloat("master.segment.size")
p.SegmentSize = p.ParseFloat("dataservice.segment.size")
}
func (p *ParamTable) initSegmentSizeFactor() {
p.SegmentSizeFactor = p.ParseFloat("master.segment.sizeFactor")
p.SegmentSizeFactor = p.ParseFloat("dataservice.segment.sizeFactor")
}
func (p *ParamTable) initDefaultRecordSize() {
p.DefaultRecordSize = p.ParseInt64("master.segment.defaultSizePerRecord")
p.DefaultRecordSize = p.ParseInt64("dataservice.segment.defaultSizePerRecord")
}
// TODO read from config/env
func (p *ParamTable) initSegIDAssignExpiration() {
p.SegIDAssignExpiration = 3000 //ms
p.SegIDAssignExpiration = p.ParseInt64("dataservice.segment.IDAssignExpiration") //ms
}
func (p *ParamTable) initInsertChannelPrefixName() {
p.InsertChannelPrefixName = "insert-channel-"
var err error
p.InsertChannelPrefixName, err = p.Load("msgChannel.chanNamePrefix.dataServiceInsertChannel")
if err != nil {
panic(err)
}
}
func (p *ParamTable) initInsertChannelNumPerCollection() {
p.InsertChannelNumPerCollection = 4
p.InsertChannelNumPerCollection = p.ParseInt64("dataservice.insertChannelNumPerCollection")
}
func (p *ParamTable) initStatisticsChannelName() {
p.StatisticsChannelName = "dataservice-statistics-channel"
var err error
p.StatisticsChannelName, err = p.Load("msgChannel.chanNamePrefix.dataServiceStatistic")
if err != nil {
panic(err)
}
}
func (p *ParamTable) initTimeTickChannelName() {
p.TimeTickChannelName = "dataservice-timetick-channel"
var err error
p.TimeTickChannelName, err = p.Load("msgChannel.chanNamePrefix.dataServiceTimeTick")
if err != nil {
panic(err)
}
}
func (p *ParamTable) initDataNodeNum() {
p.DataNodeNum = 2
p.DataNodeNum = p.ParseInt("dataservice.dataNodeNum")
}
func (p *ParamTable) initSegmentInfoChannelName() {
p.SegmentInfoChannelName = "segment-info-channel"
var err error
p.SegmentInfoChannelName, err = p.Load("msgChannel.chanNamePrefix.dataServiceSegmentInfo")
if err != nil {
panic(err)
}
}
func (p *ParamTable) initDataServiceSubscriptionName() {
p.DataServiceSubscriptionName = "dataserive-sub"
var err error
p.DataServiceSubscriptionName, err = p.Load("msgChannel.chanNamePrefix.dataServiceSubNamePrefix")
if err != nil {
panic(err)
}
}
func (p *ParamTable) initK2SChannelNames() {
prefix, err := p.Load("msgChannel.chanNamePrefix.k2s")
if err != nil {
panic(err)
}
prefix += "-"
iRangeStr, err := p.Load("msgChannel.channelRange.k2s")
if err != nil {
panic(err)
}
channelIDs := paramtable.ConvertRangeToIntSlice(iRangeStr, ",")
var ret []string
for _, ID := range channelIDs {
ret = append(ret, prefix+strconv.Itoa(ID))
}
p.K2SChannelNames = ret
}
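For reference, a standalone sketch of what initK2SChannelNames produces, with a local stand-in for paramtable.ConvertRangeToIntSlice (assumed here to expand a "begin,end" string into the half-open range of IDs, matching the "default channel range [0, 1)" comment in the config above):

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// expandRange is an illustrative stand-in for paramtable.ConvertRangeToIntSlice:
// "0,2" with sep "," yields [0 1].
func expandRange(rangeStr, sep string) []int {
    parts := strings.Split(rangeStr, sep)
    begin, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
    end, _ := strconv.Atoi(strings.TrimSpace(parts[1]))
    ids := make([]int, 0, end-begin)
    for i := begin; i < end; i++ {
        ids = append(ids, i)
    }
    return ids
}

func main() {
    prefix := "k2s" + "-" // msgChannel.chanNamePrefix.k2s plus the "-" appended above
    for _, id := range expandRange("0,2", ",") {
        fmt.Println(prefix + strconv.Itoa(id)) // k2s-0, k2s-1
    }
}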

View File

@@ -49,28 +49,31 @@ type (
UniqueID = typeutil.UniqueID
Timestamp = typeutil.Timestamp
Server struct {
ctx context.Context
serverLoopCtx context.Context
serverLoopCancel context.CancelFunc
serverLoopWg sync.WaitGroup
state internalpb2.StateCode
client *etcdkv.EtcdKV
meta *meta
segAllocator segmentAllocator
statsHandler *statsHandler
insertChannelMgr *insertChannelManager
allocator allocator
cluster *dataNodeCluster
msgProducer *timesync.MsgProducer
registerFinishCh chan struct{}
masterClient *masterservice.GrpcClient
ttMsgStream msgstream.MsgStream
ddChannelName string
segmentInfoStream msgstream.MsgStream
ctx context.Context
serverLoopCtx context.Context
serverLoopCancel context.CancelFunc
serverLoopWg sync.WaitGroup
state internalpb2.StateCode
client *etcdkv.EtcdKV
meta *meta
segAllocator segmentAllocator
statsHandler *statsHandler
insertChannelMgr *insertChannelManager
allocator allocator
cluster *dataNodeCluster
msgProducer *timesync.MsgProducer
registerFinishCh chan struct{}
masterClient *masterservice.GrpcClient
ttMsgStream msgstream.MsgStream
k2sMsgStream msgstream.MsgStream
ddChannelName string
segmentInfoStream msgstream.MsgStream
segmentFlushStream msgstream.MsgStream
}
)
func CreateServer(ctx context.Context, client *masterservice.GrpcClient) (*Server, error) {
Params.Init()
ch := make(chan struct{})
return &Server{
ctx: ctx,
@@ -83,32 +86,29 @@ func CreateServer(ctx context.Context, client *masterservice.GrpcClient) (*Serve
}
func (s *Server) Init() error {
Params.Init()
return nil
}
func (s *Server) Start() error {
var err error
s.allocator = newAllocatorImpl(s.masterClient)
if err := s.initMeta(); err != nil {
if err = s.initMeta(); err != nil {
return err
}
s.statsHandler = newStatsHandler(s.meta)
segAllocator, err := newSegmentAllocator(s.meta, s.allocator)
s.segAllocator, err = newSegmentAllocator(s.meta, s.allocator)
if err != nil {
return err
}
s.segAllocator = segAllocator
s.waitDataNodeRegister()
if err = s.loadMetaFromMaster(); err != nil {
return err
}
s.initSegmentInfoChannel()
if err = s.initMsgProducer(); err != nil {
return err
}
s.initSegmentInfoChannel()
if err = s.loadMetaFromMaster(); err != nil {
return err
}
s.startServerLoop()
s.waitDataNodeRegister()
s.state = internalpb2.StateCode_HEALTHY
log.Println("start success")
return nil
@@ -128,61 +128,6 @@ func (s *Server) initMeta() error {
return nil
}
func (s *Server) waitDataNodeRegister() {
log.Println("waiting data node to register")
<-s.registerFinishCh
log.Println("all data nodes register")
}
func (s *Server) initMsgProducer() error {
ttMsgStream := pulsarms.NewPulsarTtMsgStream(s.ctx, 1024)
ttMsgStream.SetPulsarClient(Params.PulsarAddress)
ttMsgStream.CreatePulsarConsumers([]string{Params.TimeTickChannelName}, Params.DataServiceSubscriptionName, util.NewUnmarshalDispatcher(), 1024)
s.ttMsgStream = ttMsgStream
s.ttMsgStream.Start()
timeTickBarrier := timesync.NewHardTimeTickBarrier(s.ttMsgStream, s.cluster.GetNodeIDs())
dataNodeTTWatcher := newDataNodeTimeTickWatcher(s.meta, s.segAllocator, s.cluster)
producer, err := timesync.NewTimeSyncMsgProducer(timeTickBarrier, dataNodeTTWatcher)
if err != nil {
return err
}
s.msgProducer = producer
s.msgProducer.Start(s.ctx)
return nil
}
func (s *Server) startServerLoop() {
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
s.serverLoopWg.Add(1)
go s.startStatsChannel(s.serverLoopCtx)
}
func (s *Server) startStatsChannel(ctx context.Context) {
defer s.serverLoopWg.Done()
statsStream := pulsarms.NewPulsarMsgStream(ctx, 1024)
statsStream.SetPulsarClient(Params.PulsarAddress)
statsStream.CreatePulsarConsumers([]string{Params.StatisticsChannelName}, Params.DataServiceSubscriptionName, util.NewUnmarshalDispatcher(), 1024)
statsStream.Start()
defer statsStream.Close()
for {
select {
case <-ctx.Done():
return
default:
}
msgPack := statsStream.Consume()
for _, msg := range msgPack.Msgs {
statistics := msg.(*msgstream.SegmentStatisticsMsg)
for _, stat := range statistics.SegStats {
if err := s.statsHandler.HandleSegmentStat(stat); err != nil {
log.Println(err.Error())
continue
}
}
}
}
}
func (s *Server) initSegmentInfoChannel() {
segmentInfoStream := pulsarms.NewPulsarMsgStream(s.ctx, 1024)
segmentInfoStream.SetPulsarClient(Params.PulsarAddress)
@@ -190,6 +135,28 @@ func (s *Server) initSegmentInfoChannel() {
s.segmentInfoStream = segmentInfoStream
s.segmentInfoStream.Start()
}
func (s *Server) initMsgProducer() error {
ttMsgStream := pulsarms.NewPulsarMsgStream(s.ctx, 1024)
ttMsgStream.SetPulsarClient(Params.PulsarAddress)
ttMsgStream.CreatePulsarConsumers([]string{Params.TimeTickChannelName}, Params.DataServiceSubscriptionName, util.NewUnmarshalDispatcher(), 1024)
s.ttMsgStream = ttMsgStream
s.ttMsgStream.Start()
timeTickBarrier := timesync.NewHardTimeTickBarrier(s.ttMsgStream, s.cluster.GetNodeIDs())
dataNodeTTWatcher := newDataNodeTimeTickWatcher(s.meta, s.segAllocator, s.cluster)
k2sStream := pulsarms.NewPulsarMsgStream(s.ctx, 1024)
k2sStream.SetPulsarClient(Params.PulsarAddress)
k2sStream.CreatePulsarProducers(Params.K2SChannelNames)
s.k2sMsgStream = k2sStream
s.k2sMsgStream.Start()
k2sMsgWatcher := timesync.NewMsgTimeTickWatcher(s.k2sMsgStream)
producer, err := timesync.NewTimeSyncMsgProducer(timeTickBarrier, dataNodeTTWatcher, k2sMsgWatcher)
if err != nil {
return err
}
s.msgProducer = producer
s.msgProducer.Start(s.ctx)
return nil
}
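The producer is now constructed with two watchers, so each tick released by the barrier presumably reaches both the data-node watcher and the new k2s watcher. A toy sketch of that fan-out shape, using illustrative types rather than the timesync API:

package main

import "fmt"

// tickWatcher plays the role of dataNodeTTWatcher and k2sMsgWatcher above:
// every watcher sees every tick the barrier releases.
type tickWatcher interface{ Watch(ts uint64) }

type logWatcher struct{ name string }

func (w logWatcher) Watch(ts uint64) { fmt.Printf("%s got tick %d\n", w.name, ts) }

func main() {
    watchers := []tickWatcher{logWatcher{"dataNodeTTWatcher"}, logWatcher{"k2sMsgWatcher"}}
    for ts := uint64(100); ts <= 102; ts++ { // stand-in for the hard time-tick barrier
        for _, w := range watchers {
            w.Watch(ts)
        }
    }
}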
func (s *Server) loadMetaFromMaster() error {
log.Println("loading collection meta from master")
@@ -248,9 +215,83 @@ func (s *Server) loadMetaFromMaster() error {
log.Println("load collection meta from master complete")
return nil
}
func (s *Server) startServerLoop() {
s.serverLoopCtx, s.serverLoopCancel = context.WithCancel(s.ctx)
s.serverLoopWg.Add(2)
go s.startStatsChannel(s.serverLoopCtx)
go s.startSegmentFlushChannel(s.serverLoopCtx)
}
func (s *Server) startStatsChannel(ctx context.Context) {
defer s.serverLoopWg.Done()
statsStream := pulsarms.NewPulsarMsgStream(ctx, 1024)
statsStream.SetPulsarClient(Params.PulsarAddress)
statsStream.CreatePulsarConsumers([]string{Params.StatisticsChannelName}, Params.DataServiceSubscriptionName, util.NewUnmarshalDispatcher(), 1024)
statsStream.Start()
defer statsStream.Close()
for {
select {
case <-ctx.Done():
return
default:
}
msgPack := statsStream.Consume()
for _, msg := range msgPack.Msgs {
statistics := msg.(*msgstream.SegmentStatisticsMsg)
for _, stat := range statistics.SegStats {
if err := s.statsHandler.HandleSegmentStat(stat); err != nil {
log.Println(err.Error())
continue
}
}
}
}
}
func (s *Server) startSegmentFlushChannel(ctx context.Context) {
defer s.serverLoopWg.Done()
flushStream := pulsarms.NewPulsarMsgStream(ctx, 1024)
flushStream.SetPulsarClient(Params.PulsarAddress)
flushStream.CreatePulsarConsumers([]string{Params.SegmentInfoChannelName}, Params.DataServiceSubscriptionName, util.NewUnmarshalDispatcher(), 1024)
flushStream.Start()
defer flushStream.Close()
for {
select {
case <-ctx.Done():
log.Println("segment flush channel shut down")
return
default:
}
msgPack := flushStream.Consume()
for _, msg := range msgPack.Msgs {
if msg.Type() != commonpb.MsgType_kSegmentFlushDone {
continue
}
realMsg := msg.(*msgstream.FlushCompletedMsg)
segmentInfo, err := s.meta.GetSegment(realMsg.SegmentID)
if err != nil {
log.Println(err.Error())
continue
}
segmentInfo.FlushedTime = realMsg.BeginTimestamp
if err = s.meta.UpdateSegment(segmentInfo); err != nil {
log.Println(err.Error())
continue
}
}
}
}
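Both loops share the lifecycle set up in startServerLoop: Add(2) before the goroutines start, each loop defers Done, and stopServerLoop (not shown in this hunk) presumably cancels serverLoopCtx and waits. The pattern in isolation, as a minimal sketch:

package main

import (
    "context"
    "fmt"
    "sync"
)

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    var wg sync.WaitGroup
    wg.Add(2) // one per server loop, mirroring startServerLoop
    loop := func(name string) {
        defer wg.Done()
        for {
            select {
            case <-ctx.Done():
                fmt.Println(name, "shut down")
                return
            default:
            }
            // stand-in for stream.Consume() and message handling
        }
    }
    go loop("statsChannel")
    go loop("segmentFlushChannel")
    cancel()  // what a stop path would trigger
    wg.Wait() // returns once both loops have exited
}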
func (s *Server) waitDataNodeRegister() {
log.Println("waiting data node to register")
<-s.registerFinishCh
log.Println("all data nodes register")
}
func (s *Server) Stop() error {
s.ttMsgStream.Close()
s.k2sMsgStream.Close()
s.msgProducer.Close()
s.segmentInfoStream.Close()
s.stopServerLoop()
@@ -398,6 +439,23 @@ func (s *Server) openNewSegment(collectionID UniqueID, partitionID UniqueID, cha
if err = s.segAllocator.OpenSegment(segmentInfo); err != nil {
return err
}
infoMsg := &msgstream.SegmentInfoMsg{
SegmentMsg: datapb.SegmentMsg{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_kSegmentInfo,
MsgID: 0,
Timestamp: 0, // todo
SourceID: 0,
},
Segment: segmentInfo,
},
}
msgPack := &pulsarms.MsgPack{
Msgs: []msgstream.TsMsg{infoMsg},
}
if err = s.segmentInfoStream.Produce(msgPack); err != nil {
return err
}
return nil
}
@@ -422,7 +480,8 @@ func (s *Server) GetSegmentStates(req *datapb.SegmentStatesRequest) (*datapb.Seg
resp.CreateTime = segmentInfo.OpenTime
resp.SealedTime = segmentInfo.SealedTime
resp.FlushedTime = segmentInfo.FlushedTime
// TODO start/end positions
resp.StartPositions = segmentInfo.StartPosition
resp.EndPositions = segmentInfo.EndPosition
return resp, nil
}

View File

@@ -20,10 +20,25 @@ func (handler *statsHandler) HandleSegmentStat(segStats *internalpb2.SegmentStat
return err
}
//if segStats.IsNewSegment {
// segMeta.OpenTime = segStats.CreateTime
// segMeta.segStats.StartPositions
//}
if segStats.IsNewSegment {
segMeta.OpenTime = segStats.CreateTime
segMeta.StartPosition = append(segMeta.StartPosition, segStats.StartPositions...)
}
segMeta.SealedTime = segStats.EndTime
for _, pos := range segStats.EndPositions {
isNew := true
for _, epos := range segMeta.EndPosition {
if epos.ChannelName == pos.ChannelName {
epos.Timestamp = pos.Timestamp
epos.MsgID = pos.MsgID
isNew = false
break
}
}
if isNew {
segMeta.EndPosition = append(segMeta.EndPosition, pos)
}
}
segMeta.NumRows = segStats.NumRows
segMeta.MemSize = segStats.MemorySize
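The end-position update above is an upsert keyed by channel name: overwrite the stored position when the channel is already tracked, append otherwise. The same logic as a standalone sketch (position is an illustrative stand-in for the generated proto type; pointers matter because the loop mutates entries in place):

package main

import "fmt"

type position struct {
    ChannelName string
    Timestamp   uint64
}

func mergeEndPositions(stored, incoming []*position) []*position {
    for _, pos := range incoming {
        isNew := true
        for _, epos := range stored {
            if epos.ChannelName == pos.ChannelName {
                epos.Timestamp = pos.Timestamp // update in place
                isNew = false
                break
            }
        }
        if isNew {
            stored = append(stored, pos)
        }
    }
    return stored
}

func main() {
    eps := []*position{{"insert-channel-0", 100}}
    eps = mergeEndPositions(eps, []*position{{"insert-channel-0", 200}, {"insert-channel-1", 150}})
    for _, p := range eps {
        fmt.Println(p.ChannelName, p.Timestamp) // insert-channel-0 200, then insert-channel-1 150
    }
}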

View File

@@ -17,6 +17,14 @@ type MsgTimeTickWatcher struct {
msgQueue chan *ms.TimeTickMsg
}
func NewMsgTimeTickWatcher(streams ...ms.MsgStream) *MsgTimeTickWatcher {
watcher := &MsgTimeTickWatcher{
streams: streams,
msgQueue: make(chan *ms.TimeTickMsg),
}
return watcher
}
func (watcher *MsgTimeTickWatcher) Watch(msg *ms.TimeTickMsg) {
watcher.msgQueue <- msg
}
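msgQueue is created unbuffered, so Watch blocks until the watcher's consuming loop (elsewhere in this file, not shown) takes the message. The handoff semantics in a generic sketch, with plain strings instead of the ms types:

package main

import "fmt"

func main() {
    msgQueue := make(chan string) // unbuffered, like the watcher's msgQueue
    done := make(chan struct{})
    go func() { // stand-in for the watcher's background loop
        for msg := range msgQueue {
            fmt.Println("forwarded:", msg)
        }
        close(done)
    }()
    msgQueue <- "time-tick msg" // Watch would block here until the loop receives
    close(msgQueue)
    <-done
}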