Refactor data service segment allocator

Signed-off-by: sunby <bingyi.sun@zilliz.com>
pull/4973/head^2
sunby 2021-04-12 16:35:51 +08:00 committed by yefu.chen
parent e760605187
commit 61edc53faf
5 changed files with 176 additions and 88 deletions

@@ -4,6 +4,8 @@ import (
"context"
"github.com/zilliztech/milvus-distributed/internal/proto/datapb"
"github.com/zilliztech/milvus-distributed/internal/proto/milvuspb"
"github.com/zilliztech/milvus-distributed/internal/types"
"github.com/golang/protobuf/proto"
"github.com/zilliztech/milvus-distributed/internal/msgstream"
@@ -14,12 +16,14 @@ import (
type ddHandler struct {
meta *meta
segmentAllocator segmentAllocatorInterface
masterClient types.MasterService
}
func newDDHandler(meta *meta, allocator segmentAllocatorInterface) *ddHandler {
func newDDHandler(meta *meta, allocator segmentAllocatorInterface, client types.MasterService) *ddHandler {
return &ddHandler{
meta: meta,
segmentAllocator: allocator,
masterClient: client,
}
}
@@ -47,9 +51,24 @@ func (handler *ddHandler) handleCreateCollection(msg *msgstream.CreateCollection
if err := proto.Unmarshal(msg.Schema, schema); err != nil {
return err
}
err := handler.meta.AddCollection(&datapb.CollectionInfo{
ID: msg.CollectionID,
Schema: schema,
presp, err := handler.masterClient.ShowPartitions(context.TODO(), &milvuspb.ShowPartitionsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_ShowPartitions,
MsgID: -1, // todo
Timestamp: 0, // todo
SourceID: Params.NodeID,
},
DbName: "",
CollectionName: schema.Name,
CollectionID: msg.CollectionID,
})
if err = VerifyResponse(presp, err); err != nil {
return err
}
err = handler.meta.AddCollection(&datapb.CollectionInfo{
ID: msg.CollectionID,
Schema: schema,
Partitions: presp.PartitionIDs,
})
if err != nil {
return err

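Context for reviewers: handleCreateCollection now pairs the ShowPartitions RPC with the VerifyResponse helper, collapsing the transport error and the application-level status into one check. A minimal sketch of what such a verify-style helper typically does; Status, statusCarrier, verifyResponse, and fakeResp below are illustrative stand-ins, not the repo's actual definitions.

package main

import (
	"errors"
	"fmt"
)

// Status mirrors the error fields of commonpb.Status.
type Status struct {
	ErrorCode int32 // 0 means success
	Reason    string
}

// statusCarrier is a hypothetical interface satisfied by responses
// such as ShowPartitionsResponse that embed a *Status.
type statusCarrier interface{ GetStatus() *Status }

// verifyResponse folds the two failure modes of an RPC -- a transport
// error and an application-level error code -- into a single error.
func verifyResponse(resp statusCarrier, rpcErr error) error {
	if rpcErr != nil {
		return rpcErr
	}
	if resp == nil || resp.GetStatus() == nil {
		return errors.New("response or status is nil")
	}
	if s := resp.GetStatus(); s.ErrorCode != 0 {
		return fmt.Errorf("error code %d: %s", s.ErrorCode, s.Reason)
	}
	return nil
}

// fakeResp is a toy response used only to exercise the helper.
type fakeResp struct{ status *Status }

func (r *fakeResp) GetStatus() *Status { return r.status }

func main() {
	resp := &fakeResp{status: &Status{ErrorCode: 1, Reason: "collection not found"}}
	fmt.Println(verifyResponse(resp, nil)) // error code 1: collection not found
}
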
@@ -347,6 +347,21 @@ func (meta *meta) DropPartition(collID UniqueID, partitionID UniqueID) error {
return nil
}
func (meta *meta) HasPartition(collID UniqueID, partID UniqueID) bool {
meta.RLock()
defer meta.RUnlock()
coll, ok := meta.collections[collID]
if !ok {
return false
}
for _, id := range coll.Partitions {
if partID == id {
return true
}
}
return false
}
func (meta *meta) GetNumRowsOfPartition(collectionID UniqueID, partitionID UniqueID) (int64, error) {
meta.RLock()
defer meta.RUnlock()

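The new HasPartition follows the same read-locked membership scan as the existing HasCollection: take the read lock, look up the collection, then linearly scan its partition list. A self-contained sketch of the pattern; collectionInfo here is a toy stand-in reduced to one field (the real datapb.CollectionInfo also carries the schema).

package main

import (
	"fmt"
	"sync"
)

type UniqueID = int64

// collectionInfo keeps only what this sketch needs.
type collectionInfo struct {
	Partitions []UniqueID
}

type meta struct {
	sync.RWMutex
	collections map[UniqueID]*collectionInfo
}

// HasPartition answers membership under a read lock: the collection
// must exist and its partition list must contain partID.
func (m *meta) HasPartition(collID, partID UniqueID) bool {
	m.RLock()
	defer m.RUnlock()
	coll, ok := m.collections[collID]
	if !ok {
		return false
	}
	for _, id := range coll.Partitions {
		if id == partID {
			return true
		}
	}
	return false
}

func main() {
	m := &meta{collections: map[UniqueID]*collectionInfo{
		1: {Partitions: []UniqueID{100, 101}},
	}}
	fmt.Println(m.HasPartition(1, 100)) // true
	fmt.Println(m.HasPartition(1, 999)) // false: unknown partition
	fmt.Println(m.HasPartition(2, 100)) // false: unknown collection
}
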
@@ -10,6 +10,9 @@ import (
"go.uber.org/zap"
"github.com/zilliztech/milvus-distributed/internal/log"
"github.com/zilliztech/milvus-distributed/internal/msgstream"
"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
"github.com/zilliztech/milvus-distributed/internal/util/trace"
"github.com/zilliztech/milvus-distributed/internal/util/tsoutil"
"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
@@ -71,10 +74,22 @@ type segmentAllocator struct {
segmentThresholdFactor float64
mu sync.RWMutex
allocator allocatorInterface
segmentInfoStream msgstream.MsgStream
}
func newSegmentAllocator(meta *meta, allocator allocatorInterface) *segmentAllocator {
segmentAllocator := &segmentAllocator{
type Option struct {
apply func(alloc *segmentAllocator)
}
func WithSegmentStream(stream msgstream.MsgStream) Option {
return Option{
apply: func(alloc *segmentAllocator) {
alloc.segmentInfoStream = stream
},
}
}
func newSegmentAllocator(meta *meta, allocator allocatorInterface, opts ...Option) *segmentAllocator {
alloc := &segmentAllocator{
mt: meta,
segments: make(map[UniqueID]*segmentStatus),
segmentExpireDuration: Params.SegIDAssignExpiration,
@@ -82,7 +97,10 @@ func newSegmentAllocator(meta *meta, allocator allocatorInterface) *segmentAlloc
segmentThresholdFactor: Params.SegmentSizeFactor,
allocator: allocator,
}
return segmentAllocator
for _, opt := range opts {
opt.apply(alloc)
}
return alloc
}
func (allocator *segmentAllocator) OpenSegment(ctx context.Context, segmentInfo *datapb.SegmentInfo) error {
@@ -93,6 +111,10 @@ func (allocator *segmentAllocator) OpenSegment(ctx context.Context, segmentInfo
if _, ok := allocator.segments[segmentInfo.ID]; ok {
return fmt.Errorf("segment %d already exist", segmentInfo.ID)
}
return allocator.open(segmentInfo)
}
func (allocator *segmentAllocator) open(segmentInfo *datapb.SegmentInfo) error {
totalRows, err := allocator.estimateTotalRows(segmentInfo.CollectionID)
if err != nil {
return err
@@ -139,7 +161,24 @@ func (allocator *segmentAllocator) AllocSegment(ctx context.Context, collectionI
return
}
err = newErrRemainInSufficient(requestRows)
var segStatus *segmentStatus
segStatus, err = allocator.openNewSegment(ctx, collectionID, partitionID, channelName)
if err != nil {
return
}
var success bool
success, err = allocator.alloc(segStatus, requestRows)
if err != nil {
return
}
if !success {
err = newErrRemainInSufficient(requestRows)
return
}
segID = segStatus.id
retCount = requestRows
expireTime = segStatus.lastExpireTime
return
}
@@ -176,6 +215,48 @@ func (allocator *segmentAllocator) alloc(segStatus *segmentStatus, numRows int)
return true, nil
}
func (allocator *segmentAllocator) openNewSegment(ctx context.Context, collectionID UniqueID, partitionID UniqueID, channelName string) (*segmentStatus, error) {
sp, _ := trace.StartSpanFromContext(ctx)
defer sp.Finish()
id, err := allocator.allocator.allocID()
if err != nil {
return nil, err
}
segmentInfo, err := BuildSegment(collectionID, partitionID, id, channelName)
if err != nil {
return nil, err
}
if err = allocator.mt.AddSegment(segmentInfo); err != nil {
return nil, err
}
if err = allocator.open(segmentInfo); err != nil {
return nil, err
}
infoMsg := &msgstream.SegmentInfoMsg{
BaseMsg: msgstream.BaseMsg{
HashValues: []uint32{0},
},
SegmentMsg: datapb.SegmentMsg{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_SegmentInfo,
MsgID: 0,
Timestamp: 0,
SourceID: Params.NodeID,
},
Segment: segmentInfo,
},
}
msgPack := &msgstream.MsgPack{
Msgs: []msgstream.TsMsg{infoMsg},
}
if allocator.segmentInfoStream != nil {
if err = allocator.segmentInfoStream.Produce(msgPack); err != nil {
return nil, err
}
}
return allocator.segments[segmentInfo.ID], nil
}
func (allocator *segmentAllocator) estimateTotalRows(collectionID UniqueID) (int, error) {
collMeta, err := allocator.mt.GetCollection(collectionID)
if err != nil {

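The constructor change above is the functional-options pattern: optional collaborators such as the segment-info stream are injected without widening the required parameter list, and callers that omit WithSegmentStream leave the stream nil, which is exactly what the nil check before Produce in openNewSegment relies on. A reduced, runnable sketch of the shape; MsgStream and stubStream are placeholders, not the real msgstream types.

package main

import "fmt"

// MsgStream stands in for msgstream.MsgStream in this sketch.
type MsgStream interface{ Name() string }

type segmentAllocator struct {
	segmentInfoStream MsgStream // stays nil unless an option sets it
}

// Option wraps a mutation applied to the allocator at construction time.
type Option struct {
	apply func(alloc *segmentAllocator)
}

// WithSegmentStream injects the stream the allocator publishes
// SegmentInfo messages to when it opens a new segment.
func WithSegmentStream(stream MsgStream) Option {
	return Option{apply: func(alloc *segmentAllocator) {
		alloc.segmentInfoStream = stream
	}}
}

func newSegmentAllocator(opts ...Option) *segmentAllocator {
	alloc := &segmentAllocator{}
	for _, opt := range opts {
		opt.apply(alloc)
	}
	return alloc
}

// stubStream is a toy implementation for the usage example.
type stubStream struct{}

func (stubStream) Name() string { return "segment-info" }

func main() {
	plain := newSegmentAllocator()
	fmt.Println(plain.segmentInfoStream == nil) // true: Produce is skipped

	withStream := newSegmentAllocator(WithSegmentStream(stubStream{}))
	fmt.Println(withStream.segmentInfoStream.Name()) // segment-info
}
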
@@ -48,9 +48,6 @@ func TestAllocSegment(t *testing.T) {
expectResult bool
}{
{collID, 100, "c1", 100, true},
{collID + 1, 100, "c1", 100, false},
{collID, 101, "c1", 100, false},
{collID, 100, "c3", 100, false},
{collID, 100, "c1", math.MaxInt64, false},
}
for _, c := range cases {

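The three dropped cases requested allocations from collections, partitions, and channels with no open segment; under the old code those failed with errRemainInSufficient, but AllocSegment now opens a segment on demand and retries, so only the oversized request (math.MaxInt64 rows) still fails. A toy sketch of the new decision flow under that reading; segment, maxRowsPerSegment, and allocSegment are illustrative stand-ins, not the repo's types.

package main

import (
	"errors"
	"fmt"
)

const maxRowsPerSegment = 1000 // stand-in for the estimated segment capacity

var errRemainInSufficient = errors.New("remaining rows insufficient")

// segment is a toy stand-in for segmentStatus with a row budget.
type segment struct{ remaining int }

// alloc reports whether the segment can absorb numRows,
// consuming the budget on success.
func (s *segment) alloc(numRows int) bool {
	if numRows > s.remaining {
		return false
	}
	s.remaining -= numRows
	return true
}

// allocSegment mirrors the reworked control flow: try the open
// segments first, then open a fresh one and retry before giving up.
func allocSegment(open []*segment, requestRows int) (*segment, error) {
	for _, s := range open {
		if s.alloc(requestRows) {
			return s, nil
		}
	}
	// Previously this was the failure point; now a new segment is
	// opened on demand and the allocation is retried once.
	fresh := &segment{remaining: maxRowsPerSegment}
	if !fresh.alloc(requestRows) {
		return nil, errRemainInSufficient
	}
	return fresh, nil
}

func main() {
	full := &segment{remaining: 10}
	if s, err := allocSegment([]*segment{full}, 100); err == nil {
		fmt.Println("allocated from fresh segment, remaining:", s.remaining)
	}
	// Only an oversized request still fails, matching the surviving
	// math.MaxInt64 test case.
	if _, err := allocSegment(nil, maxRowsPerSegment+1); err != nil {
		fmt.Println("rejected:", err)
	}
}
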
@@ -22,7 +22,6 @@ import (
"github.com/zilliztech/milvus-distributed/internal/timesync"
"github.com/zilliztech/milvus-distributed/internal/types"
"github.com/zilliztech/milvus-distributed/internal/util/retry"
"github.com/zilliztech/milvus-distributed/internal/util/trace"
"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
"go.etcd.io/etcd/clientv3"
"go.uber.org/zap"
@@ -62,7 +61,6 @@ type Server struct {
insertChannels []string
msFactory msgstream.Factory
ttBarrier timesync.TimeTickBarrier
allocMu sync.RWMutex
}
func CreateServer(ctx context.Context, factory msgstream.Factory) (*Server, error) {
@@ -112,9 +110,9 @@ func (s *Server) Start() error {
return err
}
s.statsHandler = newStatsHandler(s.meta)
s.segAllocator = newSegmentAllocator(s.meta, s.allocator)
s.ddHandler = newDDHandler(s.meta, s.segAllocator)
s.ddHandler = newDDHandler(s.meta, s.segAllocator, s.masterClient)
s.initSegmentInfoChannel()
s.segAllocator = newSegmentAllocator(s.meta, s.allocator, WithSegmentStream(s.segmentInfoStream))
if err = s.loadMetaFromMaster(); err != nil {
return err
}
@@ -369,7 +367,6 @@ func (s *Server) startProxyServiceTimeTickLoop(ctx context.Context) {
default:
}
msgPack := flushStream.Consume()
s.allocMu.Lock()
for _, msg := range msgPack.Msgs {
if msg.Type() != commonpb.MsgType_TimeTick {
log.Warn("receive unknown msg from proxy service timetick", zap.Stringer("msgType", msg.Type()))
@@ -381,7 +378,6 @@ func (s *Server) startProxyServiceTimeTickLoop(ctx context.Context) {
log.Error("expire allocations error", zap.Error(err))
}
}
s.allocMu.Unlock()
}
}
@@ -526,8 +522,6 @@ func (s *Server) newDataNode(ip string, port int64, id UniqueID) (*dataNode, err
}
func (s *Server) Flush(ctx context.Context, req *datapb.FlushRequest) (*commonpb.Status, error) {
s.allocMu.Lock()
defer s.allocMu.Unlock()
if !s.checkStateIsHealthy() {
return &commonpb.Status{
ErrorCode: commonpb.ErrorCode_UnexpectedError,
@@ -541,19 +535,18 @@ func (s *Server) Flush(ctx context.Context, req *datapb.FlushRequest) (*commonpb
}
func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentIDRequest) (*datapb.AssignSegmentIDResponse, error) {
s.allocMu.Lock()
defer s.allocMu.Unlock()
resp := &datapb.AssignSegmentIDResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
ErrorCode: commonpb.ErrorCode_UnexpectedError,
},
SegIDAssignments: make([]*datapb.SegmentIDAssignment, 0),
}
if !s.checkStateIsHealthy() {
resp.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
resp.Status.Reason = "server is initializing"
return resp, nil
}
assigns := make([]*datapb.SegmentIDAssignment, 0, len(req.SegmentIDRequests))
for _, r := range req.SegmentIDRequests {
if !s.meta.HasCollection(r.CollectionID) {
if err := s.loadCollectionFromMaster(ctx, r.CollectionID); err != nil {
@@ -566,29 +559,19 @@ func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentI
ErrorCode: commonpb.ErrorCode_UnexpectedError,
},
}
//if err := s.validateAllocRequest(r.CollectionID, r.PartitionID, r.ChannelName); err != nil {
//result.Status.Reason = err.Error()
//assigns = append(assigns, result)
//continue
//}
segmentID, retCount, expireTs, err := s.segAllocator.AllocSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName, int(r.Count))
if err != nil {
if _, ok := err.(errRemainInSufficient); !ok {
result.Status.Reason = fmt.Sprintf("allocation of Collection %d, Partition %d, Channel %s, Count %d error: %s",
r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
resp.SegIDAssignments = append(resp.SegIDAssignments, result)
continue
}
if err = s.openNewSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName); err != nil {
result.Status.Reason = fmt.Sprintf("open new segment of Collection %d, Partition %d, Channel %s, Count %d error: %s",
r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
resp.SegIDAssignments = append(resp.SegIDAssignments, result)
continue
}
segmentID, retCount, expireTs, err = s.segAllocator.AllocSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName, int(r.Count))
if err != nil {
result.Status.Reason = fmt.Sprintf("retry allocation of Collection %d, Partition %d, Channel %s, Count %d error: %s",
r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
resp.SegIDAssignments = append(resp.SegIDAssignments, result)
continue
}
result.Status.Reason = fmt.Sprintf("allocation of collection %d, partition %d, channel %s, count %d error: %s",
r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
assigns = append(assigns, result)
continue
}
result.Status.ErrorCode = commonpb.ErrorCode_Success
@@ -598,11 +581,28 @@ func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentI
result.Count = uint32(retCount)
result.ExpireTime = expireTs
result.ChannelName = r.ChannelName
resp.SegIDAssignments = append(resp.SegIDAssignments, result)
assigns = append(assigns, result)
}
resp.Status.ErrorCode = commonpb.ErrorCode_Success
resp.SegIDAssignments = assigns
return resp, nil
}
func (s *Server) validateAllocRequest(collID UniqueID, partID UniqueID, channelName string) error {
if !s.meta.HasCollection(collID) {
return fmt.Errorf("can not find collection %d", collID)
}
if !s.meta.HasPartition(collID, partID) {
return fmt.Errorf("can not find partition %d", partID)
}
for _, name := range s.insertChannels {
if name == channelName {
return nil
}
}
return fmt.Errorf("can not find channel %s", channelName)
}
func (s *Server) loadCollectionFromMaster(ctx context.Context, collectionID int64) error {
resp, err := s.masterClient.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{
Base: &commonpb.MsgBase{
@@ -615,53 +615,29 @@ func (s *Server) loadCollectionFromMaster(ctx context.Context, collectionID int6
if err = VerifyResponse(resp, err); err != nil {
return err
}
presp, err := s.masterClient.ShowPartitions(ctx, &milvuspb.ShowPartitionsRequest{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_ShowPartitions,
MsgID: -1, // todo
Timestamp: 0, // todo
SourceID: Params.NodeID,
},
DbName: "",
CollectionName: resp.Schema.Name,
CollectionID: resp.CollectionID,
})
if err = VerifyResponse(presp, err); err != nil {
log.Error("show partitions error", zap.String("collectionName", resp.Schema.Name), zap.Int64("collectionID", resp.CollectionID), zap.Error(err))
return err
}
collInfo := &datapb.CollectionInfo{
ID: resp.CollectionID,
Schema: resp.Schema,
ID: resp.CollectionID,
Schema: resp.Schema,
Partitions: presp.PartitionIDs,
}
return s.meta.AddCollection(collInfo)
}
func (s *Server) openNewSegment(ctx context.Context, collectionID UniqueID, partitionID UniqueID, channelName string) error {
sp, _ := trace.StartSpanFromContext(ctx)
defer sp.Finish()
id, err := s.allocator.allocID()
if err != nil {
return err
}
segmentInfo, err := BuildSegment(collectionID, partitionID, id, channelName)
if err != nil {
return err
}
if err = s.meta.AddSegment(segmentInfo); err != nil {
return err
}
if err = s.segAllocator.OpenSegment(ctx, segmentInfo); err != nil {
return err
}
infoMsg := &msgstream.SegmentInfoMsg{
BaseMsg: msgstream.BaseMsg{
HashValues: []uint32{0},
},
SegmentMsg: datapb.SegmentMsg{
Base: &commonpb.MsgBase{
MsgType: commonpb.MsgType_SegmentInfo,
MsgID: 0,
Timestamp: 0,
SourceID: Params.NodeID,
},
Segment: segmentInfo,
},
}
msgPack := msgstream.MsgPack{
Msgs: []msgstream.TsMsg{infoMsg},
}
if err = s.segmentInfoStream.Produce(&msgPack); err != nil {
return err
}
return nil
}
func (s *Server) ShowSegments(ctx context.Context, req *datapb.ShowSegmentsRequest) (*datapb.ShowSegmentsResponse, error) {
resp := &datapb.ShowSegmentsResponse{
Status: &commonpb.Status{