mirror of https://github.com/milvus-io/milvus.git
Refactor data service segment allocator
Signed-off-by: sunby <bingyi.sun@zilliz.com>
pull/4973/head^2
parent e760605187
commit 61edc53faf
@@ -4,6 +4,8 @@ import (
 	"context"
 
 	"github.com/zilliztech/milvus-distributed/internal/proto/datapb"
+	"github.com/zilliztech/milvus-distributed/internal/proto/milvuspb"
+	"github.com/zilliztech/milvus-distributed/internal/types"
 
 	"github.com/golang/protobuf/proto"
 	"github.com/zilliztech/milvus-distributed/internal/msgstream"
@@ -14,12 +16,14 @@ import (
 type ddHandler struct {
 	meta             *meta
 	segmentAllocator segmentAllocatorInterface
+	masterClient     types.MasterService
 }
 
-func newDDHandler(meta *meta, allocator segmentAllocatorInterface) *ddHandler {
+func newDDHandler(meta *meta, allocator segmentAllocatorInterface, client types.MasterService) *ddHandler {
 	return &ddHandler{
 		meta:             meta,
 		segmentAllocator: allocator,
+		masterClient:     client,
 	}
 }
 
@@ -47,9 +51,24 @@ func (handler *ddHandler) handleCreateCollection(msg *msgstream.CreateCollection
 	if err := proto.Unmarshal(msg.Schema, schema); err != nil {
 		return err
 	}
-	err := handler.meta.AddCollection(&datapb.CollectionInfo{
-		ID:     msg.CollectionID,
-		Schema: schema,
+	presp, err := handler.masterClient.ShowPartitions(context.TODO(), &milvuspb.ShowPartitionsRequest{
+		Base: &commonpb.MsgBase{
+			MsgType:   commonpb.MsgType_ShowPartitions,
+			MsgID:     -1, // todo
+			Timestamp: 0,  // todo
+			SourceID:  Params.NodeID,
+		},
+		DbName:         "",
+		CollectionName: schema.Name,
+		CollectionID:   msg.CollectionID,
+	})
+	if err = VerifyResponse(presp, err); err != nil {
+		return err
+	}
+	err = handler.meta.AddCollection(&datapb.CollectionInfo{
+		ID:         msg.CollectionID,
+		Schema:     schema,
+		Partitions: presp.PartitionIDs,
 	})
 	if err != nil {
 		return err
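Note on VerifyResponse: the helper is pre-existing and its body is not part of this diff. A minimal sketch of the behavior the call sites above rely on (collapse the RPC error and the response status into a single error) could look like the following; the statusHolder interface, the function name, and the error texts are assumptions for illustration, not the repository's actual code:

type statusHolder interface {
	GetStatus() *commonpb.Status
}

// verifyResponseSketch returns the transport error if there is one, otherwise
// turns a non-Success status into an error carrying the server-side reason.
func verifyResponseSketch(resp statusHolder, rpcErr error) error {
	if rpcErr != nil {
		return rpcErr
	}
	if resp == nil || resp.GetStatus() == nil {
		return errors.New("nil response or status")
	}
	if resp.GetStatus().ErrorCode != commonpb.ErrorCode_Success {
		return errors.New(resp.GetStatus().Reason)
	}
	return nil
}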
@@ -347,6 +347,21 @@ func (meta *meta) DropPartition(collID UniqueID, partitionID UniqueID) error {
 	return nil
 }
 
+func (meta *meta) HasPartition(collID UniqueID, partID UniqueID) bool {
+	meta.RLock()
+	defer meta.RUnlock()
+	coll, ok := meta.collections[collID]
+	if !ok {
+		return false
+	}
+	for _, id := range coll.Partitions {
+		if partID == id {
+			return true
+		}
+	}
+	return false
+}
+
 func (meta *meta) GetNumRowsOfPartition(collectionID UniqueID, partitionID UniqueID) (int64, error) {
 	meta.RLock()
 	defer meta.RUnlock()
@@ -10,6 +10,9 @@ import (
 	"go.uber.org/zap"
 
 	"github.com/zilliztech/milvus-distributed/internal/log"
+	"github.com/zilliztech/milvus-distributed/internal/msgstream"
+	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
+
 	"github.com/zilliztech/milvus-distributed/internal/util/trace"
 	"github.com/zilliztech/milvus-distributed/internal/util/tsoutil"
 	"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
@@ -71,10 +74,22 @@ type segmentAllocator struct
 	segmentThresholdFactor float64
 	mu                     sync.RWMutex
 	allocator              allocatorInterface
+	segmentInfoStream      msgstream.MsgStream
 }
 
-func newSegmentAllocator(meta *meta, allocator allocatorInterface) *segmentAllocator {
-	segmentAllocator := &segmentAllocator{
+type Option struct {
+	apply func(alloc *segmentAllocator)
+}
+
+func WithSegmentStream(stream msgstream.MsgStream) Option {
+	return Option{
+		apply: func(alloc *segmentAllocator) {
+			alloc.segmentInfoStream = stream
+		},
+	}
+}
+
+func newSegmentAllocator(meta *meta, allocator allocatorInterface, opts ...Option) *segmentAllocator {
+	alloc := &segmentAllocator{
 		mt:                    meta,
 		segments:              make(map[UniqueID]*segmentStatus),
 		segmentExpireDuration: Params.SegIDAssignExpiration,
@@ -82,7 +97,10 @@ func newSegmentAllocator(meta *meta, allocator allocatorInterface) *segmentAlloc
 		segmentThresholdFactor: Params.SegmentSizeFactor,
 		allocator:              allocator,
 	}
-	return segmentAllocator
+	for _, opt := range opts {
+		opt.apply(alloc)
+	}
+	return alloc
 }
 
 func (allocator *segmentAllocator) OpenSegment(ctx context.Context, segmentInfo *datapb.SegmentInfo) error {
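The constructor now accepts functional options, so the segment-info stream becomes an opt-in dependency instead of a hard-wired field. A short usage sketch, mirroring the wiring this commit adds in the data service server; meta, idAllocator and segmentInfoStream stand for whatever values the caller already holds:

// With a stream: openNewSegment will publish a SegmentInfo message for every new segment.
allocWithStream := newSegmentAllocator(meta, idAllocator, WithSegmentStream(segmentInfoStream))

// Without options: segmentInfoStream stays nil and the Produce call is skipped,
// which keeps unit tests free of msgstream setup.
allocPlain := newSegmentAllocator(meta, idAllocator)

_ = allocWithStream
_ = allocPlain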
@@ -93,6 +111,10 @@ func (allocator *segmentAllocator) OpenSegment(ctx context.Context, segmentInfo
 	if _, ok := allocator.segments[segmentInfo.ID]; ok {
 		return fmt.Errorf("segment %d already exist", segmentInfo.ID)
 	}
+	return allocator.open(segmentInfo)
+}
+
+func (allocator *segmentAllocator) open(segmentInfo *datapb.SegmentInfo) error {
 	totalRows, err := allocator.estimateTotalRows(segmentInfo.CollectionID)
 	if err != nil {
 		return err
@@ -139,7 +161,24 @@ func (allocator *segmentAllocator) AllocSegment(ctx context.Context, collectionI
 		return
 	}
 
-	err = newErrRemainInSufficient(requestRows)
+	var segStatus *segmentStatus
+	segStatus, err = allocator.openNewSegment(ctx, collectionID, partitionID, channelName)
+	if err != nil {
+		return
+	}
+	var success bool
+	success, err = allocator.alloc(segStatus, requestRows)
+	if err != nil {
+		return
+	}
+	if !success {
+		err = newErrRemainInSufficient(requestRows)
+		return
+	}
+
+	segID = segStatus.id
+	retCount = requestRows
+	expireTime = segStatus.lastExpireTime
 	return
 }
 
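With this change AllocSegment opens a new segment and retries the allocation itself when the remaining capacity is insufficient, so callers no longer special-case errRemainInSufficient. A sketch of the simplified call site; s, ctx, collID and partID are placeholders for the caller's own values:

segID, count, expireTs, err := s.segAllocator.AllocSegment(ctx, collID, partID, "insert-channel-0", 1000)
if err != nil {
	// Any error is now terminal for this request; the "open a new segment
	// and retry" step happens inside the allocator.
	return err
}
// segID, count and expireTs describe the granted allocation.
_ = segID
_ = count
_ = expireTs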
@@ -176,6 +215,48 @@ func (allocator *segmentAllocator) alloc(segStatus *segmentStatus, numRows int)
 	return true, nil
 }
 
+func (allocator *segmentAllocator) openNewSegment(ctx context.Context, collectionID UniqueID, partitionID UniqueID, channelName string) (*segmentStatus, error) {
+	sp, _ := trace.StartSpanFromContext(ctx)
+	defer sp.Finish()
+	id, err := allocator.allocator.allocID()
+	if err != nil {
+		return nil, err
+	}
+	segmentInfo, err := BuildSegment(collectionID, partitionID, id, channelName)
+	if err != nil {
+		return nil, err
+	}
+	if err = allocator.mt.AddSegment(segmentInfo); err != nil {
+		return nil, err
+	}
+	if err = allocator.open(segmentInfo); err != nil {
+		return nil, err
+	}
+	infoMsg := &msgstream.SegmentInfoMsg{
+		BaseMsg: msgstream.BaseMsg{
+			HashValues: []uint32{0},
+		},
+		SegmentMsg: datapb.SegmentMsg{
+			Base: &commonpb.MsgBase{
+				MsgType:   commonpb.MsgType_SegmentInfo,
+				MsgID:     0,
+				Timestamp: 0,
+				SourceID:  Params.NodeID,
+			},
+			Segment: segmentInfo,
+		},
+	}
+	msgPack := &msgstream.MsgPack{
+		Msgs: []msgstream.TsMsg{infoMsg},
+	}
+	if allocator.segmentInfoStream != nil {
+		if err = allocator.segmentInfoStream.Produce(msgPack); err != nil {
+			return nil, err
+		}
+	}
+	return allocator.segments[segmentInfo.ID], nil
+}
+
 func (allocator *segmentAllocator) estimateTotalRows(collectionID UniqueID) (int, error) {
 	collMeta, err := allocator.mt.GetCollection(collectionID)
 	if err != nil {
@@ -48,9 +48,6 @@ func TestAllocSegment(t *testing.T) {
 		expectResult bool
 	}{
 		{collID, 100, "c1", 100, true},
-		{collID + 1, 100, "c1", 100, false},
-		{collID, 101, "c1", 100, false},
-		{collID, 100, "c3", 100, false},
 		{collID, 100, "c1", math.MaxInt64, false},
 	}
 	for _, c := range cases {
@@ -22,7 +22,6 @@ import (
 	"github.com/zilliztech/milvus-distributed/internal/timesync"
 	"github.com/zilliztech/milvus-distributed/internal/types"
 	"github.com/zilliztech/milvus-distributed/internal/util/retry"
-	"github.com/zilliztech/milvus-distributed/internal/util/trace"
 	"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
 	"go.etcd.io/etcd/clientv3"
 	"go.uber.org/zap"
@@ -62,7 +61,6 @@ type Server struct {
 	insertChannels []string
 	msFactory      msgstream.Factory
 	ttBarrier      timesync.TimeTickBarrier
-	allocMu        sync.RWMutex
 }
 
 func CreateServer(ctx context.Context, factory msgstream.Factory) (*Server, error) {
@@ -112,9 +110,9 @@ func (s *Server) Start() error {
 		return err
 	}
 	s.statsHandler = newStatsHandler(s.meta)
-	s.segAllocator = newSegmentAllocator(s.meta, s.allocator)
-	s.ddHandler = newDDHandler(s.meta, s.segAllocator)
+	s.ddHandler = newDDHandler(s.meta, s.segAllocator, s.masterClient)
 	s.initSegmentInfoChannel()
+	s.segAllocator = newSegmentAllocator(s.meta, s.allocator, WithSegmentStream(s.segmentInfoStream))
 	if err = s.loadMetaFromMaster(); err != nil {
 		return err
 	}
@@ -369,7 +367,6 @@ func (s *Server) startProxyServiceTimeTickLoop(ctx context.Context) {
 		default:
 		}
 		msgPack := flushStream.Consume()
-		s.allocMu.Lock()
 		for _, msg := range msgPack.Msgs {
 			if msg.Type() != commonpb.MsgType_TimeTick {
 				log.Warn("receive unknown msg from proxy service timetick", zap.Stringer("msgType", msg.Type()))
@@ -381,7 +378,6 @@ func (s *Server) startProxyServiceTimeTickLoop(ctx context.Context) {
 				log.Error("expire allocations error", zap.Error(err))
 			}
 		}
-		s.allocMu.Unlock()
 	}
 }
 
@@ -526,8 +522,6 @@ func (s *Server) newDataNode(ip string, port int64, id UniqueID) (*dataNode, err
 }
 
 func (s *Server) Flush(ctx context.Context, req *datapb.FlushRequest) (*commonpb.Status, error) {
-	s.allocMu.Lock()
-	defer s.allocMu.Unlock()
 	if !s.checkStateIsHealthy() {
 		return &commonpb.Status{
 			ErrorCode: commonpb.ErrorCode_UnexpectedError,
@@ -541,19 +535,18 @@ func (s *Server) Flush(ctx context.Context, req *datapb.FlushRequest) (*commonpb
 }
 
 func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentIDRequest) (*datapb.AssignSegmentIDResponse, error) {
-	s.allocMu.Lock()
-	defer s.allocMu.Unlock()
 	resp := &datapb.AssignSegmentIDResponse{
 		Status: &commonpb.Status{
-			ErrorCode: commonpb.ErrorCode_Success,
+			ErrorCode: commonpb.ErrorCode_UnexpectedError,
 		},
 		SegIDAssignments: make([]*datapb.SegmentIDAssignment, 0),
 	}
 	if !s.checkStateIsHealthy() {
-		resp.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
 		resp.Status.Reason = "server is initializing"
 		return resp, nil
 	}
+
+	assigns := make([]*datapb.SegmentIDAssignment, 0, len(req.SegmentIDRequests))
 	for _, r := range req.SegmentIDRequests {
 		if !s.meta.HasCollection(r.CollectionID) {
 			if err := s.loadCollectionFromMaster(ctx, r.CollectionID); err != nil {
@@ -566,29 +559,19 @@ func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentI
 				ErrorCode: commonpb.ErrorCode_UnexpectedError,
 			},
 		}
 
+		//if err := s.validateAllocRequest(r.CollectionID, r.PartitionID, r.ChannelName); err != nil {
+		//result.Status.Reason = err.Error()
+		//assigns = append(assigns, result)
+		//continue
+		//}
+
 		segmentID, retCount, expireTs, err := s.segAllocator.AllocSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName, int(r.Count))
 		if err != nil {
-			if _, ok := err.(errRemainInSufficient); !ok {
-				result.Status.Reason = fmt.Sprintf("allocation of Collection %d, Partition %d, Channel %s, Count %d error: %s",
-					r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
-				resp.SegIDAssignments = append(resp.SegIDAssignments, result)
-				continue
-			}
-
-			if err = s.openNewSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName); err != nil {
-				result.Status.Reason = fmt.Sprintf("open new segment of Collection %d, Partition %d, Channel %s, Count %d error: %s",
-					r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
-				resp.SegIDAssignments = append(resp.SegIDAssignments, result)
-				continue
-			}
-
-			segmentID, retCount, expireTs, err = s.segAllocator.AllocSegment(ctx, r.CollectionID, r.PartitionID, r.ChannelName, int(r.Count))
-			if err != nil {
-				result.Status.Reason = fmt.Sprintf("retry allocation of Collection %d, Partition %d, Channel %s, Count %d error: %s",
-					r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
-				resp.SegIDAssignments = append(resp.SegIDAssignments, result)
-				continue
-			}
+			result.Status.Reason = fmt.Sprintf("allocation of collection %d, partition %d, channel %s, count %d error: %s",
+				r.CollectionID, r.PartitionID, r.ChannelName, r.Count, err.Error())
+			assigns = append(assigns, result)
+			continue
 		}
 
 		result.Status.ErrorCode = commonpb.ErrorCode_Success
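For reference, a client-side sketch of the request this handler serves; it uses only fields that appear in the handler above (CollectionID, PartitionID, ChannelName, Count), while dataServiceClient, ctx, collID and partID are assumed to exist on the caller's side:

req := &datapb.AssignSegmentIDRequest{
	SegmentIDRequests: []*datapb.SegmentIDRequest{
		{
			CollectionID: collID,
			PartitionID:  partID,
			ChannelName:  "insert-channel-0",
			Count:        1000,
		},
	},
}
resp, err := dataServiceClient.AssignSegmentID(ctx, req)
if err != nil || resp.Status.ErrorCode != commonpb.ErrorCode_Success {
	// Handle the failure; per-request errors are also reported in resp.SegIDAssignments[i].Status.
}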
@@ -598,11 +581,28 @@ func (s *Server) AssignSegmentID(ctx context.Context, req *datapb.AssignSegmentI
 		result.Count = uint32(retCount)
 		result.ExpireTime = expireTs
 		result.ChannelName = r.ChannelName
-		resp.SegIDAssignments = append(resp.SegIDAssignments, result)
+		assigns = append(assigns, result)
 	}
+	resp.Status.ErrorCode = commonpb.ErrorCode_Success
+	resp.SegIDAssignments = assigns
 	return resp, nil
 }
 
+func (s *Server) validateAllocRequest(collID UniqueID, partID UniqueID, channelName string) error {
+	if !s.meta.HasCollection(collID) {
+		return fmt.Errorf("can not find collection %d", collID)
+	}
+	if !s.meta.HasPartition(collID, partID) {
+		return fmt.Errorf("can not find partition %d", partID)
+	}
+	for _, name := range s.insertChannels {
+		if name == channelName {
+			return nil
+		}
+	}
+	return fmt.Errorf("can not find channel %s", channelName)
+}
+
 func (s *Server) loadCollectionFromMaster(ctx context.Context, collectionID int64) error {
 	resp, err := s.masterClient.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{
 		Base: &commonpb.MsgBase{
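validateAllocRequest is introduced here, but its call site inside the AssignSegmentID loop is left commented out by this commit. If it were re-enabled, the loop would presumably start with exactly the commented block shown earlier, i.e.:

if err := s.validateAllocRequest(r.CollectionID, r.PartitionID, r.ChannelName); err != nil {
	result.Status.Reason = err.Error()
	assigns = append(assigns, result)
	continue
}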
@@ -615,53 +615,29 @@ func (s *Server) loadCollectionFromMaster(ctx context.Context, collectionID int6
 	if err = VerifyResponse(resp, err); err != nil {
 		return err
 	}
+	presp, err := s.masterClient.ShowPartitions(ctx, &milvuspb.ShowPartitionsRequest{
+		Base: &commonpb.MsgBase{
+			MsgType:   commonpb.MsgType_ShowPartitions,
+			MsgID:     -1, // todo
+			Timestamp: 0,  // todo
+			SourceID:  Params.NodeID,
+		},
+		DbName:         "",
+		CollectionName: resp.Schema.Name,
+		CollectionID:   resp.CollectionID,
+	})
+	if err = VerifyResponse(presp, err); err != nil {
+		log.Error("show partitions error", zap.String("collectionName", resp.Schema.Name), zap.Int64("collectionID", resp.CollectionID), zap.Error(err))
+		return err
+	}
 	collInfo := &datapb.CollectionInfo{
-		ID:     resp.CollectionID,
-		Schema: resp.Schema,
+		ID:         resp.CollectionID,
+		Schema:     resp.Schema,
+		Partitions: presp.PartitionIDs,
 	}
 	return s.meta.AddCollection(collInfo)
 }
 
-func (s *Server) openNewSegment(ctx context.Context, collectionID UniqueID, partitionID UniqueID, channelName string) error {
-	sp, _ := trace.StartSpanFromContext(ctx)
-	defer sp.Finish()
-	id, err := s.allocator.allocID()
-	if err != nil {
-		return err
-	}
-	segmentInfo, err := BuildSegment(collectionID, partitionID, id, channelName)
-	if err != nil {
-		return err
-	}
-	if err = s.meta.AddSegment(segmentInfo); err != nil {
-		return err
-	}
-	if err = s.segAllocator.OpenSegment(ctx, segmentInfo); err != nil {
-		return err
-	}
-	infoMsg := &msgstream.SegmentInfoMsg{
-		BaseMsg: msgstream.BaseMsg{
-			HashValues: []uint32{0},
-		},
-		SegmentMsg: datapb.SegmentMsg{
-			Base: &commonpb.MsgBase{
-				MsgType:   commonpb.MsgType_SegmentInfo,
-				MsgID:     0,
-				Timestamp: 0,
-				SourceID:  Params.NodeID,
-			},
-			Segment: segmentInfo,
-		},
-	}
-	msgPack := msgstream.MsgPack{
-		Msgs: []msgstream.TsMsg{infoMsg},
-	}
-	if err = s.segmentInfoStream.Produce(&msgPack); err != nil {
-		return err
-	}
-	return nil
-}
-
 func (s *Server) ShowSegments(ctx context.Context, req *datapb.ShowSegmentsRequest) (*datapb.ShowSegmentsResponse, error) {
 	resp := &datapb.ShowSegmentsResponse{
 		Status: &commonpb.Status{