package querynode

import (
	"context"
	"errors"
	"strconv"

	"github.com/zilliztech/milvus-distributed/internal/kv"
	miniokv "github.com/zilliztech/milvus-distributed/internal/kv/minio"
	"github.com/zilliztech/milvus-distributed/internal/msgstream"
	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
	"github.com/zilliztech/milvus-distributed/internal/proto/datapb"
	"github.com/zilliztech/milvus-distributed/internal/proto/indexpb"
	internalPb "github.com/zilliztech/milvus-distributed/internal/proto/internalpb2"
	"github.com/zilliztech/milvus-distributed/internal/proto/milvuspb"
	"github.com/zilliztech/milvus-distributed/internal/storage"
)

type segmentManager struct {
	replica collectionReplica

	dmStream         msgstream.MsgStream
	loadIndexReqChan chan []msgstream.TsMsg

	masterClient MasterServiceInterface
	dataClient   DataServiceInterface
	indexClient  IndexServiceInterface

	kv     kv.Base // minio kv
	iCodec *storage.InsertCodec
}

func (s *segmentManager) seekSegment(positions []*internalPb.MsgPosition) error {
	// TODO: open seek
	//for _, position := range positions {
	//	err := s.dmStream.Seek(position)
	//	if err != nil {
	//		return err
	//	}
	//}
	return nil
}

// getIndexInfo asks the master service for the index ID and build ID of a segment.
func (s *segmentManager) getIndexInfo(collectionID UniqueID, segmentID UniqueID) (UniqueID, UniqueID, error) {
	req := &milvuspb.DescribeSegmentRequest{
		Base: &commonpb.MsgBase{
			MsgType: commonpb.MsgType_kDescribeSegment,
		},
		CollectionID: collectionID,
		SegmentID:    segmentID,
	}
	response, err := s.masterClient.DescribeSegment(req)
	if err != nil {
		return 0, 0, err
	}
	return response.IndexID, response.BuildID, nil
}

// loadSegment loads the field data and indexes of the given sealed segments into the replica.
func (s *segmentManager) loadSegment(collectionID UniqueID, partitionID UniqueID, segmentIDs []UniqueID, fieldIDs []int64) error {
	// TODO: interim solution
	if len(fieldIDs) == 0 {
		collection, err := s.replica.getCollectionByID(collectionID)
		if err != nil {
			return err
		}
		fieldIDs = make([]int64, 0)
		for _, field := range collection.Schema().Fields {
			fieldIDs = append(fieldIDs, field.FieldID)
		}
	}
	for _, segmentID := range segmentIDs {
		// we don't need the index ID yet
		_, buildID, err := s.getIndexInfo(collectionID, segmentID)
		if err == nil {
			// the segment has an index, so we don't need to load its vector fields
			vectorFields, err := s.replica.getVecFieldsBySegmentID(segmentID)
			if err != nil {
				return err
			}
			fieldIDs = s.filterOutVectorFields(fieldIDs, vectorFields)
		}
		paths, srcFieldIDs, err := s.getInsertBinlogPaths(segmentID)
		if err != nil {
			return err
		}

		targetFields := s.getTargetFields(paths, srcFieldIDs, fieldIDs)
		// replace segment
		err = s.replica.removeSegment(segmentID)
		if err != nil {
			return err
		}
		err = s.replica.addSegment(segmentID, partitionID, collectionID, segTypeSealed)
		if err != nil {
			return err
		}
		err = s.loadSegmentFieldsData(segmentID, targetFields)
		if err != nil {
			return err
		}
		indexPaths, err := s.getIndexPaths(buildID)
		if err != nil {
			return err
		}
		err = s.loadIndex(segmentID, indexPaths)
		if err != nil {
			// TODO: return or continue?
			return err
		}
	}
	return nil
}

func (s *segmentManager) releaseSegment(segmentID UniqueID) error {
	err := s.replica.removeSegment(segmentID)
	return err
}

//------------------------------------------------------------------------------------------------- internal functions

// getInsertBinlogPaths asks the data service for the insert binlog paths of a segment.
func (s *segmentManager) getInsertBinlogPaths(segmentID UniqueID) ([]*internalPb.StringList, []int64, error) {
	if s.dataClient == nil {
		return nil, nil, errors.New("null data service client")
	}

	insertBinlogPathRequest := &datapb.InsertBinlogPathRequest{
		SegmentID: segmentID,
	}

	pathResponse, err := s.dataClient.GetInsertBinlogPaths(insertBinlogPathRequest)
	if err != nil {
		return nil, nil, err
	}

	if len(pathResponse.FieldIDs) != len(pathResponse.Paths) {
		return nil, nil, errors.New("illegal InsertBinlogPathsResponse")
	}

	return pathResponse.Paths, pathResponse.FieldIDs, nil
}

func (s *segmentManager) filterOutVectorFields(fieldIDs []int64, vectorFields map[int64]string) []int64 {
	targetFields := make([]int64, 0)
	for _, id := range fieldIDs {
		if _, ok := vectorFields[id]; !ok {
			targetFields = append(targetFields, id)
		}
	}
	return targetFields
}

// getTargetFields maps each requested field ID to its binlog paths.
func (s *segmentManager) getTargetFields(paths []*internalPb.StringList, srcFieldIDS []int64, dstFields []int64) map[int64]*internalPb.StringList {
	targetFields := make(map[int64]*internalPb.StringList)

	containsFunc := func(s []int64, e int64) bool {
		for _, a := range s {
			if a == e {
				return true
			}
		}
		return false
	}

	for i, fieldID := range srcFieldIDS {
		if containsFunc(dstFields, fieldID) {
			targetFields[fieldID] = paths[i]
		}
	}

	return targetFields
}

// loadSegmentFieldsData reads each target field's binlogs from the minio kv,
// deserializes them and feeds the field data into the segment.
func (s *segmentManager) loadSegmentFieldsData(segmentID UniqueID, targetFields map[int64]*internalPb.StringList) error {
	for id, p := range targetFields {
		if id == timestampFieldID {
			// seg core doesn't need the timestamp field
			continue
		}

		paths := p.Values
		blobs := make([]*storage.Blob, 0)
		for _, path := range paths {
			binLog, err := s.kv.Load(path)
			if err != nil {
				// TODO: return or continue?
				return err
			}
			blobs = append(blobs, &storage.Blob{
				Key:   strconv.FormatInt(id, 10), // TODO: key???
				Value: []byte(binLog),
			})
		}

		_, _, insertData, err := s.iCodec.Deserialize(blobs)
		if err != nil {
			// TODO: return or continue?
			return err
		}

		if len(insertData.Data) != 1 {
			return errors.New("we expect only one field in deserialized insert data")
		}

		for _, value := range insertData.Data {
			var numRows int
			var data interface{}

			switch fieldData := value.(type) {
			case *storage.BoolFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.Int8FieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.Int16FieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.Int32FieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.Int64FieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.FloatFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.DoubleFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.StringFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.FloatVectorFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			case *storage.BinaryVectorFieldData:
				numRows = fieldData.NumRows
				data = fieldData.Data
			default:
				return errors.New("unexpected field data type")
			}

			segment, err := s.replica.getSegmentByID(segmentID)
			if err != nil {
				// TODO: return or continue?
				return err
			}
			err = segment.segmentLoadFieldData(id, numRows, data)
			if err != nil {
				// TODO: return or continue?
				return err
			}
		}
	}
	return nil
}

// getIndexPaths asks the index service for the index file paths of a build.
func (s *segmentManager) getIndexPaths(buildID UniqueID) ([]string, error) {
	if s.indexClient == nil {
		return nil, errors.New("null index service client")
	}

	indexFilePathRequest := &indexpb.IndexFilePathsRequest{
		// TODO: rename indexIDs to buildIDs
		IndexIDs: []UniqueID{buildID},
	}
	pathResponse, err := s.indexClient.GetIndexFilePaths(indexFilePathRequest)
	if err != nil || pathResponse.Status.ErrorCode != commonpb.ErrorCode_SUCCESS {
		return nil, err
	}

	if len(pathResponse.FilePaths) <= 0 {
		return nil, errors.New("illegal index file paths")
	}

	return pathResponse.FilePaths[0].IndexFilePaths, nil
}

func (s *segmentManager) loadIndex(segmentID UniqueID, indexPaths []string) error {
	// get vector field ids from schema to load index
	vecFieldIDs, err := s.replica.getVecFieldsBySegmentID(segmentID)
	if err != nil {
		return err
	}
	for id, name := range vecFieldIDs {
		// non-blocking send
		go s.sendLoadIndex(indexPaths, segmentID, id, name)
	}

	return nil
}

// sendLoadIndex wraps a LoadIndex request in a message and puts it onto loadIndexReqChan.
func (s *segmentManager) sendLoadIndex(indexPaths []string, segmentID int64, fieldID int64, fieldName string) {
	loadIndexRequest := internalPb.LoadIndex{
		Base: &commonpb.MsgBase{
			MsgType: commonpb.MsgType_kSearchResult,
		},
		SegmentID:  segmentID,
		FieldName:  fieldName,
		FieldID:    fieldID,
		IndexPaths: indexPaths,
	}

	loadIndexMsg := &msgstream.LoadIndexMsg{
		LoadIndex: loadIndexRequest,
	}

	messages := []msgstream.TsMsg{loadIndexMsg}
	s.loadIndexReqChan <- messages
}

func newSegmentManager(ctx context.Context, masterClient MasterServiceInterface, dataClient DataServiceInterface, indexClient IndexServiceInterface, replica collectionReplica, dmStream msgstream.MsgStream, loadIndexReqChan chan []msgstream.TsMsg) *segmentManager {
	bucketName := Params.MinioBucketName
	option := &miniokv.Option{
		Address:           Params.MinioEndPoint,
		AccessKeyID:       Params.MinioAccessKeyID,
		SecretAccessKeyID: Params.MinioSecretAccessKey,
		UseSSL:            Params.MinioUseSSLStr,
		BucketName:        bucketName,
		CreateBucket:      true,
	}

	minioKV, err := miniokv.NewMinIOKV(ctx, option)
	if err != nil {
		panic(err)
	}

	return &segmentManager{
		replica:          replica,
		dmStream:         dmStream,
		loadIndexReqChan: loadIndexReqChan,

		masterClient: masterClient,
		dataClient:   dataClient,
		indexClient:  indexClient,

		kv:     minioKV,
		iCodec: &storage.InsertCodec{},
	}
}