2021-11-17 11:41:41 +00:00
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
2021-04-19 05:47:10 +00:00
// with the License. You may obtain a copy of the License at
//
2021-11-17 11:41:41 +00:00
// http://www.apache.org/licenses/LICENSE-2.0
2021-04-19 05:47:10 +00:00
//
2021-11-17 11:41:41 +00:00
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-04-19 05:47:10 +00:00
2021-06-22 08:44:09 +00:00
package querycoord
2021-04-15 07:15:46 +00:00
import (
"context"
"errors"
2021-11-18 03:31:12 +00:00
"fmt"
2021-08-17 02:06:11 +00:00
2021-04-15 07:15:46 +00:00
"go.uber.org/zap"
2021-12-13 02:15:26 +00:00
"github.com/milvus-io/milvus/internal/common"
2021-04-22 06:45:57 +00:00
"github.com/milvus-io/milvus/internal/log"
2022-02-28 08:51:55 +00:00
"github.com/milvus-io/milvus/internal/metrics"
2021-04-22 06:45:57 +00:00
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/milvuspb"
"github.com/milvus-io/milvus/internal/proto/querypb"
2021-09-15 14:17:49 +00:00
"github.com/milvus-io/milvus/internal/util/metricsinfo"
2021-12-13 02:15:26 +00:00
"github.com/milvus-io/milvus/internal/util/typeutil"
2021-04-15 07:15:46 +00:00
)
2021-09-29 09:05:58 +00:00
// GetComponentStates return information about whether the coord is healthy
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) GetComponentStates ( ctx context . Context ) ( * internalpb . ComponentStates , error ) {
2021-11-19 05:57:12 +00:00
nodeID := common . NotRegisteredID
if qc . session != nil && qc . session . Registered ( ) {
nodeID = qc . session . ServerID
}
2021-04-15 07:15:46 +00:00
serviceComponentInfo := & internalpb . ComponentInfo {
2021-11-19 05:57:12 +00:00
// NodeID: Params.QueryCoordID, // will race with QueryCoord.Register()
NodeID : nodeID ,
2021-06-22 08:44:09 +00:00
StateCode : qc . stateCode . Load ( ) . ( internalpb . StateCode ) ,
2021-04-15 07:15:46 +00:00
}
2021-06-19 03:45:09 +00:00
//subComponentInfos, err := qs.cluster.GetComponentInfos(ctx)
//if err != nil {
// return &internalpb.ComponentStates{
// Status: &commonpb.Status{
// ErrorCode: commonpb.ErrorCode_UnexpectedError,
// Reason: err.Error(),
// },
// }, err
//}
2021-04-15 07:15:46 +00:00
return & internalpb . ComponentStates {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
} ,
2021-06-19 03:45:09 +00:00
State : serviceComponentInfo ,
//SubcomponentStates: subComponentInfos,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-10-15 01:02:40 +00:00
// GetTimeTickChannel returns the time tick channel
// TimeTickChannel contains many time tick messages, which has been sent by query nodes
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) GetTimeTickChannel ( ctx context . Context ) ( * milvuspb . StringResponse , error ) {
2021-04-15 07:15:46 +00:00
return & milvuspb . StringResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
Reason : "" ,
} ,
2022-03-04 03:17:56 +00:00
Value : Params . CommonCfg . QueryCoordTimeTick ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-10-20 01:14:37 +00:00
// GetStatisticsChannel return the statistics channel
// Statistics channel contains statistics infos of query nodes, such as segment infos, memory infos
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) GetStatisticsChannel ( ctx context . Context ) ( * milvuspb . StringResponse , error ) {
2021-04-15 07:15:46 +00:00
return & milvuspb . StringResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
Reason : "" ,
} ,
2022-03-04 03:17:56 +00:00
Value : Params . CommonCfg . QueryNodeStats ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-09-29 15:26:56 +00:00
// ShowCollections return all the collections that have been loaded
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) ShowCollections ( ctx context . Context , req * querypb . ShowCollectionsRequest ) ( * querypb . ShowCollectionsResponse , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "show collection start" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64s ( "collectionIDs" , req . CollectionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-06-24 08:00:15 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "show collection failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-06-24 08:00:15 +00:00
return & querypb . ShowCollectionsResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-06-24 08:00:15 +00:00
}
2021-08-02 14:39:25 +00:00
collectionInfos := qc . meta . showCollections ( )
ID2collectionInfo := make ( map [ UniqueID ] * querypb . CollectionInfo )
inMemoryCollectionIDs := make ( [ ] UniqueID , 0 )
for _ , info := range collectionInfos {
ID2collectionInfo [ info . CollectionID ] = info
inMemoryCollectionIDs = append ( inMemoryCollectionIDs , info . CollectionID )
}
inMemoryPercentages := make ( [ ] int64 , 0 )
if len ( req . CollectionIDs ) == 0 {
for _ , id := range inMemoryCollectionIDs {
inMemoryPercentages = append ( inMemoryPercentages , ID2collectionInfo [ id ] . InMemoryPercentage )
}
2022-04-26 03:29:54 +00:00
log . Info ( "show collection end" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64s ( "collections" , inMemoryCollectionIDs ) ,
zap . Int64s ( "inMemoryPercentage" , inMemoryPercentages ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-08-02 14:39:25 +00:00
return & querypb . ShowCollectionsResponse {
Status : status ,
CollectionIDs : inMemoryCollectionIDs ,
InMemoryPercentages : inMemoryPercentages ,
} , nil
}
for _ , id := range req . CollectionIDs {
if _ , ok := ID2collectionInfo [ id ] ; ! ok {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-11-18 03:31:12 +00:00
err := fmt . Errorf ( "collection %d has not been loaded to memory or load failed" , id )
2021-08-02 14:39:25 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "show collection failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , id ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-08-02 14:39:25 +00:00
return & querypb . ShowCollectionsResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-08-02 14:39:25 +00:00
}
inMemoryPercentages = append ( inMemoryPercentages , ID2collectionInfo [ id ] . InMemoryPercentage )
}
2022-04-26 03:29:54 +00:00
log . Info ( "show collection end" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64s ( "collections" , req . CollectionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int64s ( "inMemoryPercentage" , inMemoryPercentages ) )
2021-04-15 07:15:46 +00:00
return & querypb . ShowCollectionsResponse {
2021-08-02 14:39:25 +00:00
Status : status ,
CollectionIDs : req . CollectionIDs ,
InMemoryPercentages : inMemoryPercentages ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-09-30 03:12:03 +00:00
// LoadCollection loads all the sealed segments of this collection to queryNodes, and assigns watchDmChannelRequest to queryNodes
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) LoadCollection ( ctx context . Context , req * querypb . LoadCollectionRequest ) ( * commonpb . Status , error ) {
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2022-02-28 08:51:55 +00:00
2021-04-15 07:15:46 +00:00
collectionID := req . CollectionID
2021-06-19 03:45:09 +00:00
//schema := req.Schema
2022-04-26 03:29:54 +00:00
log . Info ( "loadCollectionRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-04-15 07:15:46 +00:00
status := & commonpb . Status {
2021-06-23 09:44:12 +00:00
ErrorCode : commonpb . ErrorCode_Success ,
2021-04-15 07:15:46 +00:00
}
2021-06-24 08:00:15 +00:00
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "load collection failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-24 08:00:15 +00:00
}
2021-04-15 07:15:46 +00:00
2021-12-15 14:11:09 +00:00
if collectionInfo , err := qc . meta . getCollectionInfoByID ( collectionID ) ; err == nil {
// if collection has been loaded by load collection request, return success
2022-03-14 01:50:01 +00:00
if collectionInfo . LoadType == querypb . LoadType_LoadCollection {
2022-04-28 02:54:00 +00:00
if collectionInfo . ReplicaNumber != req . ReplicaNumber {
msg := fmt . Sprintf ( "collection has already been loaded, and the number of replicas %v is not same as the request's %v. Should release first then reload with the new number of replicas" ,
collectionInfo . ReplicaNumber ,
req . ReplicaNumber )
log . Warn ( msg ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int32 ( "collectionReplicaNumber" , collectionInfo . ReplicaNumber ) ,
zap . Int32 ( "requestReplicaNumber" , req . ReplicaNumber ) )
status . ErrorCode = commonpb . ErrorCode_IllegalArgument
status . Reason = msg
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
return status , nil
}
2022-04-26 03:29:54 +00:00
log . Info ( "collection has already been loaded, return load success directly" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2021-12-15 14:11:09 +00:00
return status , nil
}
// if some partitions of the collection have been loaded by load partitions request, return error
// should release partitions first, then load collection again
if collectionInfo . LoadType == querypb . LoadType_LoadPartition {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err = fmt . Errorf ( "some partitions %v of collection %d has been loaded into QueryNode, please release partitions firstly" ,
collectionInfo . PartitionIDs , collectionID )
status . Reason = err . Error ( )
log . Warn ( "loadCollectionRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "loaded partitionIDs" , collectionInfo . PartitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-15 14:11:09 +00:00
return status , nil
}
}
2021-12-15 08:53:12 +00:00
baseTask := newBaseTask ( qc . loopCtx , querypb . TriggerCondition_GrpcRequest )
2021-10-18 13:34:47 +00:00
loadCollectionTask := & loadCollectionTask {
baseTask : baseTask ,
2021-04-15 07:15:46 +00:00
LoadCollectionRequest : req ,
2022-02-08 13:57:46 +00:00
broker : qc . broker ,
2021-06-22 08:44:09 +00:00
cluster : qc . cluster ,
meta : qc . meta ,
2021-04-15 07:15:46 +00:00
}
2021-10-11 01:54:37 +00:00
err := qc . scheduler . Enqueue ( loadCollectionTask )
if err != nil {
2021-12-15 14:11:09 +00:00
log . Error ( "loadCollectionRequest failed to add execute task to scheduler" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-10-11 01:54:37 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-10-11 01:54:37 +00:00
}
2021-04-15 07:15:46 +00:00
2021-10-14 12:18:33 +00:00
err = loadCollectionTask . waitToFinish ( )
2021-06-21 11:20:31 +00:00
if err != nil {
2021-12-15 14:11:09 +00:00
log . Error ( "load collection to query nodes failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-06-23 09:44:12 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-06-21 11:20:31 +00:00
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-21 11:20:31 +00:00
}
2021-04-15 07:15:46 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "loadCollectionRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-04-15 07:15:46 +00:00
return status , nil
}
2021-10-01 02:03:52 +00:00
// ReleaseCollection clears all data related to this collecion on the querynode
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) ReleaseCollection ( ctx context . Context , req * querypb . ReleaseCollectionRequest ) ( * commonpb . Status , error ) {
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2021-06-15 04:41:40 +00:00
//dbID := req.DbID
2021-04-15 07:15:46 +00:00
collectionID := req . CollectionID
2022-04-26 03:29:54 +00:00
log . Info ( "releaseCollectionRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-04-15 07:15:46 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
2021-06-24 08:00:15 +00:00
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "release collection failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-24 08:00:15 +00:00
}
2021-12-15 14:11:09 +00:00
// if collection has not been loaded into memory, return release collection successfully
2021-06-22 08:44:09 +00:00
hasCollection := qc . meta . hasCollection ( collectionID )
2021-06-15 04:41:40 +00:00
if ! hasCollection {
2022-04-26 03:29:54 +00:00
log . Info ( "release collection end, the collection has not been loaded into QueryNode" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2021-04-15 07:15:46 +00:00
return status , nil
}
2021-12-15 08:53:12 +00:00
baseTask := newBaseTask ( qc . loopCtx , querypb . TriggerCondition_GrpcRequest )
2021-10-18 13:34:47 +00:00
releaseCollectionTask := & releaseCollectionTask {
baseTask : baseTask ,
2021-04-15 07:15:46 +00:00
ReleaseCollectionRequest : req ,
2021-06-22 08:44:09 +00:00
cluster : qc . cluster ,
2021-07-02 02:40:13 +00:00
meta : qc . meta ,
2022-02-08 13:57:46 +00:00
broker : qc . broker ,
2021-04-15 07:15:46 +00:00
}
2021-10-11 01:54:37 +00:00
err := qc . scheduler . Enqueue ( releaseCollectionTask )
if err != nil {
2021-12-15 14:11:09 +00:00
log . Error ( "releaseCollectionRequest failed to add execute task to scheduler" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-10-11 01:54:37 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-10-11 01:54:37 +00:00
}
2021-06-15 04:41:40 +00:00
2021-10-14 12:18:33 +00:00
err = releaseCollectionTask . waitToFinish ( )
2021-04-15 07:15:46 +00:00
if err != nil {
2021-12-15 14:11:09 +00:00
log . Error ( "release collection from query nodes failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-04-15 07:15:46 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-04-15 07:15:46 +00:00
}
2022-04-26 03:29:54 +00:00
log . Info ( "releaseCollectionRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-08-02 14:39:25 +00:00
//qc.MetaReplica.printMeta()
2021-07-14 06:15:55 +00:00
//qc.cluster.printMeta()
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2022-02-28 08:51:55 +00:00
metrics . QueryCoordReleaseLatency . WithLabelValues ( ) . Observe ( float64 ( releaseCollectionTask . elapseSpan ( ) . Milliseconds ( ) ) )
2021-04-15 07:15:46 +00:00
return status , nil
}
2021-09-29 15:30:45 +00:00
// ShowPartitions return all the partitions that have been loaded
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) ShowPartitions ( ctx context . Context , req * querypb . ShowPartitionsRequest ) ( * querypb . ShowPartitionsResponse , error ) {
2021-04-15 07:15:46 +00:00
collectionID := req . CollectionID
2022-04-26 03:29:54 +00:00
log . Info ( "show partitions start" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , req . PartitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-06-24 08:00:15 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "show partition failed" , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-06-24 08:00:15 +00:00
return & querypb . ShowPartitionsResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-06-24 08:00:15 +00:00
}
2021-08-02 14:39:25 +00:00
partitionStates , err := qc . meta . showPartitions ( collectionID )
2021-04-15 07:15:46 +00:00
if err != nil {
2021-12-15 14:11:09 +00:00
err = fmt . Errorf ( "collection %d has not been loaded into QueryNode" , collectionID )
2021-06-24 08:00:15 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "show partitions failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-04-15 07:15:46 +00:00
return & querypb . ShowPartitionsResponse {
2021-06-24 08:00:15 +00:00
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-04-15 07:15:46 +00:00
}
2021-08-02 14:39:25 +00:00
ID2PartitionState := make ( map [ UniqueID ] * querypb . PartitionStates )
inMemoryPartitionIDs := make ( [ ] UniqueID , 0 )
for _ , state := range partitionStates {
ID2PartitionState [ state . PartitionID ] = state
inMemoryPartitionIDs = append ( inMemoryPartitionIDs , state . PartitionID )
}
inMemoryPercentages := make ( [ ] int64 , 0 )
if len ( req . PartitionIDs ) == 0 {
for _ , id := range inMemoryPartitionIDs {
inMemoryPercentages = append ( inMemoryPercentages , ID2PartitionState [ id ] . InMemoryPercentage )
}
2022-04-26 03:29:54 +00:00
log . Info ( "show partitions end" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int64s ( "partitionIDs" , inMemoryPartitionIDs ) ,
zap . Int64s ( "inMemoryPercentage" , inMemoryPercentages ) )
2021-08-02 14:39:25 +00:00
return & querypb . ShowPartitionsResponse {
Status : status ,
PartitionIDs : inMemoryPartitionIDs ,
InMemoryPercentages : inMemoryPercentages ,
} , nil
}
for _ , id := range req . PartitionIDs {
if _ , ok := ID2PartitionState [ id ] ; ! ok {
2021-12-15 14:11:09 +00:00
err = fmt . Errorf ( "partition %d of collection %d has not been loaded into QueryNode" , id , collectionID )
2021-08-02 14:39:25 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "show partitions failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "partitionID" , id ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-08-02 14:39:25 +00:00
return & querypb . ShowPartitionsResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-08-02 14:39:25 +00:00
}
inMemoryPercentages = append ( inMemoryPercentages , ID2PartitionState [ id ] . InMemoryPercentage )
}
2021-06-15 04:41:40 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "show partitions end" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , req . PartitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int64s ( "inMemoryPercentage" , inMemoryPercentages ) )
2021-06-19 03:45:09 +00:00
2021-04-15 07:15:46 +00:00
return & querypb . ShowPartitionsResponse {
2021-08-02 14:39:25 +00:00
Status : status ,
PartitionIDs : req . PartitionIDs ,
InMemoryPercentages : inMemoryPercentages ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-10-15 01:04:32 +00:00
// LoadPartitions loads all the sealed segments of this partition to queryNodes, and assigns watchDmChannelRequest to queryNodes
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) LoadPartitions ( ctx context . Context , req * querypb . LoadPartitionsRequest ) ( * commonpb . Status , error ) {
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2021-04-15 07:15:46 +00:00
collectionID := req . CollectionID
partitionIDs := req . PartitionIDs
2021-12-15 14:11:09 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "loadPartitionRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-04-15 07:15:46 +00:00
status := & commonpb . Status {
2021-06-23 09:44:12 +00:00
ErrorCode : commonpb . ErrorCode_Success ,
2021-06-19 03:45:09 +00:00
}
2021-06-24 08:00:15 +00:00
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "load partition failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-24 08:00:15 +00:00
}
2021-04-15 07:15:46 +00:00
2021-12-15 14:11:09 +00:00
// if partitionIDs to load are empty, return error
2021-04-15 07:15:46 +00:00
if len ( partitionIDs ) == 0 {
2021-06-23 09:44:12 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-04-15 07:15:46 +00:00
err := errors . New ( "partitionIDs are empty" )
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "loadPartitionRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-04-15 07:15:46 +00:00
}
2021-12-15 14:11:09 +00:00
if collectionInfo , err := qc . meta . getCollectionInfoByID ( collectionID ) ; err == nil {
// if the collection has been loaded into memory by load collection request, return error
// should release collection first, then load partitions again
2022-03-14 01:50:01 +00:00
if collectionInfo . LoadType == querypb . LoadType_LoadCollection {
2021-12-15 14:11:09 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err = fmt . Errorf ( "collection %d has been loaded into QueryNode, please release collection firstly" , collectionID )
status . Reason = err . Error ( )
}
if collectionInfo . LoadType == querypb . LoadType_LoadPartition {
2022-04-28 02:54:00 +00:00
if collectionInfo . ReplicaNumber != req . ReplicaNumber {
msg := fmt . Sprintf ( "partitions has already been loaded, and the number of replicas %v is not same as the request's %v. Should release first then reload with the new number of replicas" ,
collectionInfo . ReplicaNumber ,
req . ReplicaNumber )
log . Warn ( msg ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int32 ( "collectionReplicaNumber" , collectionInfo . ReplicaNumber ) ,
zap . Int32 ( "requestReplicaNumber" , req . ReplicaNumber ) )
status . ErrorCode = commonpb . ErrorCode_IllegalArgument
status . Reason = msg
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
return status , nil
}
2021-12-15 14:11:09 +00:00
for _ , toLoadPartitionID := range partitionIDs {
needLoad := true
for _ , loadedPartitionID := range collectionInfo . PartitionIDs {
if toLoadPartitionID == loadedPartitionID {
needLoad = false
break
}
2021-06-23 09:44:12 +00:00
}
2021-12-15 14:11:09 +00:00
if needLoad {
// if new partitions need to be loaded, return error
// should release partitions first, then load partitions again
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err = fmt . Errorf ( "some partitions %v of collection %d has been loaded into QueryNode, please release partitions firstly" ,
collectionInfo . PartitionIDs , collectionID )
status . Reason = err . Error ( )
2021-06-23 09:44:12 +00:00
}
}
2021-06-19 03:45:09 +00:00
}
2021-12-15 14:11:09 +00:00
if status . ErrorCode != commonpb . ErrorCode_Success {
log . Warn ( "loadPartitionRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-06-23 09:44:12 +00:00
return status , nil
2021-06-21 11:20:31 +00:00
}
2021-12-15 14:11:09 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "loadPartitionRequest completed, all partitions to load have already been loaded into memory" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2021-12-15 14:11:09 +00:00
return status , nil
2021-06-23 09:44:12 +00:00
}
2021-12-15 08:53:12 +00:00
baseTask := newBaseTask ( qc . loopCtx , querypb . TriggerCondition_GrpcRequest )
2021-10-18 13:34:47 +00:00
loadPartitionTask := & loadPartitionTask {
baseTask : baseTask ,
2021-06-23 09:44:12 +00:00
LoadPartitionsRequest : req ,
2022-02-08 13:57:46 +00:00
broker : qc . broker ,
2021-06-23 09:44:12 +00:00
cluster : qc . cluster ,
meta : qc . meta ,
}
2021-10-11 01:54:37 +00:00
err := qc . scheduler . Enqueue ( loadPartitionTask )
if err != nil {
2021-12-15 14:11:09 +00:00
log . Error ( "loadPartitionRequest failed to add execute task to scheduler" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-10-11 01:54:37 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-10-11 01:54:37 +00:00
}
2021-06-23 09:44:12 +00:00
2021-10-14 12:18:33 +00:00
err = loadPartitionTask . waitToFinish ( )
2021-06-23 09:44:12 +00:00
if err != nil {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "loadPartitionRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordLoadCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-19 03:45:09 +00:00
}
2022-04-26 03:29:54 +00:00
log . Info ( "loadPartitionRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-06-15 04:41:40 +00:00
return status , nil
2021-04-15 07:15:46 +00:00
}
2021-10-15 12:21:12 +00:00
// ReleasePartitions clears all data related to this partition on the querynode
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) ReleasePartitions ( ctx context . Context , req * querypb . ReleasePartitionsRequest ) ( * commonpb . Status , error ) {
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . TotalLabel ) . Inc ( )
2022-02-28 08:51:55 +00:00
2021-06-15 04:41:40 +00:00
//dbID := req.DbID
2021-04-15 07:15:46 +00:00
collectionID := req . CollectionID
partitionIDs := req . PartitionIDs
2022-04-26 03:29:54 +00:00
log . Info ( "releasePartitionRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-04-15 07:15:46 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
2021-06-24 08:00:15 +00:00
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "release partition failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-06-23 09:44:12 +00:00
return status , nil
2021-04-15 07:15:46 +00:00
}
2021-06-23 09:44:12 +00:00
if len ( partitionIDs ) == 0 {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err := errors . New ( "partitionIDs are empty" )
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "releasePartitionsRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-06-23 09:44:12 +00:00
}
2021-04-15 07:15:46 +00:00
2021-12-15 14:11:09 +00:00
releaseCollection := true
var toReleasedPartitions [ ] UniqueID
if collectionInfo , err := qc . meta . getCollectionInfoByID ( collectionID ) ; err == nil {
// if collection has been loaded into memory by load collection request, return error
// part of the partitions released after load collection is temporarily not supported, and will be supported soon
2022-03-14 01:50:01 +00:00
if collectionInfo . LoadType == querypb . LoadType_LoadCollection {
2021-12-15 14:11:09 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err := errors . New ( "releasing some partitions after load collection is not supported" )
status . Reason = err . Error ( )
log . Warn ( "releasePartitionsRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-15 14:11:09 +00:00
return status , nil
2021-09-10 11:14:01 +00:00
}
2021-12-15 14:11:09 +00:00
for _ , partitionID := range collectionInfo . PartitionIDs {
toRelease := false
for _ , releasedPartitionID := range partitionIDs {
if partitionID == releasedPartitionID {
toRelease = true
toReleasedPartitions = append ( toReleasedPartitions , releasedPartitionID )
}
}
if ! toRelease {
releaseCollection = false
}
}
} else {
2022-04-26 03:29:54 +00:00
log . Info ( "release partitions end, the collection has not been loaded into QueryNode" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2021-12-15 14:11:09 +00:00
return status , nil
2021-09-10 11:14:01 +00:00
}
2021-12-15 14:11:09 +00:00
2021-09-10 11:14:01 +00:00
if len ( toReleasedPartitions ) == 0 {
2022-04-26 03:29:54 +00:00
log . Info ( "release partitions end, the partitions has not been loaded into QueryNode" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2021-09-10 11:14:01 +00:00
return status , nil
}
2021-12-15 14:11:09 +00:00
var releaseTask task
2021-12-15 08:53:12 +00:00
baseTask := newBaseTask ( qc . loopCtx , querypb . TriggerCondition_GrpcRequest )
2021-12-15 14:11:09 +00:00
if releaseCollection {
// if all loaded partitions will be released from memory, then upgrade release partitions request to release collection request
2022-04-26 03:29:54 +00:00
log . Info ( fmt . Sprintf ( "all partitions of collection %d will released from QueryNode, so release the collection directly" , collectionID ) ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
msgBase := req . Base
msgBase . MsgType = commonpb . MsgType_ReleaseCollection
releaseCollectionRequest := & querypb . ReleaseCollectionRequest {
Base : msgBase ,
CollectionID : req . CollectionID ,
}
releaseTask = & releaseCollectionTask {
baseTask : baseTask ,
ReleaseCollectionRequest : releaseCollectionRequest ,
cluster : qc . cluster ,
meta : qc . meta ,
2022-02-08 13:57:46 +00:00
broker : qc . broker ,
2021-12-15 14:11:09 +00:00
}
} else {
req . PartitionIDs = toReleasedPartitions
releaseTask = & releasePartitionTask {
baseTask : baseTask ,
ReleasePartitionsRequest : req ,
cluster : qc . cluster ,
2021-12-21 03:57:39 +00:00
meta : qc . meta ,
2021-12-15 14:11:09 +00:00
}
2021-06-23 09:44:12 +00:00
}
2021-12-15 14:11:09 +00:00
err := qc . scheduler . Enqueue ( releaseTask )
2021-10-11 01:54:37 +00:00
if err != nil {
2022-04-26 03:29:54 +00:00
log . Warn ( "releasePartitionRequest failed to add execute task to scheduler" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-10-11 01:54:37 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-10-11 01:54:37 +00:00
}
2021-06-23 09:44:12 +00:00
2021-12-15 14:11:09 +00:00
err = releaseTask . waitToFinish ( )
2021-06-23 09:44:12 +00:00
if err != nil {
2022-04-26 03:29:54 +00:00
log . Warn ( "releasePartitionRequest failed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-06-23 09:44:12 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . FailLabel ) . Inc ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-04-15 07:15:46 +00:00
}
2021-12-15 14:11:09 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "releasePartitionRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-08-02 14:39:25 +00:00
//qc.MetaReplica.printMeta()
2021-07-14 06:15:55 +00:00
//qc.cluster.printMeta()
2022-02-28 08:51:55 +00:00
2022-03-03 08:05:57 +00:00
metrics . QueryCoordReleaseCount . WithLabelValues ( metrics . SuccessLabel ) . Inc ( )
2022-02-28 08:51:55 +00:00
metrics . QueryCoordReleaseLatency . WithLabelValues ( ) . Observe ( float64 ( releaseTask . elapseSpan ( ) . Milliseconds ( ) ) )
2021-04-15 07:15:46 +00:00
return status , nil
}
2021-10-01 04:11:47 +00:00
// CreateQueryChannel assigns unique querychannel and resultchannel to the specified collecion
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) CreateQueryChannel ( ctx context . Context , req * querypb . CreateQueryChannelRequest ) ( * querypb . CreateQueryChannelResponse , error ) {
2021-12-15 14:11:09 +00:00
log . Debug ( "createQueryChannelRequest received" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) )
2021-06-24 08:00:15 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "createQueryChannel failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Error ( err ) )
2021-06-24 08:00:15 +00:00
return & querypb . CreateQueryChannelResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-06-24 08:00:15 +00:00
}
2021-06-15 04:41:40 +00:00
collectionID := req . CollectionID
2021-12-21 05:50:54 +00:00
info := qc . meta . getQueryChannelInfoByID ( collectionID )
2021-12-15 14:11:09 +00:00
log . Debug ( "createQueryChannelRequest completed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , collectionID ) ,
zap . String ( "request channel" , info . QueryChannel ) ,
zap . String ( "result channel" , info . QueryResultChannel ) )
2021-06-15 04:41:40 +00:00
return & querypb . CreateQueryChannelResponse {
2021-12-15 08:53:12 +00:00
Status : status ,
QueryChannel : info . QueryChannel ,
QueryResultChannel : info . QueryResultChannel ,
2021-06-15 04:41:40 +00:00
} , nil
}
2021-10-01 04:19:41 +00:00
// GetPartitionStates returns state of the partition, including notExist, notPresent, onDisk, partitionInMemory, inMemory, partitionInGPU, InGPU
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) GetPartitionStates ( ctx context . Context , req * querypb . GetPartitionStatesRequest ) ( * querypb . GetPartitionStatesResponse , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "getPartitionStatesRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , req . PartitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-06-24 08:00:15 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "getPartitionStates failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-06-24 08:00:15 +00:00
return & querypb . GetPartitionStatesResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-06-24 08:00:15 +00:00
}
2021-06-15 04:41:40 +00:00
partitionIDs := req . PartitionIDs
partitionStates := make ( [ ] * querypb . PartitionStates , 0 )
for _ , partitionID := range partitionIDs {
2021-08-02 14:39:25 +00:00
res , err := qc . meta . getPartitionStatesByID ( req . CollectionID , partitionID )
2021-04-15 07:15:46 +00:00
if err != nil {
2021-12-15 14:11:09 +00:00
err = fmt . Errorf ( "partition %d of collection %d has not been loaded into QueryNode" , partitionID , req . CollectionID )
2021-06-24 08:00:15 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Warn ( "getPartitionStatesRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "partitionID" , partitionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-06-15 04:41:40 +00:00
return & querypb . GetPartitionStatesResponse {
2021-06-24 08:00:15 +00:00
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-04-15 07:15:46 +00:00
}
2021-06-15 04:41:40 +00:00
partitionState := & querypb . PartitionStates {
PartitionID : partitionID ,
2021-08-02 14:39:25 +00:00
State : res . State ,
2021-06-15 04:41:40 +00:00
}
partitionStates = append ( partitionStates , partitionState )
2021-04-15 07:15:46 +00:00
}
2022-04-26 03:29:54 +00:00
log . Info ( "getPartitionStatesRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "partitionIDs" , req . PartitionIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-06-15 04:41:40 +00:00
return & querypb . GetPartitionStatesResponse {
2021-06-24 08:00:15 +00:00
Status : status ,
2021-06-15 04:41:40 +00:00
PartitionDescriptions : partitionStates ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-10-02 03:15:55 +00:00
// GetSegmentInfo returns information of all the segments on queryNodes, and the information includes memSize, numRow, indexName, indexID ...
2021-06-22 08:44:09 +00:00
func ( qc * QueryCoord ) GetSegmentInfo ( ctx context . Context , req * querypb . GetSegmentInfoRequest ) ( * querypb . GetSegmentInfoResponse , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "getSegmentInfoRequest received" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
2022-05-05 13:17:50 +00:00
zap . Int64 ( "collection ID" , req . GetCollectionID ( ) ) ,
zap . Int64s ( "segment IDs" , req . GetSegmentIDs ( ) ) ,
zap . Int64 ( "msg ID" , req . GetBase ( ) . GetMsgID ( ) ) )
2021-12-15 14:11:09 +00:00
2021-06-24 08:00:15 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-06-24 08:00:15 +00:00
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "getSegmentInfo failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-06-24 08:00:15 +00:00
return & querypb . GetSegmentInfoResponse {
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-06-24 08:00:15 +00:00
}
2021-06-15 04:41:40 +00:00
totalMemSize := int64 ( 0 )
totalNumRows := int64 ( 0 )
2021-08-02 14:39:25 +00:00
//TODO::get segment infos from MetaReplica
2021-06-15 04:41:40 +00:00
//segmentIDs := req.SegmentIDs
2021-08-02 14:39:25 +00:00
//segmentInfos, err := qs.MetaReplica.getSegmentInfos(segmentIDs)
2021-06-22 08:44:09 +00:00
segmentInfos , err := qc . cluster . getSegmentInfo ( ctx , req )
2021-04-15 07:15:46 +00:00
if err != nil {
2021-06-24 08:00:15 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-15 14:11:09 +00:00
log . Error ( "getSegmentInfoRequest failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64s ( "segmentIDs" , req . SegmentIDs ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-06-15 04:41:40 +00:00
return & querypb . GetSegmentInfoResponse {
2021-06-24 08:00:15 +00:00
Status : status ,
2021-12-01 14:15:37 +00:00
} , nil
2021-04-15 07:15:46 +00:00
}
2022-05-25 07:17:59 +00:00
2021-06-15 04:41:40 +00:00
for _ , info := range segmentInfos {
totalNumRows += info . NumRows
totalMemSize += info . MemSize
2021-04-15 07:15:46 +00:00
}
2022-04-26 03:29:54 +00:00
log . Info ( "getSegmentInfoRequest completed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Int64 ( "num rows" , totalNumRows ) ,
zap . Int64 ( "memory size" , totalMemSize ) )
2021-04-15 07:15:46 +00:00
return & querypb . GetSegmentInfoResponse {
2021-06-24 08:00:15 +00:00
Status : status ,
Infos : segmentInfos ,
2021-04-15 07:15:46 +00:00
} , nil
}
2021-08-17 02:06:11 +00:00
2021-11-06 08:54:59 +00:00
// LoadBalance would do a load balancing operation between query nodes
func ( qc * QueryCoord ) LoadBalance ( ctx context . Context , req * querypb . LoadBalanceRequest ) ( * commonpb . Status , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "loadBalanceRequest received" ,
2021-12-13 02:15:26 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
2021-12-15 14:11:09 +00:00
zap . Int64s ( "source nodeIDs" , req . SourceNodeIDs ) ,
zap . Int64s ( "dst nodeIDs" , req . DstNodeIDs ) ,
zap . Int64s ( "balanced segments" , req . SealedSegmentIDs ) ,
2022-04-20 08:15:41 +00:00
zap . Int64 ( "collectionID" , req . CollectionID ) ,
2021-12-15 14:11:09 +00:00
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-11-06 08:54:59 +00:00
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
2021-12-15 14:11:09 +00:00
err := errors . New ( "QueryCoord is not healthy" )
2021-11-06 08:54:59 +00:00
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "loadBalance failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-11-06 08:54:59 +00:00
return status , nil
}
2021-12-15 08:53:12 +00:00
baseTask := newBaseTask ( qc . loopCtx , querypb . TriggerCondition_LoadBalance )
2022-01-25 09:26:13 +00:00
req . BalanceReason = querypb . TriggerCondition_LoadBalance
2021-11-06 08:54:59 +00:00
loadBalanceTask := & loadBalanceTask {
baseTask : baseTask ,
LoadBalanceRequest : req ,
2022-02-08 13:57:46 +00:00
broker : qc . broker ,
2021-11-06 08:54:59 +00:00
cluster : qc . cluster ,
meta : qc . meta ,
}
err := qc . scheduler . Enqueue ( loadBalanceTask )
if err != nil {
2022-04-26 03:29:54 +00:00
log . Warn ( "loadBalanceRequest failed to add execute task to scheduler" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
2021-11-06 08:54:59 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
return status , nil
}
err = loadBalanceTask . waitToFinish ( )
if err != nil {
2022-04-26 03:29:54 +00:00
log . Warn ( "loadBalanceRequest failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-11-06 08:54:59 +00:00
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
status . Reason = err . Error ( )
2021-12-01 14:15:37 +00:00
return status , nil
2021-11-06 08:54:59 +00:00
}
2021-12-15 14:11:09 +00:00
2022-04-26 03:29:54 +00:00
log . Info ( "loadBalanceRequest completed" ,
2021-12-13 02:15:26 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
2021-12-15 14:11:09 +00:00
zap . Int64s ( "source nodeIDs" , req . SourceNodeIDs ) ,
zap . Int64s ( "dst nodeIDs" , req . DstNodeIDs ) ,
zap . Int64s ( "balanced segments" , req . SealedSegmentIDs ) ,
2022-04-20 08:15:41 +00:00
zap . Int64 ( "collectionID" , req . CollectionID ) ,
2021-12-15 14:11:09 +00:00
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-11-06 08:54:59 +00:00
2021-12-15 14:11:09 +00:00
return status , nil
2021-08-17 02:06:11 +00:00
}
2021-10-02 01:42:23 +00:00
// GetMetrics returns all the queryCoord's metrics
2021-08-17 02:06:11 +00:00
func ( qc * QueryCoord ) GetMetrics ( ctx context . Context , req * milvuspb . GetMetricsRequest ) ( * milvuspb . GetMetricsResponse , error ) {
2021-12-15 14:11:09 +00:00
log . Debug ( "getMetricsRequest received" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . String ( "req" , req . Request ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-08-17 02:06:11 +00:00
2021-12-15 14:11:09 +00:00
getMetricsResponse := & milvuspb . GetMetricsResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
} ,
2022-04-24 14:03:44 +00:00
ComponentName : metricsinfo . ConstructComponentName ( typeutil . QueryCoordRole , Params . QueryCoordCfg . GetNodeID ( ) ) ,
2021-12-15 14:11:09 +00:00
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
err := errors . New ( "QueryCoord is not healthy" )
getMetricsResponse . Status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "getMetrics failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2021-12-15 14:11:09 +00:00
return getMetricsResponse , nil
2021-08-17 02:06:11 +00:00
}
metricType , err := metricsinfo . ParseMetricType ( req . Request )
if err != nil {
2021-12-15 14:11:09 +00:00
getMetricsResponse . Status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "getMetrics failed to parse metric type" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
2021-08-17 02:06:11 +00:00
zap . Error ( err ) )
2021-12-15 14:11:09 +00:00
return getMetricsResponse , nil
2021-08-17 02:06:11 +00:00
}
2021-12-15 14:11:09 +00:00
log . Debug ( "getMetrics" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
2021-08-17 02:06:11 +00:00
zap . String ( "metric_type" , metricType ) )
if metricType == metricsinfo . SystemInfoMetrics {
2021-09-03 09:15:26 +00:00
ret , err := qc . metricsCacheManager . GetSystemInfoMetrics ( )
if err == nil && ret != nil {
2021-12-15 14:11:09 +00:00
log . Debug ( "getMetrics completed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-09-03 09:15:26 +00:00
return ret , nil
}
2021-12-15 14:11:09 +00:00
2021-09-03 09:15:26 +00:00
log . Debug ( "failed to get system info metrics from cache, recompute instead" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
2021-09-03 09:15:26 +00:00
2021-08-17 02:06:11 +00:00
metrics , err := getSystemInfoMetrics ( ctx , req , qc )
2021-12-15 14:11:09 +00:00
if err != nil {
log . Error ( "getSystemInfoMetrics failed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
getMetricsResponse . Status . Reason = err . Error ( )
return getMetricsResponse , nil
}
2021-08-17 02:06:11 +00:00
2021-12-15 14:11:09 +00:00
// get metric success, the set the status.ErrorCode to success
getMetricsResponse . Response = metrics
qc . metricsCacheManager . UpdateSystemInfoMetrics ( getMetricsResponse )
log . Debug ( "getMetrics completed" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
2021-08-17 02:06:11 +00:00
zap . String ( "req" , req . Request ) ,
2021-12-15 14:11:09 +00:00
zap . Int64 ( "msgID" , req . Base . MsgID ) )
getMetricsResponse . Status . ErrorCode = commonpb . ErrorCode_Success
return getMetricsResponse , nil
2021-08-17 02:06:11 +00:00
}
2021-09-10 11:14:01 +00:00
err = errors . New ( metricsinfo . MsgUnimplementedMetric )
2021-12-15 14:11:09 +00:00
getMetricsResponse . Status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "getMetrics failed" ,
2021-12-15 14:11:09 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
2021-08-17 02:06:11 +00:00
zap . String ( "req" , req . Request ) ,
2021-12-15 14:11:09 +00:00
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
2021-09-10 11:14:01 +00:00
zap . Error ( err ) )
2021-08-17 02:06:11 +00:00
2021-12-15 14:11:09 +00:00
return getMetricsResponse , nil
2021-08-17 02:06:11 +00:00
}
2022-03-31 08:39:29 +00:00
2022-04-01 08:15:29 +00:00
// GetReplicas gets replicas of a certain collection
2022-04-06 06:57:31 +00:00
func ( qc * QueryCoord ) GetReplicas ( ctx context . Context , req * milvuspb . GetReplicasRequest ) ( * milvuspb . GetReplicasResponse , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "GetReplicas received" ,
2022-04-20 08:15:41 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err := errors . New ( "QueryCoord is not healthy" )
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "GetReplicasResponse failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-04-20 08:15:41 +00:00
return & milvuspb . GetReplicasResponse {
Status : status ,
} , nil
}
replicas , err := qc . meta . getReplicasByCollectionID ( req . CollectionID )
if err != nil {
status . ErrorCode = commonpb . ErrorCode_MetaFailed
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "GetReplicasResponse failed to get replicas" ,
2022-04-20 08:15:41 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
return & milvuspb . GetReplicasResponse {
Status : status ,
} , nil
}
if req . WithShardNodes {
2022-04-27 08:27:46 +00:00
shardNodes := getShardNodes ( req . CollectionID , qc . meta )
2022-04-20 08:15:41 +00:00
for _ , replica := range replicas {
for _ , shard := range replica . ShardReplicas {
2022-04-24 09:37:46 +00:00
shard . NodeIds = append ( shard . NodeIds , shard . LeaderID )
2022-04-24 03:35:45 +00:00
nodes := shardNodes [ shard . DmChannelName ]
for _ , nodeID := range replica . NodeIds {
2022-04-24 09:37:46 +00:00
if _ , ok := nodes [ nodeID ] ; ok && nodeID != shard . LeaderID {
2022-04-24 03:35:45 +00:00
shard . NodeIds = append ( shard . NodeIds , nodeID )
}
2022-04-20 08:15:41 +00:00
}
}
}
}
2022-04-26 03:29:54 +00:00
log . Info ( "GetReplicas finished" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Any ( "replicas" , replicas ) )
2022-04-06 06:57:31 +00:00
return & milvuspb . GetReplicasResponse {
2022-04-20 08:15:41 +00:00
Status : status ,
Replicas : replicas ,
2022-04-01 08:15:29 +00:00
} , nil
}
2022-03-31 08:39:29 +00:00
// GetShardLeaders gets shard leaders of a certain collection
func ( qc * QueryCoord ) GetShardLeaders ( ctx context . Context , req * querypb . GetShardLeadersRequest ) ( * querypb . GetShardLeadersResponse , error ) {
2022-04-26 03:29:54 +00:00
log . Info ( "GetShardLeaders received" ,
2022-04-20 08:15:41 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) )
status := & commonpb . Status {
ErrorCode : commonpb . ErrorCode_Success ,
}
if qc . stateCode . Load ( ) != internalpb . StateCode_Healthy {
status . ErrorCode = commonpb . ErrorCode_UnexpectedError
err := errors . New ( "QueryCoord is not healthy" )
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "GetShardLeadersResponse failed" , zap . String ( "role" , typeutil . QueryCoordRole ) , zap . Int64 ( "msgID" , req . Base . MsgID ) , zap . Error ( err ) )
2022-04-20 08:15:41 +00:00
return & querypb . GetShardLeadersResponse {
Status : status ,
} , nil
}
replicas , err := qc . meta . getReplicasByCollectionID ( req . CollectionID )
if err != nil {
status . ErrorCode = commonpb . ErrorCode_MetaFailed
status . Reason = err . Error ( )
2022-04-26 03:29:54 +00:00
log . Warn ( "GetShardLeadersResponse failed to get replicas" ,
2022-04-20 08:15:41 +00:00
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Int64 ( "msgID" , req . Base . MsgID ) ,
zap . Error ( err ) )
return & querypb . GetShardLeadersResponse {
Status : status ,
} , nil
}
shards := make ( map [ string ] * querypb . ShardLeadersList )
2022-04-27 08:27:46 +00:00
shardNodes := getShardNodes ( req . CollectionID , qc . meta )
2022-04-20 08:15:41 +00:00
for _ , replica := range replicas {
for _ , shard := range replica . ShardReplicas {
list , ok := shards [ shard . DmChannelName ]
if ! ok {
list = & querypb . ShardLeadersList {
ChannelName : shard . DmChannelName ,
NodeIds : make ( [ ] int64 , 0 ) ,
NodeAddrs : make ( [ ] string , 0 ) ,
}
}
2022-04-27 08:27:46 +00:00
isShardAvailable , err := qc . cluster . isOnline ( shard . LeaderID )
if err != nil || ! isShardAvailable {
log . Warn ( "shard leader is unavailable" ,
zap . Int64 ( "collectionID" , replica . CollectionID ) ,
zap . Int64 ( "replicaID" , replica . ReplicaID ) ,
zap . String ( "DmChannel" , shard . DmChannelName ) ,
zap . Int64 ( "shardLeaderID" , shard . LeaderID ) ,
zap . Error ( err ) )
continue
}
nodes := shardNodes [ shard . DmChannelName ]
for _ , nodeID := range replica . NodeIds {
if _ , ok := nodes [ nodeID ] ; ok {
if ok , err := qc . cluster . isOnline ( nodeID ) ; err != nil || ! ok {
isShardAvailable = false
break
}
}
}
if isShardAvailable {
list . NodeIds = append ( list . NodeIds , shard . LeaderID )
list . NodeAddrs = append ( list . NodeAddrs , shard . LeaderAddr )
shards [ shard . DmChannelName ] = list
}
2022-04-20 08:15:41 +00:00
}
}
shardLeaderLists := make ( [ ] * querypb . ShardLeadersList , 0 , len ( shards ) )
for _ , shard := range shards {
shardLeaderLists = append ( shardLeaderLists , shard )
}
2022-05-10 11:47:53 +00:00
// all replicas are not available
if len ( shardLeaderLists ) == 0 {
return & querypb . GetShardLeadersResponse {
Status : & commonpb . Status {
ErrorCode : commonpb . ErrorCode_UnexpectedError ,
Reason : "no replica available" ,
} ,
} , nil
}
2022-04-26 03:29:54 +00:00
log . Info ( "GetShardLeaders finished" ,
zap . String ( "role" , typeutil . QueryCoordRole ) ,
zap . Int64 ( "collectionID" , req . CollectionID ) ,
zap . Any ( "replicas" , shardLeaderLists ) )
2022-03-31 08:39:29 +00:00
return & querypb . GetShardLeadersResponse {
2022-04-20 08:15:41 +00:00
Status : status ,
Shards : shardLeaderLists ,
2022-03-31 08:39:29 +00:00
} , nil
}