mirror of https://github.com/milvus-io/milvus.git
parent 863444cc45
commit c63086fefa
@@ -29,6 +29,7 @@ import (
 	"github.com/milvus-io/milvus/internal/proto/milvuspb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/util/sessionutil"
+	"github.com/milvus-io/milvus/internal/util/typeutil"
 )
 
 const (
@@ -45,6 +46,7 @@ type Cluster interface {
 	getNumSegments(nodeID int64) (int, error)
 
 	watchDmChannels(ctx context.Context, nodeID int64, in *querypb.WatchDmChannelsRequest) error
+	//TODO:: removeDmChannel
 	getNumDmChannels(nodeID int64) (int, error)
 
 	hasWatchedQueryChannel(ctx context.Context, nodeID int64, collectionID UniqueID) bool
@@ -55,31 +57,51 @@ type Cluster interface {
 	releasePartitions(ctx context.Context, nodeID int64, in *querypb.ReleasePartitionsRequest) error
 	getSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) ([]*querypb.SegmentInfo, error)
 
-	registerNode(ctx context.Context, session *sessionutil.Session, id UniqueID) error
+	registerNode(ctx context.Context, session *sessionutil.Session, id UniqueID, state nodeState) error
 	getNodeByID(nodeID int64) (Node, error)
 	removeNodeInfo(nodeID int64) error
 	stopNode(nodeID int64)
-	onServiceNodes() (map[int64]Node, error)
-	isOnService(nodeID int64) (bool, error)
+	onlineNodes() (map[int64]Node, error)
+	isOnline(nodeID int64) (bool, error)
+	offlineNodes() (map[int64]Node, error)
 
-	printMeta()
+	getSessionVersion() int64
 
+	getMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) []queryNodeGetMetricsResponse
 }
 
 type newQueryNodeFn func(ctx context.Context, address string, id UniqueID, kv *etcdkv.EtcdKV) (Node, error)
 
+type nodeState int
+
+const (
+	disConnect nodeState = 0
+	online     nodeState = 1
+	offline    nodeState = 2
+)
+
 type queryNodeCluster struct {
+	ctx    context.Context
+	cancel context.CancelFunc
 	client *etcdkv.EtcdKV
+
+	session        *sessionutil.Session
+	sessionVersion int64
+
 	sync.RWMutex
 	clusterMeta Meta
 	nodes       map[int64]Node
 	newNodeFn   newQueryNodeFn
 }
 
-func newQueryNodeCluster(clusterMeta Meta, kv *etcdkv.EtcdKV, newNodeFn newQueryNodeFn) (*queryNodeCluster, error) {
+func newQueryNodeCluster(ctx context.Context, clusterMeta Meta, kv *etcdkv.EtcdKV, newNodeFn newQueryNodeFn, session *sessionutil.Session) (*queryNodeCluster, error) {
+	childCtx, cancel := context.WithCancel(ctx)
 	nodes := make(map[int64]Node)
 	c := &queryNodeCluster{
+		ctx:         childCtx,
+		cancel:      cancel,
 		client:      kv,
+		session:     session,
 		clusterMeta: clusterMeta,
 		nodes:       nodes,
 		newNodeFn:   newNodeFn,
@@ -93,30 +115,55 @@ func newQueryNodeCluster(clusterMeta Meta, kv *etcdkv.EtcdKV, newNodeFn newQuery
 	}
 
 func (c *queryNodeCluster) reloadFromKV() error {
-	nodeIDs := make([]UniqueID, 0)
-	keys, values, err := c.client.LoadWithPrefix(queryNodeInfoPrefix)
+	toLoadMetaNodeIDs := make([]int64, 0)
+	// get current online session
+	onlineNodeSessions, version, _ := c.session.GetSessions(typeutil.QueryNodeRole)
+	onlineSessionMap := make(map[int64]*sessionutil.Session)
+	for _, session := range onlineNodeSessions {
+		nodeID := session.ServerID
+		onlineSessionMap[nodeID] = session
+	}
+	for nodeID, session := range onlineSessionMap {
+		log.Debug("ReloadFromKV: register a queryNode to cluster", zap.Any("nodeID", nodeID))
+		err := c.registerNode(c.ctx, session, nodeID, disConnect)
+		if err != nil {
+			log.Error("query node failed to register", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
+			return err
+		}
+		toLoadMetaNodeIDs = append(toLoadMetaNodeIDs, nodeID)
+	}
+	c.sessionVersion = version
+
+	// load node information before power off from etcd
+	oldStringNodeIDs, oldNodeSessions, err := c.client.LoadWithPrefix(queryNodeInfoPrefix)
 	if err != nil {
+		log.Error("reloadFromKV: get previous node info from etcd error", zap.Error(err))
 		return err
 	}
-	for index := range keys {
-		nodeID, err := strconv.ParseInt(filepath.Base(keys[index]), 10, 64)
+	for index := range oldStringNodeIDs {
+		nodeID, err := strconv.ParseInt(filepath.Base(oldStringNodeIDs[index]), 10, 64)
 		if err != nil {
+			log.Error("WatchNodeLoop: parse nodeID error", zap.Error(err))
 			return err
 		}
+		if _, ok := onlineSessionMap[nodeID]; !ok {
 			session := &sessionutil.Session{}
-			err = json.Unmarshal([]byte(values[index]), session)
+			err = json.Unmarshal([]byte(oldNodeSessions[index]), session)
 			if err != nil {
-				return err
+				log.Error("WatchNodeLoop: unmarshal session error", zap.Error(err))
+				return err
+			}
+			err = c.registerNode(context.Background(), session, nodeID, offline)
+			if err != nil {
+				log.Debug("ReloadFromKV: failed to add queryNode to cluster", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
+				return err
+			}
+			toLoadMetaNodeIDs = append(toLoadMetaNodeIDs, nodeID)
 		}
-		err = c.registerNode(context.Background(), session, nodeID)
-		if err != nil {
-			log.Debug("ReloadFromKV: failed to add queryNode to cluster", zap.Int64("nodeID", nodeID), zap.String("error info", err.Error()))
-			continue
-		}
-		nodeIDs = append(nodeIDs, nodeID)
 	}
-	for _, nodeID := range nodeIDs {
+
+	// load collection meta of queryNode from etcd
+	for _, nodeID := range toLoadMetaNodeIDs {
 		infoPrefix := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, nodeID)
 		_, collectionValues, err := c.client.LoadWithPrefix(infoPrefix)
 		if err != nil {
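A note on the reloadFromKV hunk above: node recovery hinges on two small steps, taking the node ID from the last path segment of the etcd key and unmarshalling the stored session blob. Below is a minimal, self-contained sketch of just those steps; the key layout and the trimmed-down session struct are illustrative assumptions, not the project's actual definitions.

```go
package main

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"strconv"
)

// Trimmed-down stand-in for sessionutil.Session (illustrative only).
type session struct {
	ServerID int64
	Address  string
}

func main() {
	// Assumed layout: one etcd entry per query node, key "<prefix>/<nodeID>",
	// value holding the JSON-encoded session.
	key := "queryCoord/queryNode/100"
	value := `{"ServerID": 100, "Address": "localhost"}`

	// The node ID is the final path segment of the key, as in reloadFromKV.
	nodeID, err := strconv.ParseInt(filepath.Base(key), 10, 64)
	if err != nil {
		panic(err)
	}

	s := &session{}
	if err := json.Unmarshal([]byte(value), s); err != nil {
		panic(err)
	}
	fmt.Println(nodeID, s.Address) // 100 localhost
}
```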
@@ -138,11 +185,15 @@ func (c *queryNodeCluster) reloadFromKV() error {
 	return nil
 }
 
+func (c *queryNodeCluster) getSessionVersion() int64 {
+	return c.sessionVersion
+}
+
 func (c *queryNodeCluster) getComponentInfos(ctx context.Context) ([]*internalpb.ComponentInfo, error) {
 	c.RLock()
 	defer c.RUnlock()
 	subComponentInfos := make([]*internalpb.ComponentInfo, 0)
-	nodes, err := c.getOnServiceNodes()
+	nodes, err := c.getOnlineNodes()
 	if err != nil {
 		log.Debug("GetComponentInfos: failed get on service nodes", zap.String("error info", err.Error()))
 		return nil, err
@@ -204,7 +255,7 @@ func (c *queryNodeCluster) releaseSegments(ctx context.Context, nodeID int64, in
 	defer c.Unlock()
 
 	if node, ok := c.nodes[nodeID]; ok {
-		if !node.isOnService() {
+		if !node.isOnline() {
 			return errors.New("node offline")
 		}
 
@@ -416,7 +467,7 @@ func (c *queryNodeCluster) getNumSegments(nodeID int64) (int, error) {
 	return numSegment, nil
 }
 
-func (c *queryNodeCluster) registerNode(ctx context.Context, session *sessionutil.Session, id UniqueID) error {
+func (c *queryNodeCluster) registerNode(ctx context.Context, session *sessionutil.Session, id UniqueID, state nodeState) error {
 	c.Lock()
 	defer c.Unlock()
 
@@ -431,23 +482,17 @@ func (c *queryNodeCluster) registerNode(ctx context.Context, session *sessionuti
 		if err != nil {
 			return err
 		}
-		c.nodes[id], err = c.newNodeFn(ctx, session.Address, id, c.client)
+		node, err := c.newNodeFn(ctx, session.Address, id, c.client)
 		if err != nil {
 			log.Debug("RegisterNode: create a new query node failed", zap.Int64("nodeID", id), zap.Error(err))
 			return err
 		}
+		node.setState(state)
+		if state < online {
+			go node.start()
+		}
+		c.nodes[id] = node
 		log.Debug("RegisterNode: create a new query node", zap.Int64("nodeID", id), zap.String("address", session.Address))
-
-		go func() {
-			err = c.nodes[id].start()
-			if err != nil {
-				log.Error("RegisterNode: start queryNode client failed", zap.Int64("nodeID", id), zap.String("error", err.Error()))
-				return
-			}
-			log.Debug("RegisterNode: start queryNode success, print cluster MetaReplica info", zap.Int64("nodeID", id))
-			c.printMeta()
-		}()
-
 		return nil
 	}
 	return fmt.Errorf("RegisterNode: node %d alredy exists in cluster", id)
@@ -496,56 +541,77 @@ func (c *queryNodeCluster) stopNode(nodeID int64) {
 		}
 	}
 }
 
-func (c *queryNodeCluster) onServiceNodes() (map[int64]Node, error) {
+func (c *queryNodeCluster) onlineNodes() (map[int64]Node, error) {
 	c.RLock()
 	defer c.RUnlock()
 
-	return c.getOnServiceNodes()
+	return c.getOnlineNodes()
 }
 
-func (c *queryNodeCluster) getOnServiceNodes() (map[int64]Node, error) {
+func (c *queryNodeCluster) getOnlineNodes() (map[int64]Node, error) {
 	nodes := make(map[int64]Node)
 	for nodeID, node := range c.nodes {
-		if node.isOnService() {
+		if node.isOnline() {
 			nodes[nodeID] = node
 		}
 	}
 	if len(nodes) == 0 {
-		return nil, errors.New("GetOnServiceNodes: no queryNode is alive")
+		return nil, errors.New("GetOnlineNodes: no queryNode is alive")
 	}
 
 	return nodes, nil
 }
 
-func (c *queryNodeCluster) isOnService(nodeID int64) (bool, error) {
+func (c *queryNodeCluster) offlineNodes() (map[int64]Node, error) {
+	c.RLock()
+	defer c.RUnlock()
+
+	return c.getOfflineNodes()
+}
+
+func (c *queryNodeCluster) getOfflineNodes() (map[int64]Node, error) {
+	nodes := make(map[int64]Node)
+	for nodeID, node := range c.nodes {
+		if node.isOffline() {
+			nodes[nodeID] = node
+		}
+	}
+	if len(nodes) == 0 {
+		return nil, errors.New("GetOfflineNodes: no queryNode is offline")
+	}
+
+	return nodes, nil
+}
+
+func (c *queryNodeCluster) isOnline(nodeID int64) (bool, error) {
 	c.Lock()
 	defer c.Unlock()
 
 	if node, ok := c.nodes[nodeID]; ok {
-		return node.isOnService(), nil
+		return node.isOnline(), nil
 	}
 
 	return false, fmt.Errorf("IsOnService: query node %d not exist", nodeID)
}
 
-func (c *queryNodeCluster) printMeta() {
-	c.RLock()
-	defer c.RUnlock()
-
-	for id, node := range c.nodes {
-		if node.isOnService() {
-			collectionInfos := node.showCollections()
-			for _, info := range collectionInfos {
-				log.Debug("PrintMeta: query coordinator cluster info: collectionInfo", zap.Int64("nodeID", id), zap.Int64("collectionID", info.CollectionID), zap.Any("info", info))
-			}
-
-			queryChannelInfos := node.showWatchedQueryChannels()
-			for _, info := range queryChannelInfos {
-				log.Debug("PrintMeta: query coordinator cluster info: watchedQueryChannelInfo", zap.Int64("nodeID", id), zap.Int64("collectionID", info.CollectionID), zap.Any("info", info))
-			}
-		}
-	}
-}
+//func (c *queryNodeCluster) printMeta() {
+//	c.RLock()
+//	defer c.RUnlock()
+//
+//	for id, node := range c.nodes {
+//		if node.isOnline() {
+//			collectionInfos := node.showCollections()
+//			for _, info := range collectionInfos {
+//				log.Debug("PrintMeta: query coordinator cluster info: collectionInfo", zap.Int64("nodeID", id), zap.Int64("collectionID", info.CollectionID), zap.Any("info", info))
+//			}
+//
+//			queryChannelInfos := node.showWatchedQueryChannels()
+//			for _, info := range queryChannelInfos {
+//				log.Debug("PrintMeta: query coordinator cluster info: watchedQueryChannelInfo", zap.Int64("nodeID", id), zap.Int64("collectionID", info.CollectionID), zap.Any("info", info))
+//			}
+//		}
+//	}
+//}
 
 func (c *queryNodeCluster) getCollectionInfosByID(ctx context.Context, nodeID int64) []*querypb.CollectionInfo {
 	c.RLock()
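The core of this commit is the move from a boolean onService flag to a three-valued nodeState (disConnect, online, offline) guarded by an RWMutex. Here is a compact, self-contained sketch of that pattern; the names mirror the diff, but the struct is reduced to the state-handling parts only and is not the project's actual queryNode type.

```go
package main

import (
	"fmt"
	"sync"
)

type nodeState int

const (
	disConnect nodeState = 0
	online     nodeState = 1
	offline    nodeState = 2
)

type node struct {
	stateLock sync.RWMutex
	state     nodeState
}

func (n *node) setState(s nodeState) {
	n.stateLock.Lock()
	defer n.stateLock.Unlock()
	n.state = s
}

// start only promotes the node; it never resurrects an offline one,
// which is why the diff guards the transition with `state < online`.
func (n *node) start() {
	n.stateLock.Lock()
	defer n.stateLock.Unlock()
	if n.state < online {
		n.state = online
	}
}

func (n *node) isOnline() bool {
	n.stateLock.RLock()
	defer n.stateLock.RUnlock()
	return n.state == online
}

func main() {
	n := &node{state: disConnect}
	n.start()
	fmt.Println(n.isOnline()) // true
	n.setState(offline)
	n.start()                 // no-op: offline is not < online, so the guard blocks promotion
	fmt.Println(n.isOnline()) // false
}
```

This is also why registerNode only calls `go node.start()` when the initial state is below online: nodes restored from etcd as offline stay offline until a load-balance task deals with them.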
@@ -24,6 +24,7 @@ import (
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/util/sessionutil"
+	"github.com/milvus-io/milvus/internal/util/typeutil"
 )
 
 func TestQueryNodeCluster_getMetrics(t *testing.T) {
@@ -31,38 +32,163 @@ func TestQueryNodeCluster_getMetrics(t *testing.T) {
 }
 
 func TestReloadClusterFromKV(t *testing.T) {
+	t.Run("Test LoadOnlineNodes", func(t *testing.T) {
+		refreshParams()
+		baseCtx := context.Background()
+		kv, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
+		assert.Nil(t, err)
+		clusterSession := sessionutil.NewSession(context.Background(), Params.MetaRootPath, Params.EtcdEndpoints)
+		clusterSession.Init(typeutil.QueryCoordRole, Params.Address, true)
+		cluster := &queryNodeCluster{
+			ctx:       baseCtx,
+			client:    kv,
+			nodes:     make(map[int64]Node),
+			newNodeFn: newQueryNodeTest,
+			session:   clusterSession,
+		}
+
+		queryNode, err := startQueryNodeServer(baseCtx)
+		assert.Nil(t, err)
+
+		cluster.reloadFromKV()
+
+		nodeID := queryNode.queryNodeID
+		for {
+			_, err = cluster.getNodeByID(nodeID)
+			if err == nil {
+				break
+			}
+		}
+		queryNode.stop()
+	})
+
+	t.Run("Test LoadOfflineNodes", func(t *testing.T) {
+		refreshParams()
+		kv, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
+		assert.Nil(t, err)
+		clusterSession := sessionutil.NewSession(context.Background(), Params.MetaRootPath, Params.EtcdEndpoints)
+		clusterSession.Init(typeutil.QueryCoordRole, Params.Address, true)
+		cluster := &queryNodeCluster{
+			client:    kv,
+			nodes:     make(map[int64]Node),
+			newNodeFn: newQueryNodeTest,
+			session:   clusterSession,
+		}
+
+		kvs := make(map[string]string)
+		session := &sessionutil.Session{
+			ServerID: 100,
+			Address:  "localhost",
+		}
+		sessionBlob, err := json.Marshal(session)
+		assert.Nil(t, err)
+		sessionKey := fmt.Sprintf("%s/%d", queryNodeInfoPrefix, 100)
+		kvs[sessionKey] = string(sessionBlob)
+
+		collectionInfo := &querypb.CollectionInfo{
+			CollectionID: defaultCollectionID,
+		}
+		collectionBlobs := proto.MarshalTextString(collectionInfo)
+		nodeKey := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, 100)
+		kvs[nodeKey] = collectionBlobs
+
+		err = kv.MultiSave(kvs)
+		assert.Nil(t, err)
+
+		cluster.reloadFromKV()
+
+		assert.Equal(t, 1, len(cluster.nodes))
+		collection := cluster.getCollectionInfosByID(context.Background(), 100)
+		assert.Equal(t, defaultCollectionID, collection[0].CollectionID)
+	})
+}
+
+func TestGrpcRequest(t *testing.T) {
 	refreshParams()
+	baseCtx, cancel := context.WithCancel(context.Background())
 	kv, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
 	assert.Nil(t, err)
+	clusterSession := sessionutil.NewSession(context.Background(), Params.MetaRootPath, Params.EtcdEndpoints)
+	clusterSession.Init(typeutil.QueryCoordRole, Params.Address, true)
+	meta, err := newMeta(kv)
+	assert.Nil(t, err)
 	cluster := &queryNodeCluster{
-		client:    kv,
-		nodes:     make(map[int64]Node),
-		newNodeFn: newQueryNodeTest,
+		ctx:         baseCtx,
+		cancel:      cancel,
+		client:      kv,
+		clusterMeta: meta,
+		nodes:       make(map[int64]Node),
+		newNodeFn:   newQueryNodeTest,
+		session:     clusterSession,
 	}
 
-	kvs := make(map[string]string)
-	session := &sessionutil.Session{
-		ServerID: 100,
-		Address:  "localhost",
-	}
-	sessionBlob, err := json.Marshal(session)
+	node, err := startQueryNodeServer(baseCtx)
 	assert.Nil(t, err)
-	sessionKey := fmt.Sprintf("%s/%d", queryNodeInfoPrefix, 100)
-	kvs[sessionKey] = string(sessionBlob)
+	nodeSession := node.session
+	nodeID := node.queryNodeID
+	cluster.registerNode(baseCtx, nodeSession, nodeID, disConnect)
 
-	collectionInfo := &querypb.CollectionInfo{
-		CollectionID: defaultCollectionID,
+	for {
+		online, err := cluster.isOnline(nodeID)
+		assert.Nil(t, err)
+		if online {
+			break
+		}
 	}
-	collectionBlobs := proto.MarshalTextString(collectionInfo)
-	nodeKey := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, 100)
-	kvs[nodeKey] = collectionBlobs
 
-	err = kv.MultiSave(kvs)
-	assert.Nil(t, err)
+	t.Run("Test GetComponentInfos", func(t *testing.T) {
+		_, err := cluster.getComponentInfos(baseCtx)
+		assert.Nil(t, err)
+	})
 
-	cluster.reloadFromKV()
+	t.Run("Test LoadSegments", func(t *testing.T) {
+		segmentLoadInfo := &querypb.SegmentLoadInfo{
+			SegmentID:    defaultSegmentID,
+			PartitionID:  defaultPartitionID,
+			CollectionID: defaultCollectionID,
+		}
+		loadSegmentReq := &querypb.LoadSegmentsRequest{
+			NodeID: nodeID,
+			Infos:  []*querypb.SegmentLoadInfo{segmentLoadInfo},
+		}
+		err := cluster.loadSegments(baseCtx, nodeID, loadSegmentReq)
+		assert.Nil(t, err)
+	})
 
-	assert.Equal(t, 1, len(cluster.nodes))
-	collection := cluster.getCollectionInfosByID(context.Background(), 100)
-	assert.Equal(t, defaultCollectionID, collection[0].CollectionID)
+	t.Run("Test ReleaseSegments", func(t *testing.T) {
+		releaseSegmentReq := &querypb.ReleaseSegmentsRequest{
+			NodeID:       nodeID,
+			CollectionID: defaultCollectionID,
+			PartitionIDs: []UniqueID{defaultPartitionID},
+			SegmentIDs:   []UniqueID{defaultSegmentID},
+		}
+		err := cluster.releaseSegments(baseCtx, nodeID, releaseSegmentReq)
+		assert.Nil(t, err)
+	})
+
+	t.Run("Test AddQueryChannel", func(t *testing.T) {
+		reqChannel, resChannel := cluster.clusterMeta.GetQueryChannel(defaultCollectionID)
+		addQueryChannelReq := &querypb.AddQueryChannelRequest{
+			NodeID:           nodeID,
+			CollectionID:     defaultCollectionID,
+			RequestChannelID: reqChannel,
+			ResultChannelID:  resChannel,
+		}
+		err := cluster.addQueryChannel(baseCtx, nodeID, addQueryChannelReq)
+		assert.Nil(t, err)
+	})
+
+	t.Run("Test RemoveQueryChannel", func(t *testing.T) {
+		reqChannel, resChannel := cluster.clusterMeta.GetQueryChannel(defaultCollectionID)
+		removeQueryChannelReq := &querypb.RemoveQueryChannelRequest{
+			NodeID:           nodeID,
+			CollectionID:     defaultCollectionID,
+			RequestChannelID: reqChannel,
+			ResultChannelID:  resChannel,
+		}
+		err := cluster.removeQueryChannel(baseCtx, nodeID, removeQueryChannelReq)
+		assert.Nil(t, err)
+	})
+
+	node.stop()
 }
@@ -1,3 +1,13 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
 package querycoord
 
 import (
@@ -16,6 +26,7 @@ import (
 )
 
 func TestGrpcTask(t *testing.T) {
+	refreshParams()
 	ctx := context.Background()
 	queryCoord, err := startQueryCoord(ctx)
 	assert.Nil(t, err)
@@ -321,6 +332,7 @@ func TestGrpcTask(t *testing.T) {
 }
 
 func TestLoadBalanceTask(t *testing.T) {
+	refreshParams()
 	baseCtx := context.Background()
 
 	queryCoord, err := startQueryCoord(baseCtx)

@@ -1,3 +1,13 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
 package querycoord
 
 import (
@@ -43,7 +53,6 @@ func newQueryNodeTest(ctx context.Context, address string, id UniqueID, kv *etcd
 		kvClient:             kv,
 		collectionInfos:      collectionInfo,
 		watchedQueryChannels: watchedChannels,
-		onService:            false,
 	}
 
 	return node, nil
@@ -80,7 +89,10 @@ func (client *queryNodeClientMock) Start() error {
 
 func (client *queryNodeClientMock) Stop() error {
 	client.cancel()
-	return client.conn.Close()
+	if client.conn != nil {
+		return client.conn.Close()
+	}
+	return nil
 }
 
 func (client *queryNodeClientMock) Register() error {
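The mock's Stop fix above is a small but common defensive pattern: close the gRPC connection only if dialing ever succeeded, so a node that never connected can still be stopped cleanly. A generic sketch follows; the closer interface is a stand-in for *grpc.ClientConn, not the mock's real field type.

```go
package main

import "fmt"

type closer interface{ Close() error }

type clientMock struct {
	cancel func()
	conn   closer // nil when Start() never dialed successfully
}

func (c *clientMock) Stop() error {
	c.cancel()
	// Guard against a nil connection: Stop may run before Start completed.
	if c.conn != nil {
		return c.conn.Close()
	}
	return nil
}

func main() {
	m := &clientMock{cancel: func() {}}
	fmt.Println(m.Stop()) // <nil>, no panic even though conn was never set
}
```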
@@ -1,3 +1,13 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
 package querycoord
 
 import (
@@ -11,6 +21,7 @@ import (
 
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/proto/commonpb"
+	"github.com/milvus-io/milvus/internal/proto/internalpb"
 	"github.com/milvus-io/milvus/internal/proto/milvuspb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/util/funcutil"
@@ -31,12 +42,13 @@ type queryNodeServerMock struct {
 	queryNodePort int64
 	queryNodeID   int64
 
 	addQueryChannels    func() (*commonpb.Status, error)
-	watchDmChannels     func() (*commonpb.Status, error)
-	loadSegment         func() (*commonpb.Status, error)
-	releaseCollection   func() (*commonpb.Status, error)
-	releasePartition    func() (*commonpb.Status, error)
-	releaseSegment      func() (*commonpb.Status, error)
+	removeQueryChannels func() (*commonpb.Status, error)
+	watchDmChannels     func() (*commonpb.Status, error)
+	loadSegment         func() (*commonpb.Status, error)
+	releaseCollection   func() (*commonpb.Status, error)
+	releasePartition    func() (*commonpb.Status, error)
+	releaseSegments     func() (*commonpb.Status, error)
 }
 
 func newQueryNodeServerMock(ctx context.Context) *queryNodeServerMock {
@@ -46,12 +58,13 @@ func newQueryNodeServerMock(ctx context.Context) *queryNodeServerMock {
 		cancel:      cancel,
 		grpcErrChan: make(chan error),
 
 		addQueryChannels:    returnSuccessResult,
-		watchDmChannels:     returnSuccessResult,
-		loadSegment:         returnSuccessResult,
-		releaseCollection:   returnSuccessResult,
-		releasePartition:    returnSuccessResult,
-		releaseSegment:      returnSuccessResult,
+		removeQueryChannels: returnSuccessResult,
+		watchDmChannels:     returnSuccessResult,
+		loadSegment:         returnSuccessResult,
+		releaseCollection:   returnSuccessResult,
+		releasePartition:    returnSuccessResult,
+		releaseSegments:     returnSuccessResult,
 	}
 }
 
@@ -83,7 +96,7 @@ func (qs *queryNodeServerMock) init() error {
 			grpcPort = 0
 		}
 		return err
-	}, retry.Attempts(10))
+	}, retry.Attempts(2))
 	if err != nil {
 		qs.grpcErrChan <- err
 	}
@@ -133,10 +146,22 @@ func (qs *queryNodeServerMock) run() error {
 	return nil
 }
 
+func (qs *queryNodeServerMock) GetComponentStates(ctx context.Context, req *internalpb.GetComponentStatesRequest) (*internalpb.ComponentStates, error) {
+	return &internalpb.ComponentStates{
+		Status: &commonpb.Status{
+			ErrorCode: commonpb.ErrorCode_Success,
+		},
+	}, nil
+}
+
 func (qs *queryNodeServerMock) AddQueryChannel(ctx context.Context, req *querypb.AddQueryChannelRequest) (*commonpb.Status, error) {
 	return qs.addQueryChannels()
 }
 
+func (qs *queryNodeServerMock) RemoveQueryChannel(ctx context.Context, req *querypb.RemoveQueryChannelRequest) (*commonpb.Status, error) {
+	return qs.removeQueryChannels()
+}
+
 func (qs *queryNodeServerMock) WatchDmChannels(ctx context.Context, req *querypb.WatchDmChannelsRequest) (*commonpb.Status, error) {
 	return qs.watchDmChannels()
 }
@@ -154,7 +179,7 @@ func (qs *queryNodeServerMock) ReleasePartitions(ctx context.Context, req *query
 }
 
 func (qs *queryNodeServerMock) ReleaseSegments(ctx context.Context, req *querypb.ReleaseSegmentsRequest) (*commonpb.Status, error) {
-	return qs.releaseSegment()
+	return qs.releaseSegments()
 }
 
 func (qs *queryNodeServerMock) GetSegmentInfo(context.Context, *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error) {
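The mock server above routes every RPC through a swappable function field (addQueryChannels, removeQueryChannels, and so on), so a test can force any single RPC to fail without defining a new type. A reduced sketch of that pattern, with two fields instead of the mock's full set and a simplified status type standing in for commonpb.Status:

```go
package main

import (
	"errors"
	"fmt"
)

type status struct{ ok bool }

func returnSuccessResult() (*status, error) { return &status{ok: true}, nil }

// Each RPC handler delegates to a function field, so tests can override
// exactly one behavior while everything else keeps returning success.
type serverMock struct {
	loadSegment     func() (*status, error)
	releaseSegments func() (*status, error)
}

func newServerMock() *serverMock {
	return &serverMock{
		loadSegment:     returnSuccessResult,
		releaseSegments: returnSuccessResult,
	}
}

func (s *serverMock) LoadSegments() (*status, error)    { return s.loadSegment() }
func (s *serverMock) ReleaseSegments() (*status, error) { return s.releaseSegments() }

func main() {
	mock := newServerMock()
	mock.releaseSegments = func() (*status, error) { return nil, errors.New("injected failure") }

	_, err1 := mock.LoadSegments()
	_, err2 := mock.ReleaseSegments()
	fmt.Println(err1, err2) // <nil> injected failure
}
```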
@@ -20,8 +20,6 @@ import (
 	"sync/atomic"
 	"time"
 
-	"github.com/milvus-io/milvus/internal/util/metricsinfo"
-
 	"github.com/golang/protobuf/proto"
 	"go.etcd.io/etcd/api/v3/mvccpb"
 	"go.uber.org/zap"
@@ -33,6 +31,7 @@ import (
 	"github.com/milvus-io/milvus/internal/proto/internalpb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/types"
+	"github.com/milvus-io/milvus/internal/util/metricsinfo"
 	"github.com/milvus-io/milvus/internal/util/retry"
 	"github.com/milvus-io/milvus/internal/util/sessionutil"
 	"github.com/milvus-io/milvus/internal/util/typeutil"
@@ -53,7 +52,7 @@ type QueryCoord struct {
 
 	queryCoordID uint64
 	meta         Meta
-	cluster      *queryNodeCluster
+	cluster      Cluster
 	newNodeFn    newQueryNodeFn
 	scheduler    *TaskScheduler
 
@@ -103,7 +102,7 @@ func (qc *QueryCoord) Init() error {
 		return err
 	}
 
-	qc.cluster, err = newQueryNodeCluster(qc.meta, qc.kvClient, qc.newNodeFn)
+	qc.cluster, err = newQueryNodeCluster(qc.loopCtx, qc.meta, qc.kvClient, qc.newNodeFn, qc.session)
 	if err != nil {
 		log.Error("query coordinator init cluster failed", zap.Error(err))
 		return err
@@ -189,50 +188,37 @@ func (qc *QueryCoord) watchNodeLoop() {
 	defer qc.loopWg.Done()
 	log.Debug("query coordinator start watch node loop")
 
-	clusterStartSession, version, _ := qc.session.GetSessions(typeutil.QueryNodeRole)
-	sessionMap := make(map[int64]*sessionutil.Session)
-	for _, session := range clusterStartSession {
-		nodeID := session.ServerID
-		sessionMap[nodeID] = session
-	}
-	for nodeID, session := range sessionMap {
-		if _, ok := qc.cluster.nodes[nodeID]; !ok {
-			serverID := session.ServerID
-			log.Debug("start add a queryNode to cluster", zap.Any("nodeID", serverID))
-			err := qc.cluster.registerNode(ctx, session, serverID)
-			if err != nil {
-				log.Error("query node failed to register", zap.Int64("nodeID", serverID), zap.String("error info", err.Error()))
-			}
-		}
-	}
-	for nodeID := range qc.cluster.nodes {
-		if _, ok := sessionMap[nodeID]; !ok {
-			qc.cluster.stopNode(nodeID)
-			loadBalanceSegment := &querypb.LoadBalanceRequest{
-				Base: &commonpb.MsgBase{
-					MsgType:  commonpb.MsgType_LoadBalanceSegments,
-					SourceID: qc.session.ServerID,
-				},
-				SourceNodeIDs: []int64{nodeID},
-			}
+	offlineNodes, err := qc.cluster.offlineNodes()
+	if err == nil {
+		offlineNodeIDs := make([]int64, 0)
+		for id := range offlineNodes {
+			offlineNodeIDs = append(offlineNodeIDs, id)
+		}
+		loadBalanceSegment := &querypb.LoadBalanceRequest{
+			Base: &commonpb.MsgBase{
+				MsgType:  commonpb.MsgType_LoadBalanceSegments,
+				SourceID: qc.session.ServerID,
+			},
+			SourceNodeIDs: offlineNodeIDs,
+		}
 
 		loadBalanceTask := &LoadBalanceTask{
 			BaseTask: BaseTask{
 				ctx:              qc.loopCtx,
 				Condition:        NewTaskCondition(qc.loopCtx),
 				triggerCondition: querypb.TriggerCondition_nodeDown,
 			},
 			LoadBalanceRequest: loadBalanceSegment,
 			rootCoord:          qc.rootCoordClient,
 			dataCoord:          qc.dataCoordClient,
 			cluster:            qc.cluster,
 			meta:               qc.meta,
 		}
-			qc.scheduler.Enqueue([]task{loadBalanceTask})
-		}
-	}
+		qc.scheduler.Enqueue([]task{loadBalanceTask})
+		log.Debug("start a loadBalance task", zap.Any("task", loadBalanceTask))
+	}
 
-	qc.eventChan = qc.session.WatchServices(typeutil.QueryNodeRole, version+1)
+	qc.eventChan = qc.session.WatchServices(typeutil.QueryNodeRole, qc.cluster.getSessionVersion()+1)
 	for {
 		select {
 		case <-ctx.Done():
@@ -242,7 +228,7 @@ func (qc *QueryCoord) watchNodeLoop() {
 		case sessionutil.SessionAddEvent:
 			serverID := event.Session.ServerID
 			log.Debug("start add a queryNode to cluster", zap.Any("nodeID", serverID))
-			err := qc.cluster.registerNode(ctx, event.Session, serverID)
+			err := qc.cluster.registerNode(ctx, event.Session, serverID, disConnect)
 			if err != nil {
 				log.Error("query node failed to register", zap.Int64("nodeID", serverID), zap.String("error info", err.Error()))
 			}
@@ -279,6 +265,7 @@ func (qc *QueryCoord) watchNodeLoop() {
 			meta:               qc.meta,
 		}
 		qc.scheduler.Enqueue([]task{loadBalanceTask})
+		log.Debug("start a loadBalance task", zap.Any("task", loadBalanceTask))
 		qc.metricsCacheManager.InvalidateSystemInfoMetrics()
 	}
 }
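The reworked watchNodeLoop above now has two phases: at startup it asks the cluster for nodes that reloadFromKV restored as offline and enqueues a single load-balance task naming all of them as source nodes, then it resumes the session watch from the version recorded during reload. A schematic sketch of that control flow, with the coordinator, cluster, and scheduler reduced to fakes that expose only the calls the diff actually makes (this is not the real QueryCoord type):

```go
package main

import "fmt"

type fakeCluster struct {
	offline        map[int64]bool
	sessionVersion int64
}

func (c *fakeCluster) offlineNodes() (map[int64]bool, error) { return c.offline, nil }
func (c *fakeCluster) getSessionVersion() int64              { return c.sessionVersion }

type fakeScheduler struct{ enqueued [][]int64 }

func (s *fakeScheduler) enqueue(ids []int64) { s.enqueued = append(s.enqueued, ids) }

// Mirrors the first phase of the new watchNodeLoop: gather every node that came
// back as offline and enqueue ONE load-balance task covering all of them, then
// resume the session watch just past the recorded version.
func watchNodeLoopStartup(c *fakeCluster, s *fakeScheduler) int64 {
	offline, err := c.offlineNodes()
	if err == nil {
		ids := make([]int64, 0, len(offline))
		for id := range offline {
			ids = append(ids, id)
		}
		s.enqueue(ids)
	}
	return c.getSessionVersion() + 1
}

func main() {
	c := &fakeCluster{offline: map[int64]bool{100: true, 101: true}, sessionVersion: 7}
	s := &fakeScheduler{}
	next := watchNodeLoopStartup(c, s)
	fmt.Println(len(s.enqueued), next) // 1 8
}
```

Compared with the deleted code, this moves the "which nodes are down" bookkeeping out of the loop (it now lives in reloadFromKV) and replaces one task per downed node with one task for the whole set.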
@@ -13,17 +13,21 @@ package querycoord
 
 import (
 	"context"
+	"encoding/json"
+	"fmt"
 	"math/rand"
 	"os"
 	"strconv"
 	"testing"
 	"time"
 
-	"go.uber.org/zap"
+	"github.com/golang/protobuf/proto"
+	"github.com/stretchr/testify/assert"
 
-	"github.com/milvus-io/milvus/internal/log"
+	etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
 	"github.com/milvus-io/milvus/internal/msgstream"
-	"github.com/milvus-io/milvus/internal/proto/internalpb"
+	"github.com/milvus-io/milvus/internal/proto/querypb"
+	"github.com/milvus-io/milvus/internal/util/sessionutil"
 )
 
 func setup() {
@@ -46,29 +50,158 @@ func TestMain(m *testing.M) {
 }
 
 func NewQueryCoordTest(ctx context.Context, factory msgstream.Factory) (*QueryCoord, error) {
-	refreshParams()
-	rand.Seed(time.Now().UnixNano())
-	queryChannels := make([]*queryChannelInfo, 0)
-	channelID := len(queryChannels)
-	searchPrefix := Params.SearchChannelPrefix
-	searchResultPrefix := Params.SearchResultChannelPrefix
-	allocatedQueryChannel := searchPrefix + "-" + strconv.FormatInt(int64(channelID), 10)
-	allocatedQueryResultChannel := searchResultPrefix + "-" + strconv.FormatInt(int64(channelID), 10)
+	queryCoord, err := NewQueryCoord(ctx, factory)
+	if err != nil {
+		return nil, err
+	}
+	queryCoord.newNodeFn = newQueryNodeTest
+	return queryCoord, nil
+}
 
-	queryChannels = append(queryChannels, &queryChannelInfo{
-		requestChannel:  allocatedQueryChannel,
-		responseChannel: allocatedQueryResultChannel,
-	})
-
-	ctx1, cancel := context.WithCancel(ctx)
-	service := &QueryCoord{
-		loopCtx:    ctx1,
-		loopCancel: cancel,
-		msFactory:  factory,
-		newNodeFn:  newQueryNodeTest,
+func startQueryCoord(ctx context.Context) (*QueryCoord, error) {
+	factory := msgstream.NewPmsFactory()
+
+	coord, err := NewQueryCoordTest(ctx, factory)
+	if err != nil {
+		return nil, err
 	}
 
-	service.UpdateStateCode(internalpb.StateCode_Abnormal)
-	log.Debug("query coordinator", zap.Any("queryChannels", queryChannels))
-	return service, nil
+	rootCoord := newRootCoordMock()
+	rootCoord.createCollection(defaultCollectionID)
+	rootCoord.createPartition(defaultCollectionID, defaultPartitionID)
+
+	dataCoord, err := newDataCoordMock(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	coord.SetRootCoord(rootCoord)
+	coord.SetDataCoord(dataCoord)
+
+	err = coord.Register()
+	if err != nil {
+		return nil, err
+	}
+	err = coord.Init()
+	if err != nil {
+		return nil, err
+	}
+	err = coord.Start()
+	if err != nil {
+		return nil, err
+	}
+	return coord, nil
+}
+
+func startUnHealthyQueryCoord(ctx context.Context) (*QueryCoord, error) {
+	factory := msgstream.NewPmsFactory()
+
+	coord, err := NewQueryCoordTest(ctx, factory)
+	if err != nil {
+		return nil, err
+	}
+
+	rootCoord := newRootCoordMock()
+	rootCoord.createCollection(defaultCollectionID)
+	rootCoord.createPartition(defaultCollectionID, defaultPartitionID)
+
+	dataCoord, err := newDataCoordMock(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	coord.SetRootCoord(rootCoord)
+	coord.SetDataCoord(dataCoord)
+
+	err = coord.Register()
+	if err != nil {
+		return nil, err
+	}
+	err = coord.Init()
+	if err != nil {
+		return nil, err
+	}
+
+	return coord, nil
+}
+
+func TestWatchNodeLoop(t *testing.T) {
+	baseCtx := context.Background()
+
+	t.Run("Test OfflineNodes", func(t *testing.T) {
+		refreshParams()
+		kv, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
+		assert.Nil(t, err)
+
+		kvs := make(map[string]string)
+		session := &sessionutil.Session{
+			ServerID: 100,
+			Address:  "localhost",
+		}
+		sessionBlob, err := json.Marshal(session)
+		assert.Nil(t, err)
+		sessionKey := fmt.Sprintf("%s/%d", queryNodeInfoPrefix, 100)
+		kvs[sessionKey] = string(sessionBlob)
+
+		collectionInfo := &querypb.CollectionInfo{
+			CollectionID: defaultCollectionID,
+		}
+		collectionBlobs := proto.MarshalTextString(collectionInfo)
+		nodeKey := fmt.Sprintf("%s/%d", queryNodeMetaPrefix, 100)
+		kvs[nodeKey] = collectionBlobs
+
+		err = kv.MultiSave(kvs)
+		assert.Nil(t, err)
+
+		queryCoord, err := startQueryCoord(baseCtx)
+		assert.Nil(t, err)
+
+		for {
+			_, err = queryCoord.cluster.offlineNodes()
+			if err == nil {
+				break
+			}
+		}
+
+		queryCoord.Stop()
+	})
+
+	t.Run("Test RegisterNewNode", func(t *testing.T) {
+		refreshParams()
+		queryCoord, err := startQueryCoord(baseCtx)
+		assert.Nil(t, err)
+
+		queryNode1, err := startQueryNodeServer(baseCtx)
+		assert.Nil(t, err)
+
+		nodeID := queryNode1.queryNodeID
+		for {
+			_, err = queryCoord.cluster.getNodeByID(nodeID)
+			if err == nil {
+				break
+			}
+		}
+
+		queryCoord.Stop()
+		queryNode1.stop()
+	})
+
+	t.Run("Test RemoveNode", func(t *testing.T) {
+		refreshParams()
+		queryNode1, err := startQueryNodeServer(baseCtx)
+		assert.Nil(t, err)
+
+		queryCoord, err := startQueryCoord(baseCtx)
+		assert.Nil(t, err)
+
+		nodeID := queryNode1.queryNodeID
+		queryNode1.stop()
+		for {
+			_, err = queryCoord.cluster.getNodeByID(nodeID)
+			if err != nil {
+				break
+			}
+		}
+		queryCoord.Stop()
+	})
 }
@ -17,8 +17,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/milvus-io/milvus/internal/proto/milvuspb"
|
|
||||||
|
|
||||||
"github.com/golang/protobuf/proto"
|
"github.com/golang/protobuf/proto"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
|
||||||
|
@ -27,6 +25,7 @@ import (
|
||||||
"github.com/milvus-io/milvus/internal/log"
|
"github.com/milvus-io/milvus/internal/log"
|
||||||
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
"github.com/milvus-io/milvus/internal/proto/commonpb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
"github.com/milvus-io/milvus/internal/proto/internalpb"
|
||||||
|
"github.com/milvus-io/milvus/internal/proto/milvuspb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/querypb"
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/schemapb"
|
"github.com/milvus-io/milvus/internal/proto/schemapb"
|
||||||
"github.com/milvus-io/milvus/internal/types"
|
"github.com/milvus-io/milvus/internal/types"
|
||||||
|
@ -46,15 +45,17 @@ type Node interface {
|
||||||
releasePartitions(ctx context.Context, in *querypb.ReleasePartitionsRequest) error
|
releasePartitions(ctx context.Context, in *querypb.ReleasePartitionsRequest) error
|
||||||
|
|
||||||
watchDmChannels(ctx context.Context, in *querypb.WatchDmChannelsRequest) error
|
watchDmChannels(ctx context.Context, in *querypb.WatchDmChannelsRequest) error
|
||||||
removeDmChannel(collectionID UniqueID, channels []string) error
|
//removeDmChannel(collectionID UniqueID, channels []string) error
|
||||||
|
|
||||||
hasWatchedQueryChannel(collectionID UniqueID) bool
|
hasWatchedQueryChannel(collectionID UniqueID) bool
|
||||||
showWatchedQueryChannels() []*querypb.QueryChannelInfo
|
//showWatchedQueryChannels() []*querypb.QueryChannelInfo
|
||||||
addQueryChannel(ctx context.Context, in *querypb.AddQueryChannelRequest) error
|
addQueryChannel(ctx context.Context, in *querypb.AddQueryChannelRequest) error
|
||||||
removeQueryChannel(ctx context.Context, in *querypb.RemoveQueryChannelRequest) error
|
removeQueryChannel(ctx context.Context, in *querypb.RemoveQueryChannelRequest) error
|
||||||
|
|
||||||
setNodeState(onService bool)
|
setState(state nodeState)
|
||||||
isOnService() bool
|
getState() nodeState
|
||||||
|
isOnline() bool
|
||||||
|
isOffline() bool
|
||||||
|
|
||||||
getSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error)
|
getSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error)
|
||||||
loadSegments(ctx context.Context, in *querypb.LoadSegmentsRequest) error
|
loadSegments(ctx context.Context, in *querypb.LoadSegmentsRequest) error
|
||||||
|
@ -75,8 +76,8 @@ type queryNode struct {
|
||||||
sync.RWMutex
|
sync.RWMutex
|
||||||
collectionInfos map[UniqueID]*querypb.CollectionInfo
|
collectionInfos map[UniqueID]*querypb.CollectionInfo
|
||||||
watchedQueryChannels map[UniqueID]*querypb.QueryChannelInfo
|
watchedQueryChannels map[UniqueID]*querypb.QueryChannelInfo
|
||||||
onService bool
|
state nodeState
|
||||||
serviceLock sync.RWMutex
|
stateLock sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func newQueryNode(ctx context.Context, address string, id UniqueID, kv *etcdkv.EtcdKV) (Node, error) {
|
func newQueryNode(ctx context.Context, address string, id UniqueID, kv *etcdkv.EtcdKV) (Node, error) {
|
||||||
|
@ -97,7 +98,7 @@ func newQueryNode(ctx context.Context, address string, id UniqueID, kv *etcdkv.E
|
||||||
kvClient: kv,
|
kvClient: kv,
|
||||||
collectionInfos: collectionInfo,
|
collectionInfos: collectionInfo,
|
||||||
watchedQueryChannels: watchedChannels,
|
watchedQueryChannels: watchedChannels,
|
||||||
onService: false,
|
state: disConnect,
|
||||||
}
|
}
|
||||||
|
|
||||||
return node, nil
|
return node, nil
|
||||||
|
@ -105,23 +106,27 @@ func newQueryNode(ctx context.Context, address string, id UniqueID, kv *etcdkv.E
|
||||||
|
|
||||||
func (qn *queryNode) start() error {
|
func (qn *queryNode) start() error {
|
||||||
if err := qn.client.Init(); err != nil {
|
if err := qn.client.Init(); err != nil {
|
||||||
|
log.Error("Start: init queryNode client failed", zap.Int64("nodeID", qn.id), zap.String("error", err.Error()))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := qn.client.Start(); err != nil {
|
if err := qn.client.Start(); err != nil {
|
||||||
|
log.Error("Start: start queryNode client failed", zap.Int64("nodeID", qn.id), zap.String("error", err.Error()))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
qn.serviceLock.Lock()
|
qn.stateLock.Lock()
|
||||||
qn.onService = true
|
if qn.state < online {
|
||||||
qn.serviceLock.Unlock()
|
qn.state = online
|
||||||
|
}
|
||||||
|
qn.stateLock.Unlock()
|
||||||
log.Debug("Start: queryNode client start success", zap.Int64("nodeID", qn.id), zap.String("address", qn.address))
|
log.Debug("Start: queryNode client start success", zap.Int64("nodeID", qn.id), zap.String("address", qn.address))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (qn *queryNode) stop() {
|
func (qn *queryNode) stop() {
|
||||||
qn.serviceLock.Lock()
|
qn.stateLock.Lock()
|
||||||
defer qn.serviceLock.Unlock()
|
defer qn.stateLock.Unlock()
|
||||||
qn.onService = false
|
qn.state = offline
|
||||||
if qn.client != nil {
|
if qn.client != nil {
|
||||||
qn.client.Stop()
|
qn.client.Stop()
|
||||||
}
|
}
|
||||||
|
@ -272,37 +277,37 @@ func (qn *queryNode) addDmChannel(collectionID UniqueID, channels []string) erro
|
||||||
return errors.New("AddDmChannels: can't find collection in watchedQueryChannel")
|
return errors.New("AddDmChannels: can't find collection in watchedQueryChannel")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (qn *queryNode) removeDmChannel(collectionID UniqueID, channels []string) error {
|
//func (qn *queryNode) removeDmChannel(collectionID UniqueID, channels []string) error {
|
||||||
qn.Lock()
|
// qn.Lock()
|
||||||
defer qn.Unlock()
|
// defer qn.Unlock()
|
||||||
|
//
|
||||||
if info, ok := qn.collectionInfos[collectionID]; ok {
|
// if info, ok := qn.collectionInfos[collectionID]; ok {
|
||||||
for _, channelInfo := range info.ChannelInfos {
|
// for _, channelInfo := range info.ChannelInfos {
|
||||||
if channelInfo.NodeIDLoaded == qn.id {
|
// if channelInfo.NodeIDLoaded == qn.id {
|
||||||
newChannelIDs := make([]string, 0)
|
// newChannelIDs := make([]string, 0)
|
||||||
for _, channelID := range channelInfo.ChannelIDs {
|
// for _, channelID := range channelInfo.ChannelIDs {
|
||||||
findChannel := false
|
// findChannel := false
|
||||||
for _, channel := range channels {
|
// for _, channel := range channels {
|
||||||
if channelID == channel {
|
// if channelID == channel {
|
||||||
findChannel = true
|
// findChannel = true
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
if !findChannel {
|
// if !findChannel {
|
||||||
newChannelIDs = append(newChannelIDs, channelID)
|
// newChannelIDs = append(newChannelIDs, channelID)
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
channelInfo.ChannelIDs = newChannelIDs
|
// channelInfo.ChannelIDs = newChannelIDs
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
err := saveNodeCollectionInfo(collectionID, info, qn.id, qn.kvClient)
|
// err := saveNodeCollectionInfo(collectionID, info, qn.id, qn.kvClient)
|
||||||
if err != nil {
|
// if err != nil {
|
||||||
log.Error("RemoveDmChannel: save collectionInfo error", zap.Any("error", err.Error()), zap.Int64("collectionID", collectionID))
|
// log.Error("RemoveDmChannel: save collectionInfo error", zap.Any("error", err.Error()), zap.Int64("collectionID", collectionID))
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
return errors.New("RemoveDmChannel: can't find collection in watchedQueryChannel")
|
// return errors.New("RemoveDmChannel: can't find collection in watchedQueryChannel")
|
||||||
}
|
//}
|
||||||
|
|
||||||
func (qn *queryNode) hasWatchedQueryChannel(collectionID UniqueID) bool {
|
func (qn *queryNode) hasWatchedQueryChannel(collectionID UniqueID) bool {
|
||||||
qn.RLock()
|
qn.RLock()
|
||||||
|
@ -315,17 +320,17 @@ func (qn *queryNode) hasWatchedQueryChannel(collectionID UniqueID) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (qn *queryNode) showWatchedQueryChannels() []*querypb.QueryChannelInfo {
|
//func (qn *queryNode) showWatchedQueryChannels() []*querypb.QueryChannelInfo {
|
||||||
qn.RLock()
|
// qn.RLock()
|
||||||
defer qn.RUnlock()
|
// defer qn.RUnlock()
|
||||||
|
//
|
||||||
results := make([]*querypb.QueryChannelInfo, 0)
|
// results := make([]*querypb.QueryChannelInfo, 0)
|
||||||
for _, info := range qn.watchedQueryChannels {
|
// for _, info := range qn.watchedQueryChannels {
|
||||||
results = append(results, proto.Clone(info).(*querypb.QueryChannelInfo))
|
// results = append(results, proto.Clone(info).(*querypb.QueryChannelInfo))
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
return results
|
// return results
|
||||||
}
|
//}
|
||||||
|
|
||||||
func (qn *queryNode) setQueryChannelInfo(info *querypb.QueryChannelInfo) {
|
func (qn *queryNode) setQueryChannelInfo(info *querypb.QueryChannelInfo) {
|
||||||
qn.Lock()
|
qn.Lock()
|
||||||
|
@ -354,26 +359,37 @@ func (qn *queryNode) clearNodeInfo() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
-func (qn *queryNode) setNodeState(onService bool) {
-	qn.serviceLock.Lock()
-	defer qn.serviceLock.Unlock()
+func (qn *queryNode) setState(state nodeState) {
+	qn.stateLock.Lock()
+	defer qn.stateLock.Unlock()

-	qn.onService = onService
+	qn.state = state
 }

-func (qn *queryNode) isOnService() bool {
-	qn.serviceLock.RLock()
-	defer qn.serviceLock.RUnlock()
+func (qn *queryNode) getState() nodeState {
+	qn.stateLock.RLock()
+	defer qn.stateLock.RUnlock()

-	return qn.onService
+	return qn.state
+}
+
+func (qn *queryNode) isOnline() bool {
+	qn.stateLock.RLock()
+	defer qn.stateLock.RUnlock()
+
+	return qn.state == online
+}
+
+func (qn *queryNode) isOffline() bool {
+	qn.stateLock.RLock()
+	defer qn.stateLock.RUnlock()
+
+	return qn.state == offline
 }

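The hunk above replaces the boolean onService flag with the three-valued nodeState (disConnect, online, offline) declared alongside the Cluster interface, and routes every read and write through accessors that keep the lock scoped internally. Below is a minimal, self-contained sketch of that pattern; it assumes a trimmed-down queryNode with only the two fields the accessors touch (the real struct also carries the client, kv handle, and channel maps), and folds isOnline/isOffline through getState for brevity rather than re-locking as the diff does:

package main

import (
	"fmt"
	"sync"
)

type nodeState int

const (
	disConnect nodeState = 0
	online     nodeState = 1
	offline    nodeState = 2
)

// queryNode is trimmed to the two fields the state accessors touch.
type queryNode struct {
	stateLock sync.RWMutex
	state     nodeState
}

func (qn *queryNode) setState(state nodeState) {
	qn.stateLock.Lock()
	defer qn.stateLock.Unlock()
	qn.state = state
}

func (qn *queryNode) getState() nodeState {
	qn.stateLock.RLock()
	defer qn.stateLock.RUnlock()
	return qn.state
}

func (qn *queryNode) isOnline() bool {
	return qn.getState() == online
}

func (qn *queryNode) isOffline() bool {
	return qn.getState() == offline
}

func main() {
	qn := &queryNode{state: disConnect} // registered but not yet serving
	qn.setState(online)
	fmt.Println(qn.isOnline(), qn.isOffline()) // true false
	qn.setState(offline)
	fmt.Println(qn.isOnline(), qn.isOffline()) // false true
}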
 //***********************grpc req*************************//
 func (qn *queryNode) watchDmChannels(ctx context.Context, in *querypb.WatchDmChannelsRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return errors.New("WatchDmChannels: queryNode is offline")
 	}

@@ -397,10 +413,7 @@ func (qn *queryNode) watchDmChannels(ctx context.Context, in *querypb.WatchDmChannelsRequest) error {
 }

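Every RPC wrapper below gets the same mechanical change: the RLock / copy / RUnlock / test sequence collapses into a single isOnline() call. Besides being shorter, this removes a latent lock leak — in the old versions of getSegmentInfo, getComponentInfo, and getMetrics further down, the early return on an offline node fired before serviceLock.RUnlock() ran, so that path kept the read lock forever. A runnable sketch of the guarded shape, with the actual gRPC forwarding stubbed out:

package main

import (
	"errors"
	"fmt"
	"sync"
)

type nodeState int

const (
	disConnect nodeState = 0
	online     nodeState = 1
)

type queryNode struct {
	stateLock sync.RWMutex
	state     nodeState
}

// isOnline scopes the read lock entirely inside the accessor, so no
// caller can return early while still holding it.
func (qn *queryNode) isOnline() bool {
	qn.stateLock.RLock()
	defer qn.stateLock.RUnlock()
	return qn.state == online
}

// watchDmChannels mirrors the guarded shape used by the wrappers above;
// the real method forwards the request over gRPC after the check.
func (qn *queryNode) watchDmChannels() error {
	if !qn.isOnline() {
		return errors.New("WatchDmChannels: queryNode is offline")
	}
	return nil
}

func main() {
	qn := &queryNode{state: disConnect}
	fmt.Println(qn.watchDmChannels()) // WatchDmChannels: queryNode is offline

	qn.stateLock.Lock()
	qn.state = online
	qn.stateLock.Unlock()
	fmt.Println(qn.watchDmChannels()) // <nil>
}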
 func (qn *queryNode) addQueryChannel(ctx context.Context, in *querypb.AddQueryChannelRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return errors.New("AddQueryChannel: queryNode is offline")
 	}

@@ -422,10 +435,7 @@ func (qn *queryNode) addQueryChannel(ctx context.Context, in *querypb.AddQueryChannelRequest) error {
 }

 func (qn *queryNode) removeQueryChannel(ctx context.Context, in *querypb.RemoveQueryChannelRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return nil
 	}

@@ -442,10 +452,7 @@ func (qn *queryNode) removeQueryChannel(ctx context.Context, in *querypb.RemoveQueryChannelRequest) error {
 }

 func (qn *queryNode) releaseCollection(ctx context.Context, in *querypb.ReleaseCollectionRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return nil
 	}

@@ -466,10 +473,7 @@ func (qn *queryNode) releaseCollection(ctx context.Context, in *querypb.ReleaseCollectionRequest) error {
 }

 func (qn *queryNode) releasePartitions(ctx context.Context, in *querypb.ReleasePartitionsRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return nil
 	}

@@ -489,11 +493,9 @@ func (qn *queryNode) releasePartitions(ctx context.Context, in *querypb.ReleasePartitionsRequest) error {
 }

 func (qn *queryNode) getSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error) {
-	qn.serviceLock.RLock()
-	if !qn.onService {
+	if !qn.isOnline() {
 		return nil, nil
 	}
-	qn.serviceLock.RUnlock()

 	res, err := qn.client.GetSegmentInfo(ctx, in)
 	if err == nil && res.Status.ErrorCode == commonpb.ErrorCode_Success {

@@ -504,14 +506,12 @@ func (qn *queryNode) getSegmentInfo(ctx context.Context, in *querypb.GetSegmentInfoRequest) (*querypb.GetSegmentInfoResponse, error) {
 }

 func (qn *queryNode) getComponentInfo(ctx context.Context) *internalpb.ComponentInfo {
-	qn.serviceLock.RLock()
-	if !qn.onService {
+	if !qn.isOnline() {
 		return &internalpb.ComponentInfo{
 			NodeID:    qn.id,
 			StateCode: internalpb.StateCode_Abnormal,
 		}
 	}
-	qn.serviceLock.RUnlock()

 	res, err := qn.client.GetComponentStates(ctx)
 	if err != nil || res.Status.ErrorCode != commonpb.ErrorCode_Success {

@@ -525,20 +525,15 @@ func (qn *queryNode) getComponentInfo(ctx context.Context) *internalpb.ComponentInfo {
 }

 func (qn *queryNode) getMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
-	qn.serviceLock.RLock()
-	if !qn.onService {
+	if !qn.isOnline() {
 		return nil, errQueryNodeIsNotOnService(qn.id)
 	}
-	qn.serviceLock.RUnlock()

 	return qn.client.GetMetrics(ctx, in)
 }

 func (qn *queryNode) loadSegments(ctx context.Context, in *querypb.LoadSegmentsRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return errors.New("LoadSegments: queryNode is offline")
 	}

@@ -564,10 +559,7 @@ func (qn *queryNode) loadSegments(ctx context.Context, in *querypb.LoadSegmentsRequest) error {
 }

 func (qn *queryNode) releaseSegments(ctx context.Context, in *querypb.ReleaseSegmentsRequest) error {
-	qn.serviceLock.RLock()
-	onService := qn.onService
-	qn.serviceLock.RUnlock()
-	if !onService {
+	if !qn.isOnline() {
 		return errors.New("ReleaseSegments: queryNode is offline")
 	}

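One behavioral detail worth noting across the wrappers above: calls that put work onto a node (watchDmChannels, addQueryChannel, loadSegments, releaseSegments) return an explicit error when the node is offline, while removeQueryChannel, releaseCollection, and releasePartitions return nil, treating an offline node as already released. A small sketch of that convention, with hypothetical method names standing in for the real wrappers:

package main

import (
	"errors"
	"fmt"
)

type node struct{ online bool }

func (n *node) isOnline() bool { return n.online }

// loadSomething models the mutating calls: an offline node is an error
// the caller must handle (or reschedule around).
func (n *node) loadSomething() error {
	if !n.isOnline() {
		return errors.New("load: node is offline")
	}
	return nil
}

// releaseSomething models the release paths: an offline node holds
// nothing, so releasing is a successful no-op.
func (n *node) releaseSomething() error {
	if !n.isOnline() {
		return nil
	}
	return nil
}

func main() {
	n := &node{online: false}
	fmt.Println(n.loadSomething())    // load: node is offline
	fmt.Println(n.releaseSomething()) // <nil>
}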
@@ -18,82 +18,15 @@ import (

 	"github.com/stretchr/testify/assert"

+	etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
 	"github.com/milvus-io/milvus/internal/log"
-	"github.com/milvus-io/milvus/internal/msgstream"
 	"github.com/milvus-io/milvus/internal/proto/commonpb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 )

-func startQueryCoord(ctx context.Context) (*QueryCoord, error) {
-	factory := msgstream.NewPmsFactory()
-
-	coord, err := NewQueryCoordTest(ctx, factory)
-	if err != nil {
-		return nil, err
-	}
-
-	rootCoord := newRootCoordMock()
-	rootCoord.createCollection(defaultCollectionID)
-	rootCoord.createPartition(defaultCollectionID, defaultPartitionID)
-
-	dataCoord, err := newDataCoordMock(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	coord.SetRootCoord(rootCoord)
-	coord.SetDataCoord(dataCoord)
-
-	err = coord.Register()
-	if err != nil {
-		return nil, err
-	}
-	err = coord.Init()
-	if err != nil {
-		return nil, err
-	}
-	err = coord.Start()
-	if err != nil {
-		return nil, err
-	}
-	return coord, nil
-}
-
-func startUnHealthyQueryCoord(ctx context.Context) (*QueryCoord, error) {
-	factory := msgstream.NewPmsFactory()
-
-	coord, err := NewQueryCoordTest(ctx, factory)
-	if err != nil {
-		return nil, err
-	}
-
-	rootCoord := newRootCoordMock()
-	rootCoord.createCollection(defaultCollectionID)
-	rootCoord.createPartition(defaultCollectionID, defaultPartitionID)
-
-	dataCoord, err := newDataCoordMock(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	coord.SetRootCoord(rootCoord)
-	coord.SetDataCoord(dataCoord)
-
-	err = coord.Register()
-	if err != nil {
-		return nil, err
-	}
-	err = coord.Init()
-	if err != nil {
-		return nil, err
-	}
-
-	return coord, nil
-}
-
 //func waitQueryNodeOnline(cluster *queryNodeCluster, nodeID int64)

-func waitAllQueryNodeOffline(cluster *queryNodeCluster, nodes map[int64]Node) bool {
+func waitAllQueryNodeOffline(cluster Cluster, nodes map[int64]Node) bool {
 	reDoCount := 20
 	for {
 		if reDoCount <= 0 {

@@ -117,6 +50,7 @@ func waitAllQueryNodeOffline(cluster *queryNodeCluster, nodes map[int64]Node) bool {
 }

 func TestQueryNode_MultiNode_stop(t *testing.T) {
+	refreshParams()
 	baseCtx := context.Background()

 	queryCoord, err := startQueryCoord(baseCtx)

@@ -147,7 +81,7 @@ func TestQueryNode_MultiNode_stop(t *testing.T) {
 	})
 	assert.Nil(t, err)
 	time.Sleep(2 * time.Second)
-	nodes, err := queryCoord.cluster.onServiceNodes()
+	nodes, err := queryCoord.cluster.onlineNodes()
 	assert.Nil(t, err)
 	queryNode5.stop()

@@ -157,6 +91,7 @@ func TestQueryNode_MultiNode_stop(t *testing.T) {
 }

 func TestQueryNode_MultiNode_reStart(t *testing.T) {
+	refreshParams()
 	baseCtx := context.Background()

 	queryCoord, err := startQueryCoord(baseCtx)

@@ -185,7 +120,7 @@ func TestQueryNode_MultiNode_reStart(t *testing.T) {
 		CollectionID: defaultCollectionID,
 	})
 	assert.Nil(t, err)
-	nodes, err := queryCoord.cluster.onServiceNodes()
+	nodes, err := queryCoord.cluster.onlineNodes()
 	assert.Nil(t, err)
 	queryNode3.stop()

@@ -197,3 +132,25 @@ func TestQueryNode_MultiNode_reStart(t *testing.T) {
 func TestQueryNode_getMetrics(t *testing.T) {
 	log.Info("TestQueryNode_getMetrics, todo")
 }

+func TestNewQueryNode(t *testing.T) {
+	refreshParams()
+	baseCtx, cancel := context.WithCancel(context.Background())
+	kv, err := etcdkv.NewEtcdKV(Params.EtcdEndpoints, Params.MetaRootPath)
+	assert.Nil(t, err)
+
+	queryNode1, err := startQueryNodeServer(baseCtx)
+	assert.Nil(t, err)
+
+	addr := queryNode1.session.Address
+	nodeID := queryNode1.queryNodeID
+	node, err := newQueryNode(baseCtx, addr, nodeID, kv)
+	assert.Nil(t, err)
+
+	err = node.start()
+	assert.Nil(t, err)
+
+	cancel()
+	node.stop()
+	queryNode1.stop()
+}

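TestNewQueryNode above exercises the client-side lifecycle: construct with newQueryNode, call start(), then tear down by cancelling the parent context and calling stop(). A minimal sketch of that start/cancel/stop contract, with hypothetical component names standing in for the node wrapper:

package main

import (
	"context"
	"fmt"
)

// component is a hypothetical stand-in for the queryNode client wrapper:
// its background work lives only as long as the parent context.
type component struct {
	ctx    context.Context
	cancel context.CancelFunc
	done   chan struct{}
}

func newComponent(parent context.Context) *component {
	ctx, cancel := context.WithCancel(parent)
	return &component{ctx: ctx, cancel: cancel, done: make(chan struct{})}
}

func (c *component) start() {
	go func() {
		defer close(c.done)
		<-c.ctx.Done() // a background loop would live here
	}()
}

// stop stays safe even after the parent context is cancelled, mirroring
// the cancel() followed by node.stop() sequence in the test.
func (c *component) stop() {
	c.cancel()
	<-c.done
}

func main() {
	parent, parentCancel := context.WithCancel(context.Background())
	c := newComponent(parent)
	c.start()
	parentCancel() // cancelling the parent unblocks the goroutine
	c.stop()       // stop is still safe afterwards
	fmt.Println("stopped cleanly")
}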
@@ -134,7 +134,7 @@ type LoadCollectionTask struct {
 	*querypb.LoadCollectionRequest
 	rootCoord types.RootCoord
 	dataCoord types.DataCoord
-	cluster   *queryNodeCluster
+	cluster   Cluster
 	meta      Meta
 }

@@ -323,7 +323,7 @@ func (lct *LoadCollectionTask) PostExecute(ctx context.Context) error {
 	}
 	if lct.result.ErrorCode != commonpb.ErrorCode_Success {
 		lct.childTasks = make([]task, 0)
-		nodes, err := lct.cluster.onServiceNodes()
+		nodes, err := lct.cluster.onlineNodes()
 		if err != nil {
 			log.Debug(err.Error())
 		}

@@ -362,7 +362,7 @@ func (lct *LoadCollectionTask) PostExecute(ctx context.Context) error {
 type ReleaseCollectionTask struct {
 	BaseTask
 	*querypb.ReleaseCollectionRequest
-	cluster   *queryNodeCluster
+	cluster   Cluster
 	meta      Meta
 	rootCoord types.RootCoord
 }

@@ -427,7 +427,7 @@ func (rct *ReleaseCollectionTask) Execute(ctx context.Context) error {
 		return err
 	}

-	nodes, err := rct.cluster.onServiceNodes()
+	nodes, err := rct.cluster.onlineNodes()
 	if err != nil {
 		log.Debug(err.Error())
 	}

@@ -477,7 +477,7 @@ type LoadPartitionTask struct {
 	BaseTask
 	*querypb.LoadPartitionsRequest
 	dataCoord types.DataCoord
-	cluster   *queryNodeCluster
+	cluster   Cluster
 	meta      Meta
 	addCol    bool
 }

@@ -606,7 +606,7 @@ func (lpt *LoadPartitionTask) PostExecute(ctx context.Context) error {
 	if lpt.result.ErrorCode != commonpb.ErrorCode_Success {
 		lpt.childTasks = make([]task, 0)
 		if lpt.addCol {
-			nodes, err := lpt.cluster.onServiceNodes()
+			nodes, err := lpt.cluster.onlineNodes()
 			if err != nil {
 				log.Debug(err.Error())
 			}

@@ -635,7 +635,7 @@ func (lpt *LoadPartitionTask) PostExecute(ctx context.Context) error {
 				log.Debug("loadPartitionTask: add a releaseCollectionTask to loadPartitionTask's childTask", zap.Any("task", releaseCollectionTask))
 			}
 		} else {
-			nodes, err := lpt.cluster.onServiceNodes()
+			nodes, err := lpt.cluster.onlineNodes()
 			if err != nil {
 				log.Debug(err.Error())
 			}

@@ -678,7 +678,7 @@ func (lpt *LoadPartitionTask) PostExecute(ctx context.Context) error {
 type ReleasePartitionTask struct {
 	BaseTask
 	*querypb.ReleasePartitionsRequest
-	cluster *queryNodeCluster
+	cluster Cluster
 }

 func (rpt *ReleasePartitionTask) MsgBase() *commonpb.MsgBase {

@@ -717,7 +717,7 @@ func (rpt *ReleasePartitionTask) Execute(ctx context.Context) error {
 	}

 	if rpt.NodeID <= 0 {
-		nodes, err := rpt.cluster.onServiceNodes()
+		nodes, err := rpt.cluster.onlineNodes()
 		if err != nil {
 			log.Debug(err.Error())
 		}

@@ -772,7 +772,7 @@ type LoadSegmentTask struct {
 	BaseTask
 	*querypb.LoadSegmentsRequest
 	meta    Meta
-	cluster *queryNodeCluster
+	cluster Cluster
 }

 func (lst *LoadSegmentTask) MsgBase() *commonpb.MsgBase {

@@ -784,7 +784,7 @@ func (lst *LoadSegmentTask) Marshal() ([]byte, error) {
 }

 func (lst *LoadSegmentTask) IsValid() bool {
-	onService, err := lst.cluster.isOnService(lst.NodeID)
+	onService, err := lst.cluster.isOnline(lst.NodeID)
 	if err != nil {
 		return false
 	}

@@ -909,7 +909,7 @@ func (lst *LoadSegmentTask) Reschedule() ([]task, error) {
 type ReleaseSegmentTask struct {
 	BaseTask
 	*querypb.ReleaseSegmentsRequest
-	cluster *queryNodeCluster
+	cluster Cluster
 }

 func (rst *ReleaseSegmentTask) MsgBase() *commonpb.MsgBase {

@@ -921,7 +921,7 @@ func (rst *ReleaseSegmentTask) Marshal() ([]byte, error) {
 }

 func (rst *ReleaseSegmentTask) IsValid() bool {
-	onService, err := rst.cluster.isOnService(rst.NodeID)
+	onService, err := rst.cluster.isOnline(rst.NodeID)
 	if err != nil {
 		return false
 	}

@@ -979,7 +979,7 @@ type WatchDmChannelTask struct {
 	BaseTask
 	*querypb.WatchDmChannelsRequest
 	meta    Meta
-	cluster *queryNodeCluster
+	cluster Cluster
 }

 func (wdt *WatchDmChannelTask) MsgBase() *commonpb.MsgBase {

@@ -991,7 +991,7 @@ func (wdt *WatchDmChannelTask) Marshal() ([]byte, error) {
 }

 func (wdt *WatchDmChannelTask) IsValid() bool {
-	onService, err := wdt.cluster.isOnService(wdt.NodeID)
+	onService, err := wdt.cluster.isOnline(wdt.NodeID)
 	if err != nil {
 		return false
 	}

@@ -1120,7 +1120,7 @@ func (wdt *WatchDmChannelTask) Reschedule() ([]task, error) {
 type WatchQueryChannelTask struct {
 	BaseTask
 	*querypb.AddQueryChannelRequest
-	cluster *queryNodeCluster
+	cluster Cluster
 }

 func (wqt *WatchQueryChannelTask) MsgBase() *commonpb.MsgBase {

@@ -1132,7 +1132,7 @@ func (wqt *WatchQueryChannelTask) Marshal() ([]byte, error) {
 }

 func (wqt *WatchQueryChannelTask) IsValid() bool {
-	onService, err := wqt.cluster.isOnService(wqt.NodeID)
+	onService, err := wqt.cluster.isOnline(wqt.NodeID)
 	if err != nil {
 		return false
 	}

@@ -1201,7 +1201,7 @@ type LoadBalanceTask struct {
 	*querypb.LoadBalanceRequest
 	rootCoord types.RootCoord
 	dataCoord types.DataCoord
-	cluster   *queryNodeCluster
+	cluster   Cluster
 	meta      Meta
 }

@@ -1379,12 +1379,12 @@ func (lbt *LoadBalanceTask) PostExecute(context.Context) error {
 	return nil
 }

-func shuffleChannelsToQueryNode(dmChannels []string, cluster *queryNodeCluster) []int64 {
+func shuffleChannelsToQueryNode(dmChannels []string, cluster Cluster) []int64 {
 	maxNumChannels := 0
 	nodes := make(map[int64]Node)
 	var err error
 	for {
-		nodes, err = cluster.onServiceNodes()
+		nodes, err = cluster.onlineNodes()
 		if err != nil {
 			log.Debug(err.Error())
 			time.Sleep(1 * time.Second)

@@ -1435,12 +1435,12 @@ func shuffleChannelsToQueryNode(dmChannels []string, cluster *queryNodeCluster)
 	}
 }

-func shuffleSegmentsToQueryNode(segmentIDs []UniqueID, cluster *queryNodeCluster) []int64 {
+func shuffleSegmentsToQueryNode(segmentIDs []UniqueID, cluster Cluster) []int64 {
 	maxNumSegments := 0
 	nodes := make(map[int64]Node)
 	var err error
 	for {
-		nodes, err = cluster.onServiceNodes()
+		nodes, err = cluster.onlineNodes()
 		if err != nil {
 			log.Debug(err.Error())
 			time.Sleep(1 * time.Second)

@@ -1526,7 +1526,7 @@ func assignInternalTask(ctx context.Context,
 	collectionID UniqueID,
 	parentTask task,
 	meta Meta,
-	cluster *queryNodeCluster,
+	cluster Cluster,
 	loadSegmentRequests []*querypb.LoadSegmentsRequest,
 	watchDmChannelRequests []*querypb.WatchDmChannelsRequest) {

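The recurring change in this file swaps the concrete *queryNodeCluster for the Cluster interface in every task struct and helper signature. The practical payoff is testability: a task can now be handed any type that satisfies Cluster. A minimal sketch under that assumption — the one-method interface and fakeCluster below are illustrative stand-ins, not the real API, which carries the full set of node operations:

package main

import "fmt"

// Cluster is trimmed to the single method this sketch needs.
type Cluster interface {
	isOnline(nodeID int64) (bool, error)
}

// fakeCluster is a hypothetical stand-in a test could inject now that
// tasks depend on the interface rather than on *queryNodeCluster.
type fakeCluster struct{ onlineIDs map[int64]bool }

func (f *fakeCluster) isOnline(nodeID int64) (bool, error) {
	return f.onlineIDs[nodeID], nil
}

// isValid mirrors the IsValid() shape used by the segment and channel
// tasks above: an error or an offline node both invalidate the task.
func isValid(c Cluster, nodeID int64) bool {
	online, err := c.isOnline(nodeID)
	if err != nil {
		return false
	}
	return online
}

func main() {
	c := &fakeCluster{onlineIDs: map[int64]bool{1: true}}
	fmt.Println(isValid(c, 1)) // true
	fmt.Println(isValid(c, 2)) // false
}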
@@ -122,7 +122,7 @@ type TaskScheduler struct {
 	triggerTaskQueue *TaskQueue
 	activateTaskChan chan task
 	meta             Meta
-	cluster          *queryNodeCluster
+	cluster          Cluster
 	taskIDAllocator  func() (UniqueID, error)
 	client           *etcdkv.EtcdKV

@@ -134,7 +134,7 @@ type TaskScheduler struct {
 	cancel context.CancelFunc
 }

-func NewTaskScheduler(ctx context.Context, meta Meta, cluster *queryNodeCluster, kv *etcdkv.EtcdKV, rootCoord types.RootCoord, dataCoord types.DataCoord) (*TaskScheduler, error) {
+func NewTaskScheduler(ctx context.Context, meta Meta, cluster Cluster, kv *etcdkv.EtcdKV, rootCoord types.RootCoord, dataCoord types.DataCoord) (*TaskScheduler, error) {
 	ctx1, cancel := context.WithCancel(ctx)
 	taskChan := make(chan task, 1024)
 	s := &TaskScheduler{

@@ -1,3 +1,13 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
 package querycoord

 import (

@@ -7,17 +17,18 @@ import (
 	"testing"
 	"time"

+	"github.com/stretchr/testify/assert"
+
 	etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/proto/commonpb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
-	"github.com/stretchr/testify/assert"
 )

 type testTask struct {
 	BaseTask
 	baseMsg *commonpb.MsgBase
-	cluster *queryNodeCluster
+	cluster Cluster
 	meta    Meta
 	nodeID  int64
 }

@@ -108,6 +119,7 @@ func (tt *testTask) PostExecute(ctx context.Context) error {
 }

 func TestWatchQueryChannel_ClearEtcdInfoAfterAssignedNodeDown(t *testing.T) {
+	refreshParams()
 	baseCtx := context.Background()
 	queryCoord, err := startQueryCoord(baseCtx)
 	assert.Nil(t, err)

@@ -117,14 +129,12 @@ func TestWatchQueryChannel_ClearEtcdInfoAfterAssignedNodeDown(t *testing.T) {
 	assert.Nil(t, err)
 	queryNode.addQueryChannels = returnFailedResult

-	time.Sleep(time.Second)
-	nodes, err := queryCoord.cluster.onServiceNodes()
-	assert.Nil(t, err)
-	assert.Equal(t, len(nodes), 1)
-	var nodeID int64
-	for id := range nodes {
-		nodeID = id
-		break
+	nodeID := queryNode.queryNodeID
+	for {
+		_, err = queryCoord.cluster.getNodeByID(nodeID)
+		if err == nil {
+			break
+		}
 	}
 	testTask := &testTask{
 		BaseTask: BaseTask{

@@ -142,16 +152,16 @@ func TestWatchQueryChannel_ClearEtcdInfoAfterAssignedNodeDown(t *testing.T) {
 	queryCoord.scheduler.Enqueue([]task{testTask})

 	time.Sleep(time.Second)
-	queryNode.stop()
-
-	allNodeOffline := waitAllQueryNodeOffline(queryCoord.cluster, nodes)
-	assert.Equal(t, allNodeOffline, true)
-
-	time.Sleep(time.Second)
-	newActiveTaskIDKeys, _, err := queryCoord.scheduler.client.LoadWithPrefix(activeTaskPrefix)
-	assert.Nil(t, err)
-	assert.Equal(t, len(newActiveTaskIDKeys), len(activeTaskIDKeys))
+	queryCoord.cluster.stopNode(nodeID)
+	for {
+		newActiveTaskIDKeys, _, err := queryCoord.scheduler.client.LoadWithPrefix(activeTaskPrefix)
+		assert.Nil(t, err)
+		if len(newActiveTaskIDKeys) == len(activeTaskIDKeys) {
+			break
+		}
+	}
 	queryCoord.Stop()
+	queryNode.stop()
 }

 func TestUnMarshalTask(t *testing.T) {

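The rewritten test above also trades fixed time.Sleep calls for polling loops: it spins on getNodeByID until the node registers, and on LoadWithPrefix until the active-task count converges. A generic form of that idiom, as a hypothetical helper (the real test inlines the loops and polls without a deadline):

package main

import (
	"fmt"
	"time"
)

// waitFor polls cond at the given interval until it returns true or the
// timeout elapses, instead of sleeping for a guessed fixed duration.
func waitFor(timeout, interval time.Duration, cond func() bool) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

func main() {
	start := time.Now()
	ok := waitFor(time.Second, 10*time.Millisecond, func() bool {
		return time.Since(start) > 50*time.Millisecond // stand-in condition
	})
	fmt.Println(ok) // true
}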
@@ -1,3 +1,13 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
 package querycoord

 import (

@@ -11,6 +21,7 @@ import (
 )

 func TestTriggerTask(t *testing.T) {
+	refreshParams()
 	ctx := context.Background()
 	queryCoord, err := startQueryCoord(ctx)
 	assert.Nil(t, err)