mirror of https://github.com/milvus-io/milvus.git
Fix channel lost after data node crash (#6545)
If we start up two data nodes and one of them crashes, we expect all channels of the crashed node to be reallocated to the surviving node. Instead, those channels are currently lost after the crash, because the unregister path passes a NodeInfo with empty channel info to the reassignment policy. This change fixes that and improves the log output. issue: #6501 Signed-off-by: sunby <bingyi.sun@zilliz.com>
parent b46d700f9d
commit ee38d14f5d
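To illustrate the failure mode described in the commit message: the unregister policy bails out when the NodeInfo it receives carries no channel statuses, so the crashed node's channels are never handed to a surviving node. Below is a minimal, self-contained sketch of that guard clause; the NodeInfo, ChannelStatus, and reassign names here are illustrative stand-ins that only loosely mirror the real datacoord types, not the actual API.

package main

import "fmt"

// Simplified stand-ins for the datacoord types touched by this commit.
type ChannelStatus struct{ Name string }

type NodeInfo struct {
    Version  int64
    Channels []*ChannelStatus
}

// reassign mimics the guard at the top of the unregister policy: if the lost
// node reports no channels, there is nothing to hand over.
func reassign(alive []*NodeInfo, lost *NodeInfo) []*NodeInfo {
    if len(alive) == 0 || lost == nil || len(lost.Channels) == 0 {
        return nil // nothing reassigned -> the channels are effectively lost
    }
    target := alive[0]
    target.Channels = append(target.Channels, lost.Channels...)
    return []*NodeInfo{target}
}

func main() {
    tracked := &NodeInfo{Version: 1, Channels: []*ChannelStatus{{Name: "ch_1"}}}
    session := &NodeInfo{Version: 1} // rebuilt from the session event: no channels
    alive := []*NodeInfo{{Version: 2}}

    // Before the fix: the session NodeInfo (empty channels) was passed in.
    fmt.Println(len(reassign(alive, session))) // 0 -> "ch_1" is dropped

    // After the fix: the tracked NodeInfo (with its channels) is passed in.
    fmt.Println(len(reassign(alive, tracked))) // 1 -> "ch_1" moves to node 2
}

In the commit itself the same idea appears as the one-line change from unregisterPolicy(cNodes, n) to unregisterPolicy(cNodes, node): the cluster-tracked node still carries its channel statuses, while the NodeInfo rebuilt for the unregister event does not.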
@@ -212,25 +212,19 @@ func (c *Cluster) handleNodeEvent() {
 }
 
 func (c *Cluster) handleEvent(node *NodeInfo) {
     log.Debug("start handle event", zap.Any("node", node))
     ctx := node.ctx
     ch := node.GetEventChannel()
-    var cli types.DataNode
-    var err error
+    version := node.Info.GetVersion()
     for {
         select {
         case <-ctx.Done():
            return
         case event := <-ch:
-            cli = node.GetClient()
-            if cli == nil {
-                cli, err = createClient(ctx, node.info.GetAddress())
-                if err != nil {
-                    log.Warn("failed to create client", zap.Any("node", node), zap.Error(err))
-                    continue
-                }
-                c.mu.Lock()
-                c.nodes.SetClient(node.info.GetVersion(), cli)
-                c.mu.Unlock()
+            cli, err := c.getOrCreateClient(ctx, version)
+            if err != nil {
+                log.Warn("failed to get client", zap.Int64("nodeID", version), zap.Error(err))
+                continue
+            }
             switch event.Type {
             case Watch:
@@ -239,15 +233,15 @@ func (c *Cluster) handleEvent(node *NodeInfo) {
                     log.Warn("request type is not Watch")
                     continue
                 }
+                log.Debug("receive watch event", zap.Any("event", event), zap.Any("node", node))
                 tCtx, cancel := context.WithTimeout(ctx, eventTimeout)
                 resp, err := cli.WatchDmChannels(tCtx, req)
                 cancel()
                 if err = VerifyResponse(resp, err); err != nil {
-                    log.Warn("Failed to watch dm channels", zap.String("addr", node.info.GetAddress()))
+                    log.Warn("Failed to watch dm channels", zap.String("addr", node.Info.GetAddress()))
                 }
                 c.mu.Lock()
-                c.nodes.SetWatched(node.info.GetVersion(), parseChannelsFromReq(req))
-                node = c.nodes.GetNode(node.info.GetVersion())
+                c.nodes.SetWatched(node.Info.GetVersion(), parseChannelsFromReq(req))
                 c.mu.Unlock()
                 if err = c.saveNode(node); err != nil {
                     log.Warn("failed to save node info", zap.Any("node", node))
@@ -263,15 +257,37 @@ func (c *Cluster) handleEvent(node *NodeInfo) {
                 resp, err := cli.FlushSegments(tCtx, req)
                 cancel()
                 if err = VerifyResponse(resp, err); err != nil {
-                    log.Warn("Failed to flush segments", zap.String("addr", node.info.GetAddress()))
+                    log.Warn("failed to flush segments", zap.String("addr", node.Info.GetAddress()))
                 }
             default:
-                log.Warn("Wrong event type", zap.Any("type", event.Type))
+                log.Warn("unknown event type", zap.Any("type", event.Type))
             }
         }
     }
 }
 
+func (c *Cluster) getOrCreateClient(ctx context.Context, id UniqueID) (types.DataNode, error) {
+    c.mu.Lock()
+    node := c.nodes.GetNode(id)
+    c.mu.Unlock()
+    if node == nil {
+        return nil, fmt.Errorf("node %d is not alive", id)
+    }
+    cli := node.GetClient()
+    if cli != nil {
+        return cli, nil
+    }
+    var err error
+    cli, err = createClient(ctx, node.Info.GetAddress())
+    if err != nil {
+        return nil, err
+    }
+    c.mu.Lock()
+    defer c.mu.Unlock()
+    c.nodes.SetClient(node.Info.GetVersion(), cli)
+    return cli, nil
+}
+
 func parseChannelsFromReq(req *datapb.WatchDmChannelsRequest) []string {
     channels := make([]string, 0, len(req.GetVchannels()))
     for _, vc := range req.GetVchannels() {
@@ -314,8 +330,8 @@ func (c *Cluster) updateCluster(nodes []*NodeInfo) (newNodes []*NodeInfo, offlines []*NodeInfo) {
     var onCnt, offCnt float64
     currentOnline := make(map[int64]struct{})
     for _, n := range nodes {
-        currentOnline[n.info.GetVersion()] = struct{}{}
-        node := c.nodes.GetNode(n.info.GetVersion())
+        currentOnline[n.Info.GetVersion()] = struct{}{}
+        node := c.nodes.GetNode(n.Info.GetVersion())
         if node == nil {
             newNodes = append(newNodes, n)
         }
@@ -324,7 +340,7 @@ func (c *Cluster) updateCluster(nodes []*NodeInfo) (newNodes []*NodeInfo, offlines []*NodeInfo) {
 
     currNodes := c.nodes.GetNodes()
     for _, node := range currNodes {
-        _, has := currentOnline[node.info.GetVersion()]
+        _, has := currentOnline[node.Info.GetVersion()]
         if !has {
             offlines = append(offlines, node)
             offCnt++
@@ -339,13 +355,13 @@ func (c *Cluster) handleRegister(n *NodeInfo) {
     c.mu.Lock()
     cNodes := c.nodes.GetNodes()
     var nodes []*NodeInfo
-    log.Debug("before register policy applied", zap.Any("n.Channels", n.info.GetChannels()), zap.Any("buffer", c.chanBuffer))
+    log.Debug("before register policy applied", zap.Any("n.Channels", n.Info.GetChannels()), zap.Any("buffer", c.chanBuffer))
     nodes, c.chanBuffer = c.registerPolicy(cNodes, n, c.chanBuffer)
     log.Debug("after register policy applied", zap.Any("ret", nodes), zap.Any("buffer", c.chanBuffer))
     go c.handleEvent(n)
     c.txnSaveNodesAndBuffer(nodes, c.chanBuffer)
     for _, node := range nodes {
-        c.nodes.SetNode(node.info.GetVersion(), node)
+        c.nodes.SetNode(node.Info.GetVersion(), node)
     }
     c.mu.Unlock()
     for _, node := range nodes {
@@ -355,27 +371,32 @@ func (c *Cluster) handleRegister(n *NodeInfo) {
 
 func (c *Cluster) handleUnRegister(n *NodeInfo) {
     c.mu.Lock()
-    node := c.nodes.GetNode(n.info.GetVersion())
+    node := c.nodes.GetNode(n.Info.GetVersion())
     if node == nil {
         c.mu.Unlock()
         return
     }
     node.Dispose()
-    c.nodes.DeleteNode(n.info.GetVersion())
+    // save deleted node to kv
+    deleted := node.Clone(SetChannels(nil))
+    c.saveNode(deleted)
+    c.nodes.DeleteNode(n.Info.GetVersion())
+
     cNodes := c.nodes.GetNodes()
-    log.Debug("before unregister policy applied", zap.Any("node.Channels", node.info.GetChannels()), zap.Any("buffer", c.chanBuffer))
+    log.Debug("before unregister policy applied", zap.Any("node.Channels", node.Info.GetChannels()), zap.Any("buffer", c.chanBuffer), zap.Any("nodes", cNodes))
     var rets []*NodeInfo
     if len(cNodes) == 0 {
-        for _, chStat := range node.info.GetChannels() {
+        for _, chStat := range node.Info.GetChannels() {
             chStat.State = datapb.ChannelWatchState_Uncomplete
             c.chanBuffer = append(c.chanBuffer, chStat)
         }
     } else {
-        rets = c.unregisterPolicy(cNodes, n)
+        rets = c.unregisterPolicy(cNodes, node)
     }
+    log.Debug("after unregister policy", zap.Any("rets", rets))
     c.txnSaveNodesAndBuffer(rets, c.chanBuffer)
     for _, node := range rets {
-        c.nodes.SetNode(node.info.GetVersion(), node)
+        c.nodes.SetNode(node.Info.GetVersion(), node)
     }
     c.mu.Unlock()
     for _, node := range rets {
@@ -398,7 +419,7 @@ func (c *Cluster) handleWatchChannel(channel string, collectionID UniqueID) {
     }
     c.txnSaveNodesAndBuffer(rets, c.chanBuffer)
     for _, node := range rets {
-        c.nodes.SetNode(node.info.GetVersion(), node)
+        c.nodes.SetNode(node.Info.GetVersion(), node)
     }
     c.mu.Unlock()
     for _, node := range rets {
@@ -422,7 +443,7 @@ func (c *Cluster) handleFlush(segments []*datapb.SegmentInfo) {
 
     channel2Node := make(map[string]*NodeInfo)
     for _, node := range dataNodes {
-        for _, chstatus := range node.info.GetChannels() {
+        for _, chstatus := range node.Info.GetChannels() {
             channel2Node[chstatus.Name] = node
         }
     }
@@ -452,15 +473,11 @@ func (c *Cluster) handleFlush(segments []*datapb.SegmentInfo) {
 }
 
 func (c *Cluster) watch(n *NodeInfo) {
-    var logMsg string
-    uncompletes := make([]vchannel, 0, len(n.info.Channels))
-    for _, ch := range n.info.GetChannels() {
+    channelNames := make([]string, 0)
+    uncompletes := make([]vchannel, 0, len(n.Info.Channels))
+    for _, ch := range n.Info.GetChannels() {
         if ch.State == datapb.ChannelWatchState_Uncomplete {
-            if len(uncompletes) == 0 {
-                logMsg += ch.Name
-            } else {
-                logMsg += "," + ch.Name
-            }
+            channelNames = append(channelNames, ch.GetName())
             uncompletes = append(uncompletes, vchannel{
                 CollectionID: ch.CollectionID,
                 DmlChannel: ch.Name,
@@ -471,7 +488,8 @@ func (c *Cluster) watch(n *NodeInfo) {
     if len(uncompletes) == 0 {
         return // all set, just return
     }
-    log.Debug(logMsg)
+    log.Debug("plan to watch channel", zap.String("node", n.Info.GetAddress()),
+        zap.Int64("version", n.Info.GetVersion()), zap.Strings("channels", channelNames))
 
     vchanInfos, err := c.posProvider.GetVChanPositions(uncompletes, true)
     if err != nil {
@@ -489,12 +507,13 @@ func (c *Cluster) watch(n *NodeInfo) {
         Req: req,
     }
     ch := n.GetEventChannel()
+    log.Debug("put watch event to node channel", zap.Any("e", e), zap.Any("n", n.Info))
     ch <- e
 }
 
 func (c *Cluster) saveNode(n *NodeInfo) error {
-    key := fmt.Sprintf("%s%d", clusterPrefix, n.info.GetVersion())
-    value := proto.MarshalTextString(n.info)
+    key := fmt.Sprintf("%s%d", clusterPrefix, n.Info.GetVersion())
+    value := proto.MarshalTextString(n.Info)
     return c.kv.Save(key, value)
 }
 
@@ -504,8 +523,8 @@ func (c *Cluster) txnSaveNodesAndBuffer(nodes []*NodeInfo, buffer []*datapb.ChannelStatus) {
     }
     data := make(map[string]string)
     for _, n := range nodes {
-        key := fmt.Sprintf("%s%d", clusterPrefix, n.info.GetVersion())
-        value := proto.MarshalTextString(n.info)
+        key := fmt.Sprintf("%s%d", clusterPrefix, n.Info.GetVersion())
+        value := proto.MarshalTextString(n.Info)
         data[key] = value
     }
 
@@ -29,7 +29,7 @@ type ClusterStore interface {
 }
 
 type NodeInfo struct {
-    info *datapb.DataNodeInfo
+    Info *datapb.DataNodeInfo
     eventCh chan *NodeEvent
     client types.DataNode
     ctx context.Context
@@ -46,7 +46,7 @@ type NodeEvent struct {
 func NewNodeInfo(ctx context.Context, info *datapb.DataNodeInfo) *NodeInfo {
     ctx, cancel := context.WithCancel(ctx)
     return &NodeInfo{
-        info: info,
+        Info: info,
         eventCh: make(chan *NodeEvent, eventChBuffer),
         ctx: ctx,
         cancel: cancel,
@@ -55,7 +55,7 @@ func NewNodeInfo(ctx context.Context, info *datapb.DataNodeInfo) *NodeInfo {
 
 func (n *NodeInfo) ShadowClone(opts ...NodeOpt) *NodeInfo {
     cloned := &NodeInfo{
-        info: n.info,
+        Info: n.Info,
         eventCh: n.eventCh,
         client: n.client,
         ctx: n.ctx,
@@ -68,9 +68,9 @@ func (n *NodeInfo) ShadowClone(opts ...NodeOpt) *NodeInfo {
 }
 
 func (n *NodeInfo) Clone(opts ...NodeOpt) *NodeInfo {
-    info := proto.Clone(n.info).(*datapb.DataNodeInfo)
+    info := proto.Clone(n.Info).(*datapb.DataNodeInfo)
     cloned := &NodeInfo{
-        info: info,
+        Info: info,
         eventCh: n.eventCh,
         client: n.client,
         ctx: n.ctx,
@@ -156,7 +156,7 @@ func SetWatched(channelsName []string) NodeOpt {
         for _, channelName := range channelsName {
             channelsMap[channelName] = struct{}{}
         }
-        for _, ch := range n.info.Channels {
+        for _, ch := range n.Info.Channels {
             _, ok := channelsMap[ch.GetName()]
             if !ok {
                 continue
@@ -176,12 +176,12 @@ func SetClient(client types.DataNode) NodeOpt {
 
 func AddChannels(channels []*datapb.ChannelStatus) NodeOpt {
     return func(n *NodeInfo) {
-        n.info.Channels = append(n.info.Channels, channels...)
+        n.Info.Channels = append(n.Info.Channels, channels...)
     }
 }
 
 func SetChannels(channels []*datapb.ChannelStatus) NodeOpt {
     return func(n *NodeInfo) {
-        n.info.Channels = channels
+        n.Info.Channels = channels
     }
 }
@@ -38,7 +38,7 @@ func (s *SpyClusterStore) DeleteNode(nodeID UniqueID) {
 func spyWatchPolicy(ch chan interface{}) channelAssignPolicy {
     return func(cluster []*NodeInfo, channel string, collectionID UniqueID) []*NodeInfo {
         for _, node := range cluster {
-            for _, c := range node.info.GetChannels() {
+            for _, c := range node.Info.GetChannels() {
                 if c.GetName() == channel && c.GetCollectionID() == collectionID {
                     ch <- struct{}{}
                     return nil
@@ -78,7 +78,7 @@ func TestClusterCreate(t *testing.T) {
     <-ch
     dataNodes := cluster.GetNodes()
     assert.EqualValues(t, 1, len(dataNodes))
-    assert.EqualValues(t, "localhost:8080", dataNodes[0].info.GetAddress())
+    assert.EqualValues(t, "localhost:8080", dataNodes[0].Info.GetAddress())
 }
 
 func TestRegister(t *testing.T) {
@@ -105,36 +105,132 @@ func TestRegister(t *testing.T) {
     <-ch
     dataNodes := cluster.GetNodes()
     assert.EqualValues(t, 1, len(dataNodes))
-    assert.EqualValues(t, "localhost:8080", dataNodes[0].info.GetAddress())
+    assert.EqualValues(t, "localhost:8080", dataNodes[0].Info.GetAddress())
 }
 
 func TestUnregister(t *testing.T) {
-    unregisterPolicy := newEmptyUnregisterPolicy()
-    ch := make(chan interface{})
-    kv := memkv.NewMemoryKV()
-    spyClusterStore := &SpyClusterStore{
-        NodesInfo: NewNodesInfo(),
-        ch: ch,
-    }
-    cluster, err := NewCluster(context.TODO(), kv, spyClusterStore, dummyPosProvider{}, withUnregistorPolicy(unregisterPolicy))
-    assert.Nil(t, err)
-    defer cluster.Close()
-    addr := "localhost:8080"
-    info := &datapb.DataNodeInfo{
-        Address: addr,
-        Version: 1,
-        Channels: []*datapb.ChannelStatus{},
-    }
-    nodes := []*NodeInfo{NewNodeInfo(context.TODO(), info)}
-    cluster.Startup(nodes)
-    <-ch
-    dataNodes := cluster.GetNodes()
-    assert.EqualValues(t, 1, len(dataNodes))
-    assert.EqualValues(t, "localhost:8080", dataNodes[0].info.GetAddress())
-    cluster.UnRegister(nodes[0])
-    <-ch
-    dataNodes = cluster.GetNodes()
-    assert.EqualValues(t, 0, len(dataNodes))
+    t.Run("remove node after unregister", func(t *testing.T) {
+        unregisterPolicy := newEmptyUnregisterPolicy()
+        ch := make(chan interface{})
+        kv := memkv.NewMemoryKV()
+        spyClusterStore := &SpyClusterStore{
+            NodesInfo: NewNodesInfo(),
+            ch: ch,
+        }
+        cluster, err := NewCluster(context.TODO(), kv, spyClusterStore, dummyPosProvider{}, withUnregistorPolicy(unregisterPolicy))
+        assert.Nil(t, err)
+        defer cluster.Close()
+        addr := "localhost:8080"
+        info := &datapb.DataNodeInfo{
+            Address: addr,
+            Version: 1,
+            Channels: []*datapb.ChannelStatus{},
+        }
+        nodes := []*NodeInfo{NewNodeInfo(context.TODO(), info)}
+        cluster.Startup(nodes)
+        <-ch
+        dataNodes := cluster.GetNodes()
+        assert.EqualValues(t, 1, len(dataNodes))
+        assert.EqualValues(t, "localhost:8080", dataNodes[0].Info.GetAddress())
+        cluster.UnRegister(nodes[0])
+        <-ch
+        dataNodes = cluster.GetNodes()
+        assert.EqualValues(t, 0, len(dataNodes))
+    })
+
+    t.Run("move channels to online nodes after unregister", func(t *testing.T) {
+        ch := make(chan interface{})
+        kv := memkv.NewMemoryKV()
+        spyClusterStore := &SpyClusterStore{
+            NodesInfo: NewNodesInfo(),
+            ch: ch,
+        }
+        cluster, err := NewCluster(context.TODO(), kv, spyClusterStore, dummyPosProvider{})
+        assert.Nil(t, err)
+        defer cluster.Close()
+        ch1 := &datapb.ChannelStatus{
+            Name: "ch_1",
+            State: datapb.ChannelWatchState_Uncomplete,
+            CollectionID: 100,
+        }
+        nodeInfo1 := &datapb.DataNodeInfo{
+            Address: "localhost:8080",
+            Version: 1,
+            Channels: []*datapb.ChannelStatus{ch1},
+        }
+        nodeInfo2 := &datapb.DataNodeInfo{
+            Address: "localhost:8081",
+            Version: 2,
+            Channels: []*datapb.ChannelStatus{},
+        }
+        node1 := NewNodeInfo(context.TODO(), nodeInfo1)
+        node2 := NewNodeInfo(context.TODO(), nodeInfo2)
+        cli1, err := newMockDataNodeClient(1, make(chan interface{}))
+        assert.Nil(t, err)
+        cli2, err := newMockDataNodeClient(2, make(chan interface{}))
+        assert.Nil(t, err)
+        node1.client = cli1
+        node2.client = cli2
+        nodes := []*NodeInfo{node1, node2}
+        cluster.Startup(nodes)
+        <-ch
+        <-ch
+        dataNodes := cluster.GetNodes()
+        assert.EqualValues(t, 2, len(dataNodes))
+        for _, node := range dataNodes {
+            if node.Info.GetVersion() == 1 {
+                cluster.UnRegister(node)
+                <-ch
+                <-ch
+                break
+            }
+        }
+        dataNodes = cluster.GetNodes()
+        assert.EqualValues(t, 1, len(dataNodes))
+        assert.EqualValues(t, 2, dataNodes[0].Info.GetVersion())
+        assert.EqualValues(t, ch1.Name, dataNodes[0].Info.GetChannels()[0].Name)
+    })
+
+    t.Run("remove all channels after unregsiter", func(t *testing.T) {
+        ch := make(chan interface{}, 10)
+        kv := memkv.NewMemoryKV()
+        spyClusterStore := &SpyClusterStore{
+            NodesInfo: NewNodesInfo(),
+            ch: ch,
+        }
+        cluster, err := NewCluster(context.TODO(), kv, spyClusterStore, dummyPosProvider{})
+        assert.Nil(t, err)
+        defer cluster.Close()
+        chstatus := &datapb.ChannelStatus{
+            Name: "ch_1",
+            State: datapb.ChannelWatchState_Uncomplete,
+            CollectionID: 100,
+        }
+        nodeInfo := &datapb.DataNodeInfo{
+            Address: "localhost:8080",
+            Version: 1,
+            Channels: []*datapb.ChannelStatus{chstatus},
+        }
+        node := NewNodeInfo(context.TODO(), nodeInfo)
+        cli, err := newMockDataNodeClient(1, make(chan interface{}))
+        assert.Nil(t, err)
+        node.client = cli
+        cluster.Startup([]*NodeInfo{node})
+        <-ch
+        cluster.UnRegister(node)
+        <-ch
+        spyClusterStore2 := &SpyClusterStore{
+            NodesInfo: NewNodesInfo(),
+            ch: ch,
+        }
+        cluster2, err := NewCluster(context.TODO(), kv, spyClusterStore2, dummyPosProvider{})
+        <-ch
+        assert.Nil(t, err)
+        nodes := cluster2.GetNodes()
+        assert.EqualValues(t, 1, len(nodes))
+        assert.EqualValues(t, 1, nodes[0].Info.GetVersion())
+        assert.EqualValues(t, 0, len(nodes[0].Info.GetChannels()))
+    })
 }
 
 func TestWatchIfNeeded(t *testing.T) {
@@ -168,8 +264,8 @@ func TestWatchIfNeeded(t *testing.T) {
     fmt.Println("222")
     <-ch
     dataNodes := cluster.GetNodes()
-    assert.EqualValues(t, 1, len(dataNodes[0].info.GetChannels()))
-    assert.EqualValues(t, chName, dataNodes[0].info.Channels[0].Name)
+    assert.EqualValues(t, 1, len(dataNodes[0].Info.GetChannels()))
+    assert.EqualValues(t, chName, dataNodes[0].Info.Channels[0].Name)
     cluster.Watch(chName, 0)
     <-pch
 }
@@ -62,15 +62,12 @@ var emptyUnregisterFunc dataNodeUnregisterPolicy = func(cluster []*NodeInfo, session *NodeInfo) []*NodeInfo {
 var randomAssignRegisterFunc dataNodeUnregisterPolicy = func(cluster []*NodeInfo, session *NodeInfo) []*NodeInfo {
     if len(cluster) == 0 || // no available node
         session == nil ||
-        len(session.info.GetChannels()) == 0 { // lost node not watching any channels
+        len(session.Info.GetChannels()) == 0 { // lost node not watching any channels
         return []*NodeInfo{}
     }
 
-    appliedNodes := make([]*NodeInfo, 0, len(session.info.GetChannels()))
-    channels := session.info.GetChannels()
-    // clear unregistered node's channels
-    node := session.Clone(SetChannels(nil))
-    appliedNodes = append(appliedNodes, node)
+    appliedNodes := make([]*NodeInfo, 0, len(session.Info.GetChannels()))
+    channels := session.Info.GetChannels()
 
     raResult := make(map[int][]*datapb.ChannelStatus)
     for _, chanSt := range channels {
@@ -117,7 +114,7 @@ var assignAllFunc channelAssignPolicy = func(cluster []*NodeInfo, channel string, collectionID UniqueID) []*NodeInfo {
     ret := make([]*NodeInfo, 0)
     for _, node := range cluster {
         has := false
-        for _, ch := range node.info.GetChannels() {
+        for _, ch := range node.Info.GetChannels() {
             if ch.Name == channel {
                 has = true
                 break
@@ -145,7 +142,7 @@ var balancedAssignFunc channelAssignPolicy = func(cluster []*NodeInfo, channel string, collectionID UniqueID) []*NodeInfo {
     }
     // filter existed channel
     for _, node := range cluster {
-        for _, c := range node.info.GetChannels() {
+        for _, c := range node.Info.GetChannels() {
             if c.GetName() == channel && c.GetCollectionID() == collectionID {
                 return nil
             }
@@ -153,9 +150,9 @@ var balancedAssignFunc channelAssignPolicy = func(cluster []*NodeInfo, channel string, collectionID UniqueID) []*NodeInfo {
     }
     target, min := -1, math.MaxInt32
     for k, v := range cluster {
-        if len(v.info.GetChannels()) < min {
+        if len(v.Info.GetChannels()) < min {
             target = k
-            min = len(v.info.GetChannels())
+            min = len(v.Info.GetChannels())
         }
     }
 
@@ -51,21 +51,21 @@ func TestRandomReassign(t *testing.T) {
         },
     }
     cases := []*NodeInfo{
-        {info: caseInfo1},
-        {info: caseInfo2},
+        {Info: caseInfo1},
+        {Info: caseInfo2},
         nil,
     }
 
     for _, ca := range cases {
         nodes := p(clusters, ca)
-        if ca == nil || len(ca.info.GetChannels()) == 0 {
+        if ca == nil || len(ca.Info.GetChannels()) == 0 {
             assert.Equal(t, 0, len(nodes))
         } else {
-            for _, ch := range ca.info.GetChannels() {
+            for _, ch := range ca.Info.GetChannels() {
                 found := false
             loop:
                 for _, node := range nodes {
-                    for _, nch := range node.info.GetChannels() {
+                    for _, nch := range node.Info.GetChannels() {
                         if nch.Name == ch.Name {
                             found = true
                             assert.EqualValues(t, datapb.ChannelWatchState_Uncomplete, nch.State)