milvus/internal/datacoord/policy.go

491 lines
14 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datacoord
import (
"sort"
"strconv"
"time"
"github.com/golang/protobuf/proto"
"github.com/milvus-io/milvus/internal/kv"
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/datapb"
"go.uber.org/zap"
"stathat.com/c/consistent"
)
// RegisterPolicy is the signature of a policy that decides the channel mapping
// after the node identified by nodeID has been registered. The decision is
// returned as a ChannelOpSet.
type RegisterPolicy func(store ROChannelStore, nodeID int64) ChannelOpSet
// EmptyRegister is a no-op RegisterPolicy: it ignores both arguments and
// always returns a nil ChannelOpSet.
func EmptyRegister(store ROChannelStore, nodeID int64) ChannelOpSet {
	return nil
}
// BufferChannelAssignPolicy moves every buffered channel to the newly
// registered node.
//
// It returns nil when the store reports no buffer info or the buffer holds no
// channels. Otherwise the result contains a Delete of the channels from the
// node recorded in the buffer info followed by an Add of the same channels to
// nodeID.
func BufferChannelAssignPolicy(store ROChannelStore, nodeID int64) ChannelOpSet {
	buffered := store.GetBufferChannelInfo()
	if buffered == nil {
		return nil
	}
	pending := buffered.Channels
	if len(pending) == 0 {
		return nil
	}

	ops := ChannelOpSet{}
	ops.Delete(buffered.NodeID, pending)
	ops.Add(nodeID, pending)
	return ops
}
// AvgAssignRegisterPolicy selects channels that should move to a newly
// registered node so that channels are spread roughly evenly.
//
// Buffered channels take priority: if BufferChannelAssignPolicy yields any
// operations they are returned as-is. Otherwise total/(len(store.GetNodes())+1)
// channels are picked round-robin from the other nodes, most-loaded node first,
// and nil is returned when that quota is zero. The picked channels are emitted
// as Add operations keyed by the node that currently holds them; this policy
// does not delete any node-channel pair itself. According to the original
// notes, the channel manager handles the release and later reassignment.
func AvgAssignRegisterPolicy(store ROChannelStore, nodeID int64) ChannelOpSet {
	if buffered := BufferChannelAssignPolicy(store, nodeID); len(buffered) != 0 {
		return buffered
	}

	// Every node except the one being registered may give up channels.
	donors := filterNode(store.GetNodesChannels(), nodeID)
	total := 0
	for _, donor := range donors {
		total += len(donor.Channels)
	}

	quota := total / (len(store.GetNodes()) + 1)
	if quota == 0 {
		return nil
	}

	// Most-loaded nodes go first so they are asked to release first.
	sort.Slice(donors, func(a, b int) bool {
		return len(donors[a].Channels) > len(donors[b].Channels)
	})

	releases := make(map[int64][]*channel)
	donorCount := len(donors)
	for pick := 0; pick < quota; pick++ {
		donor := donors[pick%donorCount]
		// Each full pass over the donors advances to their next channel.
		round := pick / donorCount
		if round < len(donor.Channels) {
			releases[donor.NodeID] = append(releases[donor.NodeID], donor.Channels[round])
		}
		// A donor with too few channels is simply skipped; nothing is re-picked.
		// TODO: Consider re-picking in case assignment is extremely uneven?
	}

	ops := ChannelOpSet{}
	for holder, chs := range releases {
		ops.Add(holder, chs)
	}
	return ops
}
// filterNode returns the entries of infos whose NodeID differs from nodeID,
// in their original order. The result is always a non-nil slice.
func filterNode(infos []*NodeChannelInfo, nodeID int64) []*NodeChannelInfo {
	kept := []*NodeChannelInfo{}
	for _, candidate := range infos {
		if candidate.NodeID != nodeID {
			kept = append(kept, candidate)
		}
	}
	return kept
}
// ConsistentHashRegisterPolicy returns a RegisterPolicy that uses hashRing to
// find the channels whose owner changes once a new node registers.
//
// On every call the ring members are reset to store.GetNodes(). If buffered
// channels exist they are handed to the new node via
// BufferChannelAssignPolicy. Otherwise each channel whose ring owner differs
// from the node currently holding it is emitted as an Add operation on its
// current node; according to the original notes, the channel manager
// reassigns those channels later. The policy returns nil when a ring lookup
// fails or the ring returns a member that cannot be parsed as a node ID.
func ConsistentHashRegisterPolicy(hashRing *consistent.Consistent) RegisterPolicy {
	return func(store ROChannelStore, nodeID int64) ChannelOpSet {
		hashRing.Set(formatNodeIDs(store.GetNodes()))

		// If there are buffer channels, then nodeID is the first node.
		if opSet := BufferChannelAssignPolicy(store, nodeID); len(opSet) != 0 {
			return opSet
		}

		// If there are other nodes, channels on these nodes may be reassigned to
		// the new registered node. We should find these channels.
		releases := make(map[int64][]*channel)
		for _, c := range store.GetNodesChannels() {
			for _, ch := range c.Channels {
				idStr, err := hashRing.Get(ch.Name)
				if err != nil {
					log.Warn("receive error when getting from hashRing",
						zap.String("channel", ch.Name), zap.Error(err))
					return nil
				}
				did, err := deformatNodeID(idStr)
				if err != nil {
					// Log the raw ring member and the parse error; the parsed
					// value carries no useful information on failure.
					log.Warn("failed to deformat node id",
						zap.String("id", idStr), zap.Error(err))
					return nil
				}
				if did != c.NodeID {
					releases[c.NodeID] = append(releases[c.NodeID], ch)
				}
			}
		}

		// Channels in `releases` are reassigned eventually by channel manager.
		opSet := ChannelOpSet{}
		for id, channels := range releases {
			opSet.Add(id, channels)
		}
		return opSet
	}
}
// formatNodeID renders nodeID as a base-10 string.
func formatNodeID(nodeID int64) string {
	const decimal = 10
	return strconv.FormatInt(nodeID, decimal)
}
// deformatNodeID parses node as a base-10 signed 64-bit integer. It returns
// whatever value and error strconv.ParseInt produces for that input.
func deformatNodeID(node string) (int64, error) {
	const (
		decimal = 10
		bitSize = 64
	)
	id, err := strconv.ParseInt(node, decimal, bitSize)
	return id, err
}
// ChannelAssignPolicy is the signature of a policy that assigns the given
// channels to registered nodes. The assignment is returned as a ChannelOpSet.
type ChannelAssignPolicy func(store ROChannelStore, channels []*channel) ChannelOpSet
// AverageAssignPolicy assigns channels to nodes so that the number of
// channels per node stays approximately equal.
//
// Channels already present in the store are dropped first (see
// filterChannels); nil is returned when nothing is left. If the store reports
// no nodes, the channels are added under bufferID. Otherwise the nodes are
// ordered by ascending channel count and the remaining channels are dealt out
// to them round-robin, producing one Add operation per receiving node.
func AverageAssignPolicy(store ROChannelStore, channels []*channel) ChannelOpSet {
	filteredChannels := filterChannels(store, channels)
	if len(filteredChannels) == 0 {
		return nil
	}

	opSet := ChannelOpSet{}
	dataNodesChannels := store.GetNodesChannels()

	// If no datanode alive, save channels in buffer.
	// NOTE(review): this stores the unfiltered input rather than
	// filteredChannels — confirm that is intended.
	if len(dataNodesChannels) == 0 {
		opSet.Add(bufferID, channels)
		return opSet
	}

	// Least-loaded nodes first. The comparator must be a strict "less than":
	// sort.Slice requires less(i, i) to be false.
	sort.Slice(dataNodesChannels, func(i, j int) bool {
		return len(dataNodesChannels[i].Channels) < len(dataNodesChannels[j].Channels)
	})

	updates := make(map[int64][]*channel)
	for i, ch := range filteredChannels {
		n := dataNodesChannels[i%len(dataNodesChannels)].NodeID
		updates[n] = append(updates[n], ch)
	}

	for id, chs := range updates {
		opSet.Add(id, chs)
	}
	return opSet
}
// ConsistentHashChannelAssignPolicy returns a ChannelAssignPolicy that picks
// the target node of each channel by looking the channel name up in hashRing.
//
// On every call the ring members are reset to store.GetNodes(). Channels
// already present in the store are dropped (see filterChannels) and nil is
// returned when nothing is left. If the ring has no members the channels are
// added under bufferID. The policy returns nil when a ring lookup fails or the
// ring returns a member that cannot be parsed as a node ID.
func ConsistentHashChannelAssignPolicy(hashRing *consistent.Consistent) ChannelAssignPolicy {
	return func(store ROChannelStore, channels []*channel) ChannelOpSet {
		hashRing.Set(formatNodeIDs(store.GetNodes()))

		filteredChannels := filterChannels(store, channels)
		if len(filteredChannels) == 0 {
			return nil
		}

		// No node available: park the channels in the buffer.
		// NOTE(review): this stores the unfiltered input rather than
		// filteredChannels — confirm that is intended.
		if len(hashRing.Members()) == 0 {
			opSet := ChannelOpSet{}
			opSet.Add(bufferID, channels)
			return opSet
		}

		adds := make(map[int64][]*channel)
		for _, c := range filteredChannels {
			idStr, err := hashRing.Get(c.Name)
			if err != nil {
				log.Warn("receive error when getting from hashRing",
					zap.String("channel", c.Name), zap.Error(err))
				return nil
			}
			did, err := deformatNodeID(idStr)
			if err != nil {
				// Log the raw ring member and the parse error; the parsed
				// value carries no useful information on failure.
				log.Warn("failed to deformat node id",
					zap.String("id", idStr), zap.Error(err))
				return nil
			}
			adds[did] = append(adds[did], c)
		}

		// filteredChannels is non-empty and every iteration above either
		// returned or recorded an entry, so adds cannot be empty here.
		opSet := ChannelOpSet{}
		for id, chs := range adds {
			opSet.Add(id, chs)
		}
		return opSet
	}
}
// filterChannels returns the channels from the input whose names do not
// appear in any entry of store.GetChannels().
//
// Input channels are de-duplicated by name; when several share a name the
// last one is kept. The result follows the order in which each name first
// appears in channels, so callers that deal the result out round-robin behave
// deterministically. It returns nil when no channel remains.
func filterChannels(store ROChannelStore, channels []*channel) []*channel {
	pending := make(map[string]*channel, len(channels))
	for _, c := range channels {
		pending[c.Name] = c
	}

	for _, info := range store.GetChannels() {
		for _, c := range info.Channels {
			delete(pending, c.Name)
		}
	}
	if len(pending) == 0 {
		return nil
	}

	// Walk the input instead of the map so the output order is stable; map
	// iteration order is unspecified in Go.
	filtered := make([]*channel, 0, len(pending))
	for _, c := range channels {
		kept, ok := pending[c.Name]
		if !ok {
			continue
		}
		filtered = append(filtered, kept)
		// Remove the name so a repeated input entry is emitted only once.
		delete(pending, c.Name)
	}
	return filtered
}
// DeregisterPolicy is the signature of a policy that determines the channel
// mapping after the node identified by nodeID has been deregistered. The
// decision is returned as a ChannelOpSet.
type DeregisterPolicy func(store ROChannelStore, nodeID int64) ChannelOpSet
// EmptyDeregisterPolicy is a no-op DeregisterPolicy: it ignores both arguments
// and always returns a nil ChannelOpSet.
func EmptyDeregisterPolicy(store ROChannelStore, nodeID int64) ChannelOpSet {
	return nil
}
// AvgAssignUnregisteredChannels spreads the channels of a deregistered node
// evenly over the remaining nodes.
//
// The channels held by nodeID are deleted from it. If no other node is left
// they are added under bufferID; otherwise the remaining nodes are ordered by
// ascending channel count and the channels are dealt out to them round-robin,
// producing one Add operation per receiving node.
func AvgAssignUnregisteredChannels(store ROChannelStore, nodeID int64) ChannelOpSet {
	channels := store.GetNodesChannels()
	filteredChannels := make([]*NodeChannelInfo, 0, len(channels))
	unregisteredChannels := make([]*channel, 0)
	opSet := ChannelOpSet{}

	for _, c := range channels {
		if c.NodeID == nodeID {
			opSet.Delete(nodeID, c.Channels)
			unregisteredChannels = append(unregisteredChannels, c.Channels...)
			continue
		}
		filteredChannels = append(filteredChannels, c)
	}

	// No node left to take over: park the channels in the buffer.
	if len(filteredChannels) == 0 {
		opSet.Add(bufferID, unregisteredChannels)
		return opSet
	}

	// Least-loaded nodes first. The comparator must be a strict "less than":
	// sort.Slice requires less(i, i) to be false.
	sort.Slice(filteredChannels, func(i, j int) bool {
		return len(filteredChannels[i].Channels) < len(filteredChannels[j].Channels)
	})

	updates := make(map[int64][]*channel)
	for i, ch := range unregisteredChannels {
		n := filteredChannels[i%len(filteredChannels)].NodeID
		updates[n] = append(updates[n], ch)
	}

	for id, chs := range updates {
		opSet.Add(id, chs)
	}
	return opSet
}
// ConsistentHashDeregisterPolicy returns a DeregisterPolicy that reassigns the
// channels of a deregistered node by looking them up in hashRing.
//
// On every call the ring members are reset to store.GetNodes() without
// nodeID. The channels held by nodeID are deleted from it; if the ring has no
// members they are added under bufferID, otherwise each channel is added to
// the node the ring returns for its name. The policy returns nil when nodeID
// is not found in the store, when a ring lookup fails, or when the ring
// returns a member that cannot be parsed as a node ID.
func ConsistentHashDeregisterPolicy(hashRing *consistent.Consistent) DeregisterPolicy {
	return func(store ROChannelStore, nodeID int64) ChannelOpSet {
		hashRing.Set(formatNodeIDsWithFilter(store.GetNodes(), nodeID))

		var deletedInfo *NodeChannelInfo
		for _, cinfo := range store.GetNodesChannels() {
			if cinfo.NodeID == nodeID {
				deletedInfo = cinfo
				break
			}
		}
		if deletedInfo == nil {
			log.Warn("failed to find node when applying deregister policy", zap.Int64("nodeID", nodeID))
			return nil
		}

		opSet := ChannelOpSet{}
		opSet.Delete(nodeID, deletedInfo.Channels)

		// If no members in hash ring, store channels in buffer.
		if len(hashRing.Members()) == 0 {
			opSet.Add(bufferID, deletedInfo.Channels)
			return opSet
		}

		// Reassign channels of the deleted node.
		updates := make(map[int64][]*channel)
		for _, c := range deletedInfo.Channels {
			idStr, err := hashRing.Get(c.Name)
			if err != nil {
				log.Warn("failed to get channel in hash ring",
					zap.String("channel", c.Name), zap.Error(err))
				return nil
			}
			did, err := deformatNodeID(idStr)
			if err != nil {
				// Abort instead of assigning the channel to whatever value
				// the failed parse happened to return.
				log.Warn("failed to deformat id",
					zap.String("id", idStr), zap.Error(err))
				return nil
			}
			updates[did] = append(updates[did], c)
		}

		for id, chs := range updates {
			opSet.Add(id, chs)
		}
		return opSet
	}
}
// ChannelReassignPolicy is the signature of a policy that reassigns the
// channels listed in reassigns. The decision is returned as a ChannelOpSet.
type ChannelReassignPolicy func(store ROChannelStore, reassigns []*NodeChannelInfo) ChannelOpSet
// EmptyReassignPolicy is a no-op ChannelReassignPolicy: it ignores both
// arguments and always returns a nil ChannelOpSet.
func EmptyReassignPolicy(store ROChannelStore, reassigns []*NodeChannelInfo) ChannelOpSet {
	return nil
}
// AverageReassignPolicy moves the channels listed in reassigns onto the other
// nodes, dealing them out round-robin.
//
// Nodes whose IDs appear in reassigns are excluded as targets. If no target
// node remains, nothing is reassigned and nil is returned. Otherwise the
// result holds one Delete operation per entry of reassigns, followed by one
// Add operation per target node that received channels.
func AverageReassignPolicy(store ROChannelStore, reassigns []*NodeChannelInfo) ChannelOpSet {
	allNodes := store.GetNodesChannels()

	// Nodes that are giving channels up must not receive any.
	excluded := make(map[int64]struct{})
	for _, r := range reassigns {
		excluded[r.NodeID] = struct{}{}
	}

	targets := make([]*NodeChannelInfo, 0, len(allNodes))
	for _, node := range allNodes {
		if _, skip := excluded[node.NodeID]; !skip {
			targets = append(targets, node)
		}
	}
	if len(targets) == 0 {
		// No node is left, so do not reassign.
		return nil
	}

	ops := make([]*ChannelOp, 0)
	addsByNode := make(map[int64]*ChannelOp)
	cursor := 0 // round-robin position, shared across all reassigned nodes
	for _, r := range reassigns {
		ops = append(ops, &ChannelOp{
			Type:     Delete,
			Channels: r.Channels,
			NodeID:   r.NodeID,
		})

		for _, ch := range r.Channels {
			targetID := targets[cursor%len(targets)].NodeID
			cursor++

			op, seen := addsByNode[targetID]
			if !seen {
				op = &ChannelOp{Type: Add, NodeID: targetID}
				addsByNode[targetID] = op
			}
			op.Channels = append(op.Channels, ch)
		}
	}

	for _, op := range addsByNode {
		ops = append(ops, op)
	}
	return ops
}
// ChannelBGChecker is the signature of a background check that inspects the
// channels of each node at time ts and returns the channels that need to be
// reallocated, or an error.
type ChannelBGChecker func(channels []*NodeChannelInfo, ts time.Time) ([]*NodeChannelInfo, error)
// EmptyBgChecker is a no-op ChannelBGChecker: it ignores both arguments and
// always returns a nil slice and a nil error.
func EmptyBgChecker(channels []*NodeChannelInfo, ts time.Time) ([]*NodeChannelInfo, error) {
	return nil, nil
}
// BgCheckWithMaxWatchDuration returns a ChannelBGChecker that reports channels
// whose watch has been pending for at least maxWatchDuration.
//
// For every channel the checker loads the stored ChannelWatchInfo from kv
// under buildNodeChannelKey(nodeID, channelName). Channels in state Complete
// or WatchSuccess are skipped. Any other channel is reported when the time
// between its StartTs (read as Unix seconds) and ts is at least
// maxWatchDuration. Only nodes with at least one such channel appear in the
// result. The first load or unmarshal error aborts the check and is returned.
func BgCheckWithMaxWatchDuration(kv kv.TxnKV) ChannelBGChecker {
	return func(channels []*NodeChannelInfo, ts time.Time) ([]*NodeChannelInfo, error) {
		reAllocations := make([]*NodeChannelInfo, 0, len(channels))
		for _, node := range channels {
			overdue := make([]*channel, 0)
			for _, c := range node.Channels {
				raw, err := kv.Load(buildNodeChannelKey(node.NodeID, c.Name))
				if err != nil {
					return nil, err
				}

				watchInfo := &datapb.ChannelWatchInfo{}
				if err := proto.Unmarshal([]byte(raw), watchInfo); err != nil {
					return nil, err
				}
				reviseVChannelInfo(watchInfo.GetVchan())

				// Channels that are already watched need no reallocation.
				switch watchInfo.State {
				case datapb.ChannelWatchState_Complete, datapb.ChannelWatchState_WatchSuccess:
					continue
				}

				// Still not watched after maxWatchDuration: hand it to another node.
				waited := ts.Sub(time.Unix(watchInfo.StartTs, 0))
				if waited >= maxWatchDuration {
					overdue = append(overdue, c)
				}
			}

			if len(overdue) != 0 {
				reAllocations = append(reAllocations, &NodeChannelInfo{
					NodeID:   node.NodeID,
					Channels: overdue,
				})
			}
		}
		return reAllocations, nil
	}
}
// formatNodeIDs converts every ID in ids to its string form via formatNodeID,
// preserving order. The result is always a non-nil slice.
func formatNodeIDs(ids []int64) []string {
	out := make([]string, len(ids))
	for i := range ids {
		out[i] = formatNodeID(ids[i])
	}
	return out
}
// formatNodeIDsWithFilter converts the IDs in ids to their string form via
// formatNodeID, preserving order and leaving out every ID equal to filter.
// The result is always a non-nil slice.
func formatNodeIDsWithFilter(ids []int64, filter int64) []string {
	out := make([]string, 0, len(ids))
	for i := range ids {
		if id := ids[i]; id != filter {
			out = append(out, formatNodeID(id))
		}
	}
	return out
}