milvus/internal/querycoordv2/meta/resource_manager_test.go

1008 lines
36 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package meta
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/rgpb"
"github.com/milvus-io/milvus/internal/json"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/kv/mocks"
"github.com/milvus-io/milvus/internal/metastore/kv/querycoord"
"github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/querycoordv2/session"
"github.com/milvus-io/milvus/pkg/v2/kv"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/etcd"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)
// ResourceManagerSuite is the testify suite that exercises ResourceManager.
type ResourceManagerSuite struct {
suite.Suite
// kv is the etcd-backed meta KV created in SetupTest and closed in TearDownSuite.
kv kv.MetaKv
// manager is the ResourceManager under test; SetupTest rebuilds it before each test.
manager *ResourceManager
// ctx is the context handed to manager calls; SetupTest sets it to context.Background().
ctx context.Context
}
// SetupSuite calls paramtable.Init() before the tests of the suite run.
func (suite *ResourceManagerSuite) SetupSuite() {
paramtable.Init()
}
// SetupTest builds a fresh ResourceManager for the upcoming test. The manager
// persists through a querycoord catalog layered on an etcd KV that is created
// from the generated etcd configuration; a failure to obtain the etcd client
// aborts the test.
func (suite *ResourceManagerSuite) SetupTest() {
	etcdCfg := params.GenerateEtcdConfig()

	client, err := etcd.GetEtcdClient(
		etcdCfg.UseEmbedEtcd.GetAsBool(),
		etcdCfg.EtcdUseSSL.GetAsBool(),
		etcdCfg.Endpoints.GetAsStrings(),
		etcdCfg.EtcdTLSCert.GetValue(),
		etcdCfg.EtcdTLSKey.GetValue(),
		etcdCfg.EtcdTLSCACert.GetValue(),
		etcdCfg.EtcdTLSMinVersion.GetValue(),
	)
	suite.Require().NoError(err)

	suite.kv = etcdkv.NewEtcdKV(client, etcdCfg.MetaRootPath.GetValue())
	suite.manager = NewResourceManager(querycoord.NewCatalog(suite.kv), session.NewNodeManager())
	suite.ctx = context.Background()
}
// TearDownSuite closes the meta KV created by the most recent SetupTest.
//
// suite.kv is only assigned after SetupTest has obtained an etcd client, so it
// is still nil when that step failed (or when no test ran). Guard against it
// so teardown does not panic on a nil interface and bury the real failure.
func (suite *ResourceManagerSuite) TearDownSuite() {
	if suite.kv == nil {
		return
	}
	suite.kv.Close()
}
func TestResourceManager(t *testing.T) {
suite.Run(t, new(ResourceManagerSuite))
}
// TestValidateConfiguration feeds validateResourceGroupConfig one accepted
// configuration and a series of configurations that must be rejected with
// merr.ErrResourceGroupIllegalConfig, then checks that a plain group can be
// added and removed.
func (suite *ResourceManagerSuite) TestValidateConfiguration() {
	ctx := suite.ctx
	const target = "rg1"

	// transferFrom / transferTo build a (0, 0) config that references `peer`
	// in the corresponding transfer list.
	transferFrom := func(peer string) *rgpb.ResourceGroupConfig {
		c := newResourceGroupConfig(0, 0)
		c.TransferFrom = []*rgpb.ResourceGroupTransfer{{ResourceGroup: peer}}
		return c
	}
	transferTo := func(peer string) *rgpb.ResourceGroupConfig {
		c := newResourceGroupConfig(0, 0)
		c.TransferTo = []*rgpb.ResourceGroupTransfer{{ResourceGroup: peer}}
		return c
	}

	// The only configuration in this test that passes validation.
	suite.NoError(suite.manager.validateResourceGroupConfig(target, newResourceGroupConfig(0, 0)))

	// Every entry below must be rejected. "rg2" has not been added yet when
	// these run, and "rg1" is the group being validated itself.
	rejected := []*rgpb.ResourceGroupConfig{
		{},
		newResourceGroupConfig(-1, 2),
		newResourceGroupConfig(2, -1),
		newResourceGroupConfig(3, 2),
		transferFrom("rg1"),
		transferFrom("rg2"),
		transferTo("rg1"),
		transferTo("rg2"),
	}
	for _, candidate := range rejected {
		verr := suite.manager.validateResourceGroupConfig(target, candidate)
		suite.ErrorIs(verr, merr.ErrResourceGroupIllegalConfig)
	}

	// A group with a (0, 0) config can be created and removed again.
	suite.NoError(suite.manager.AddResourceGroup(ctx, "rg2", newResourceGroupConfig(0, 0)))
	suite.NoError(suite.manager.RemoveResourceGroup(ctx, "rg2"))
}
// TestValidateDelete checks validateResourceGroupIsDeletable: the default
// group, a group created with a (1, 1) config, and a group that is still
// referenced by another group's TransferFrom / TransferTo must all be
// rejected with merr.ErrParameterInvalid. Once the references are gone the
// group becomes deletable.
//
// Every setup call is asserted so that a failing precondition is reported at
// its source instead of surfacing as a confusing validation result later.
func (suite *ResourceManagerSuite) TestValidateDelete() {
	ctx := suite.ctx

	// Non empty resource group can not be removed.
	err := suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(1, 1))
	suite.NoError(err)

	err = suite.manager.validateResourceGroupIsDeletable(DefaultResourceGroupName)
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	err = suite.manager.validateResourceGroupIsDeletable("rg1")
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	// rg2 lists rg1 in TransferFrom; rg1 stays undeletable even after its own
	// config is reset to (0, 0).
	cfg := newResourceGroupConfig(0, 0)
	cfg.TransferFrom = []*rgpb.ResourceGroupTransfer{{ResourceGroup: "rg1"}}
	err = suite.manager.AddResourceGroup(ctx, "rg2", cfg)
	suite.NoError(err)
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(0, 0),
	})
	suite.NoError(err)
	err = suite.manager.validateResourceGroupIsDeletable("rg1")
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	// Same check with rg2 referencing rg1 through TransferTo instead.
	cfg = newResourceGroupConfig(0, 0)
	cfg.TransferTo = []*rgpb.ResourceGroupTransfer{{ResourceGroup: "rg1"}}
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg2": cfg,
	})
	suite.NoError(err)
	err = suite.manager.validateResourceGroupIsDeletable("rg1")
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	// With rg2 no longer referencing rg1, rg1 can be deleted.
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg2": newResourceGroupConfig(0, 0),
	})
	suite.NoError(err)
	err = suite.manager.validateResourceGroupIsDeletable("rg1")
	suite.NoError(err)

	err = suite.manager.RemoveResourceGroup(ctx, "rg1")
	suite.NoError(err)
	err = suite.manager.RemoveResourceGroup(ctx, "rg2")
	suite.NoError(err)
}
// TestManipulateResourceGroup covers adding, re-adding and removing resource
// groups, including removal of the default group, of a group created with a
// (1, 1) config, and of a group that currently holds a node.
//
// Results of UpdateResourceGroups are asserted rather than dropped, so a
// failed update is reported where it happens.
func (suite *ResourceManagerSuite) TestManipulateResourceGroup() {
	ctx := suite.ctx

	// test add rg
	err := suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(0, 0))
	suite.NoError(err)
	suite.True(suite.manager.ContainResourceGroup(ctx, "rg1"))
	suite.Len(suite.manager.ListResourceGroups(ctx), 2)

	// test add duplicate rg but same configuration is ok
	err = suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(0, 0))
	suite.NoError(err)

	// adding the same name with a different configuration must fail
	err = suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(1, 1))
	suite.Error(err)

	// test delete rg
	err = suite.manager.RemoveResourceGroup(ctx, "rg1")
	suite.NoError(err)

	// test delete rg which doesn't exist
	err = suite.manager.RemoveResourceGroup(ctx, "rg1")
	suite.NoError(err)

	// test delete default rg
	err = suite.manager.RemoveResourceGroup(ctx, DefaultResourceGroupName)
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	// test delete a rg not empty.
	err = suite.manager.AddResourceGroup(ctx, "rg2", newResourceGroupConfig(1, 1))
	suite.NoError(err)
	err = suite.manager.RemoveResourceGroup(ctx, "rg2")
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	// test delete a rg after update
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg2": newResourceGroupConfig(0, 0),
	})
	suite.NoError(err)
	err = suite.manager.RemoveResourceGroup(ctx, "rg2")
	suite.NoError(err)

	// assign a node to rg.
	err = suite.manager.AddResourceGroup(ctx, "rg2", newResourceGroupConfig(1, 1))
	suite.NoError(err)
	suite.manager.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
		NodeID:   1,
		Address:  "localhost",
		Hostname: "localhost",
	}))
	defer suite.manager.nodeMgr.Remove(1)
	suite.manager.HandleNodeUp(ctx, 1)
	err = suite.manager.RemoveResourceGroup(ctx, "rg2")
	suite.ErrorIs(err, merr.ErrParameterInvalid)

	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg2": newResourceGroupConfig(0, 0),
	})
	suite.NoError(err)

	// RemoveResourceGroup will remove all nodes from the resource group.
	err = suite.manager.RemoveResourceGroup(ctx, "rg2")
	suite.NoError(err)
}
// TestNodeUpAndDown drives HandleNodeUp / HandleNodeDown for known and unknown
// nodes and checks how many nodes each resource group holds afterwards.
//
// The result of adding "rg2" is now actually assigned before it is asserted
// (previously the stale error of the preceding update was re-checked), and
// the results of the later UpdateResourceGroups calls are asserted too.
func (suite *ResourceManagerSuite) TestNodeUpAndDown() {
	ctx := suite.ctx

	// registerNode makes a node known to the node manager without assigning
	// it to any resource group.
	registerNode := func(id int64) {
		suite.manager.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
			NodeID:   id,
			Address:  "localhost",
			Hostname: "localhost",
		}))
	}
	// nodeNum reports how many nodes the named resource group holds.
	nodeNum := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).NodeNum()
	}

	registerNode(1)
	err := suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(1, 1))
	suite.NoError(err)

	// test add node to rg
	suite.manager.HandleNodeUp(ctx, 1)
	suite.Equal(1, nodeNum("rg1"))

	// test add non-exist node to rg
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(2, 3),
	})
	suite.NoError(err)
	suite.manager.HandleNodeUp(ctx, 2)
	suite.Equal(1, nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	// teardown a non-exist node from rg.
	suite.manager.HandleNodeDown(ctx, 2)
	suite.Equal(1, nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	// test add exist node to rg
	suite.manager.HandleNodeUp(ctx, 1)
	suite.Equal(1, nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	// teardown a exist node from rg.
	suite.manager.HandleNodeDown(ctx, 1)
	suite.Zero(nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	// tearing the same node down a second time changes nothing.
	suite.manager.HandleNodeDown(ctx, 1)
	suite.Zero(nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	suite.manager.HandleNodeUp(ctx, 1)
	suite.Equal(1, nodeNum("rg1"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(4, 4),
	})
	suite.NoError(err)
	err = suite.manager.AddResourceGroup(ctx, "rg2", newResourceGroupConfig(1, 1))
	suite.NoError(err)

	// Four more nodes: together with node 1 they fill rg1 (4) and rg2 (1).
	for _, id := range []int64{11, 12, 13, 14} {
		registerNode(id)
	}
	for _, id := range []int64{11, 12, 13, 14} {
		suite.manager.HandleNodeUp(ctx, id)
	}
	suite.Equal(4, nodeNum("rg1"))
	suite.Equal(1, nodeNum("rg2"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	for _, id := range []int64{11, 12, 13, 14} {
		suite.manager.HandleNodeDown(ctx, id)
	}
	suite.Equal(1, nodeNum("rg1"))
	suite.Zero(nodeNum("rg2"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	suite.manager.HandleNodeDown(ctx, 1)
	suite.Zero(nodeNum("rg1"))
	suite.Zero(nodeNum("rg2"))
	suite.Zero(nodeNum(DefaultResourceGroupName))

	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(20, 30),
		"rg2": newResourceGroupConfig(30, 40),
	})
	suite.NoError(err)
	for i := 1; i <= 100; i++ {
		registerNode(int64(i))
		suite.manager.HandleNodeUp(ctx, int64(i))
	}
	suite.Equal(20, nodeNum("rg1"))
	suite.Equal(30, nodeNum("rg2"))
	suite.Equal(50, nodeNum(DefaultResourceGroupName))

	// down all nodes
	for i := 1; i <= 100; i++ {
		suite.manager.HandleNodeDown(ctx, int64(i))
		suite.Equal(100-i, nodeNum("rg1")+
			nodeNum("rg2")+
			nodeNum(DefaultResourceGroupName))
	}

	// if there are all rgs reach limit, should be fall back to default rg.
	err = suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1":                    newResourceGroupConfig(0, 0),
		"rg2":                    newResourceGroupConfig(0, 0),
		DefaultResourceGroupName: newResourceGroupConfig(0, 0),
	})
	suite.NoError(err)
	for i := 1; i <= 100; i++ {
		suite.manager.HandleNodeUp(ctx, int64(i))
		suite.Equal(i, nodeNum(DefaultResourceGroupName))
		suite.Equal(0, nodeNum("rg1"))
		suite.Equal(0, nodeNum("rg2"))
	}
}
// TestAutoRecover brings up 100 nodes, then repeatedly changes resource group
// configurations and calls AutoRecoverResourceGroup, checking the resulting
// node count of every group after each round. It also runs testTransferNode
// in the middle of the scenario.
func (suite *ResourceManagerSuite) TestAutoRecover() {
	ctx := suite.ctx

	// nodeNum reports how many nodes the named resource group holds.
	nodeNum := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).NodeNum()
	}
	// missing reports MissingNumOfNodes of the named resource group.
	missing := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).MissingNumOfNodes()
	}
	// recoverGroups runs AutoRecoverResourceGroup on each group, in order.
	recoverGroups := func(rgNames ...string) {
		for _, rgName := range rgNames {
			suite.manager.AutoRecoverResourceGroup(ctx, rgName)
		}
	}

	for i := 1; i <= 100; i++ {
		suite.manager.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
			NodeID:   int64(i),
			Address:  "localhost",
			Hostname: "localhost",
		}))
		suite.manager.HandleNodeUp(ctx, int64(i))
	}
	suite.Equal(100, nodeNum(DefaultResourceGroupName))

	// Recover 10 nodes from default resource group
	suite.manager.AddResourceGroup(ctx, "rg1", newResourceGroupConfig(10, 30))
	suite.Zero(nodeNum("rg1"))
	suite.Equal(10, missing("rg1"))
	suite.Equal(100, nodeNum(DefaultResourceGroupName))
	recoverGroups("rg1")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(0, missing("rg1"))
	suite.Equal(90, nodeNum(DefaultResourceGroupName))

	// Recover 20 nodes from default resource group
	suite.manager.AddResourceGroup(ctx, "rg2", newResourceGroupConfig(20, 30))
	suite.Zero(nodeNum("rg2"))
	suite.Equal(20, missing("rg2"))
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(90, nodeNum(DefaultResourceGroupName))
	recoverGroups("rg2")
	suite.Equal(20, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(70, nodeNum(DefaultResourceGroupName))

	// Recover 5 redundant nodes from resource group
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(5, 5),
	})
	recoverGroups("rg1")
	suite.Equal(20, nodeNum("rg2"))
	suite.Equal(5, nodeNum("rg1"))
	suite.Equal(75, nodeNum(DefaultResourceGroupName))

	// Recover 10 redundant nodes from resource group 2 to resource group 1 and default resource group.
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(10, 20),
		"rg2": newResourceGroupConfig(5, 10),
	})
	recoverGroups("rg2")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(80, nodeNum(DefaultResourceGroupName))

	// recover redundant nodes from default resource group
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1":                    newResourceGroupConfig(10, 20),
		"rg2":                    newResourceGroupConfig(20, 30),
		DefaultResourceGroupName: newResourceGroupConfig(10, 20),
	})
	recoverGroups("rg1", "rg2", DefaultResourceGroupName)
	// Even though the default resource group has 20 nodes limits,
	// all redundant nodes will be assign to default resource group.
	suite.Equal(20, nodeNum("rg1"))
	suite.Equal(30, nodeNum("rg2"))
	suite.Equal(50, nodeNum(DefaultResourceGroupName))

	// Test recover missing from high priority resource group by set `from`.
	suite.manager.AddResourceGroup(ctx, "rg3", &rgpb.ResourceGroupConfig{
		Requests:     &rgpb.ResourceGroupLimit{NodeNum: 15},
		Limits:       &rgpb.ResourceGroupLimit{NodeNum: 15},
		TransferFrom: []*rgpb.ResourceGroupTransfer{{ResourceGroup: "rg1"}},
	})
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(30, 40),
	})
	recoverGroups("rg1", "rg2", DefaultResourceGroupName, "rg3")
	// Get 10 from default group for redundant nodes, get 5 from rg1 for rg3 at high priority.
	suite.Equal(15, nodeNum("rg1"))
	suite.Equal(30, nodeNum("rg2"))
	suite.Equal(15, nodeNum("rg3"))
	suite.Equal(40, nodeNum(DefaultResourceGroupName))

	// Test recover redundant to high priority resource group by set `to`.
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg3": {
			Requests:   &rgpb.ResourceGroupLimit{NodeNum: 0},
			Limits:     &rgpb.ResourceGroupLimit{NodeNum: 0},
			TransferTo: []*rgpb.ResourceGroupTransfer{{ResourceGroup: "rg2"}},
		},
		"rg1": newResourceGroupConfig(15, 100),
		"rg2": newResourceGroupConfig(15, 40),
	})
	recoverGroups("rg1", "rg2", DefaultResourceGroupName, "rg3")
	// Recover rg3 by transfer 10 nodes to rg2 with high priority, 5 to rg1.
	suite.Equal(20, nodeNum("rg1"))
	suite.Equal(40, nodeNum("rg2"))
	suite.Equal(0, nodeNum("rg3"))
	suite.Equal(40, nodeNum(DefaultResourceGroupName))

	suite.testTransferNode()

	// Test redundant nodes recover to default resource group.
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(1, 1),
		"rg3":                    newResourceGroupConfig(0, 0),
		"rg2":                    newResourceGroupConfig(0, 0),
		"rg1":                    newResourceGroupConfig(0, 0),
	})
	// Even default resource group has 1 node limit,
	// all redundant nodes will be assign to default resource group if there's no resource group can hold.
	recoverGroups(DefaultResourceGroupName, "rg1", "rg2", "rg3")
	suite.Equal(0, nodeNum("rg1"))
	suite.Equal(0, nodeNum("rg2"))
	suite.Equal(0, nodeNum("rg3"))
	suite.Equal(100, nodeNum(DefaultResourceGroupName))

	// Test redundant recover to missing nodes and missing nodes from redundant nodes.
	// Initialize
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(0, 0),
		"rg3":                    newResourceGroupConfig(10, 10),
		"rg2":                    newResourceGroupConfig(80, 80),
		"rg1":                    newResourceGroupConfig(10, 10),
	})
	recoverGroups(DefaultResourceGroupName, "rg1", "rg2", "rg3")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(80, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))
	suite.Equal(0, nodeNum(DefaultResourceGroupName))

	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(0, 5),
		"rg3":                    newResourceGroupConfig(5, 5),
		"rg2":                    newResourceGroupConfig(80, 80),
		"rg1":                    newResourceGroupConfig(20, 30),
	})
	recoverGroups("rg3") // recover redundant to missing rg.
	suite.Equal(15, nodeNum("rg1"))
	suite.Equal(80, nodeNum("rg2"))
	suite.Equal(5, nodeNum("rg3"))
	suite.Equal(0, nodeNum(DefaultResourceGroupName))

	// NOTE(review): this round goes through the unexported updateResourceGroups
	// rather than UpdateResourceGroups; kept as in the original — confirm intent.
	suite.manager.updateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(5, 5),
		"rg3":                    newResourceGroupConfig(5, 10),
		"rg2":                    newResourceGroupConfig(80, 80),
		"rg1":                    newResourceGroupConfig(10, 10),
	})
	recoverGroups(DefaultResourceGroupName) // recover missing from redundant rg.
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(80, nodeNum("rg2"))
	suite.Equal(5, nodeNum("rg3"))
	suite.Equal(5, nodeNum(DefaultResourceGroupName))
}
// testTransferNode is a helper invoked from TestAutoRecover. It rebalances the
// groups to a known layout, checks that TransferNode rejects a set of bad
// argument combinations, and then verifies the layout after a successful
// transfer of 5 nodes from "rg1" to "rg3".
func (suite *ResourceManagerSuite) testTransferNode() {
	ctx := suite.ctx

	// nodeNum reports how many nodes the named resource group holds.
	nodeNum := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).NodeNum()
	}
	// recoverAll runs auto recovery over every group in a fixed order.
	recoverAll := func() {
		for _, rgName := range []string{"rg1", "rg2", DefaultResourceGroupName, "rg3"} {
			suite.manager.AutoRecoverResourceGroup(ctx, rgName)
		}
	}

	// Bring the groups to the layout the transfer checks start from.
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		DefaultResourceGroupName: newResourceGroupConfig(40, 40),
		"rg3":                    newResourceGroupConfig(0, 0),
		"rg2":                    newResourceGroupConfig(40, 40),
		"rg1":                    newResourceGroupConfig(20, 20),
	})
	recoverAll()
	suite.Equal(20, nodeNum("rg1"))
	suite.Equal(40, nodeNum("rg2"))
	suite.Equal(0, nodeNum("rg3"))
	suite.Equal(40, nodeNum(DefaultResourceGroupName))

	// Test TransferNode.
	// param error.
	suite.Error(suite.manager.TransferNode(ctx, "rg1", "rg1", 1))
	suite.Error(suite.manager.TransferNode(ctx, "rg1", "rg2", 0))
	suite.Error(suite.manager.TransferNode(ctx, "rg3", "rg2", 1))
	suite.Error(suite.manager.TransferNode(ctx, "rg1", "rg10086", 1))
	suite.Error(suite.manager.TransferNode(ctx, "rg10086", "rg2", 1))

	// success
	suite.NoError(suite.manager.TransferNode(ctx, "rg1", "rg3", 5))
	recoverAll()
	suite.Equal(15, nodeNum("rg1"))
	suite.Equal(40, nodeNum("rg2"))
	suite.Equal(5, nodeNum("rg3"))
	suite.Equal(40, nodeNum(DefaultResourceGroupName))
}
// TestIncomingNode marks node 1 as incoming, lets the manager assign pending
// incoming nodes, and expects the node to end up in the default resource
// group with no incoming nodes left.
func (suite *ResourceManagerSuite) TestIncomingNode() {
	ctx := suite.ctx

	nodeInfo := session.NewNodeInfo(session.ImmutableNodeInfo{
		NodeID:   1,
		Address:  "localhost",
		Hostname: "localhost",
	})
	suite.manager.nodeMgr.Add(nodeInfo)
	suite.manager.incomingNode.Insert(1)
	suite.Equal(1, suite.manager.CheckIncomingNodeNum(ctx))

	suite.manager.AssignPendingIncomingNode(ctx)
	suite.Equal(0, suite.manager.CheckIncomingNodeNum(ctx))

	defaultNodes, err := suite.manager.GetNodes(ctx, DefaultResourceGroupName)
	suite.NoError(err)
	suite.Len(defaultNodes, 1)
}
// TestUnassignFail replaces the manager with one backed by a mocked meta KV
// and expects HandleNodeDown to panic when the mocked MultiSave reports an
// error.
func (suite *ResourceManagerSuite) TestUnassignFail() {
	ctx := suite.ctx

	// The first MultiSave on the mock succeeds.
	metaKV := mocks.NewMetaKv(suite.T())
	metaKV.EXPECT().MultiSave(mock.Anything, mock.Anything).Return(nil).Once()
	suite.manager = NewResourceManager(querycoord.NewCatalog(metaKV), session.NewNodeManager())

	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": newResourceGroupConfig(20, 30),
	})

	nodeInfo := session.NewNodeInfo(session.ImmutableNodeInfo{
		NodeID:   1,
		Address:  "localhost",
		Hostname: "localhost",
	})
	suite.manager.nodeMgr.Add(nodeInfo)
	suite.manager.HandleNodeUp(ctx, 1)

	// The next MultiSave fails, which must make HandleNodeDown panic.
	metaKV.EXPECT().MultiSave(mock.Anything, mock.Anything).Return(merr.WrapErrServiceInternal("mocked")).Once()
	takeNodeDown := func() {
		suite.manager.HandleNodeDown(ctx, 1)
	}
	suite.Panics(takeNodeDown)
}
// TestGetResourceGroupsJSON builds a ResourceManager holding two resource
// groups with fixed node sets and checks that GetResourceGroupsJSON emits
// JSON that decodes back into exactly those groups and nodes.
func TestGetResourceGroupsJSON(t *testing.T) {
	ctx := context.Background()
	nodeMgr := session.NewNodeManager()

	first := NewResourceGroup("rg1", newResourceGroupConfig(0, 10), nodeMgr)
	first.nodes = typeutil.NewUniqueSet(1, 2)
	second := NewResourceGroup("rg2", newResourceGroupConfig(0, 20), nodeMgr)
	second.nodes = typeutil.NewUniqueSet(3, 4)

	manager := &ResourceManager{
		groups: map[string]*ResourceGroup{
			"rg1": first,
			"rg2": second,
		},
	}

	var decoded []*metricsinfo.ResourceGroup
	err := json.Unmarshal([]byte(manager.GetResourceGroupsJSON(ctx)), &decoded)
	assert.NoError(t, err)
	assert.Len(t, decoded, 2)

	// Output order is not relied upon; each group is matched by name.
	for _, rg := range decoded {
		switch rg.Name {
		case "rg1":
			assert.ElementsMatch(t, []int64{1, 2}, rg.Nodes)
		case "rg2":
			assert.ElementsMatch(t, []int64{3, 4}, rg.Nodes)
		default:
			assert.Failf(t, "unexpected resource group name", "unexpected resource group name %s", rg.Name)
		}
	}
}
// TestNodeLabels_NodeAssign creates three resource groups that each filter on
// a different "dc_name" label, brings up labelled nodes in batches and checks
// which group they land in. It then swaps the labels between the groups and
// checks the membership again after several auto-recover rounds.
func (suite *ResourceManagerSuite) TestNodeLabels_NodeAssign() {
	ctx := suite.ctx

	// labeledConfig builds a 10/10 config whose node filter requires the
	// given "dc_name" label.
	labeledConfig := func(label string) *rgpb.ResourceGroupConfig {
		return &rgpb.ResourceGroupConfig{
			Requests: &rgpb.ResourceGroupLimit{NodeNum: 10},
			Limits:   &rgpb.ResourceGroupLimit{NodeNum: 10},
			NodeFilter: &rgpb.ResourceGroupNodeFilter{
				NodeLabels: []*commonpb.KeyValuePair{
					{Key: "dc_name", Value: label},
				},
			},
		}
	}
	// bringUp registers nodes first..last (inclusive) with the given label
	// and reports each of them as up.
	bringUp := func(first, last int, label string) {
		for id := first; id <= last; id++ {
			suite.manager.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
				NodeID:   int64(id),
				Address:  "localhost",
				Hostname: "localhost",
				Labels: map[string]string{
					"dc_name": label,
				},
			}))
			suite.manager.HandleNodeUp(ctx, int64(id))
		}
	}
	// nodeNum reports how many nodes the named resource group holds.
	nodeNum := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).NodeNum()
	}
	// expectLabel asserts that every node of the group carries the label.
	expectLabel := func(rgName, label string) {
		members, _ := suite.manager.GetNodes(ctx, rgName)
		for _, nodeID := range members {
			suite.Equal(label, suite.manager.nodeMgr.Get(nodeID).Labels()["dc_name"])
		}
	}

	suite.manager.AddResourceGroup(ctx, "rg1", labeledConfig("label1"))
	suite.manager.AddResourceGroup(ctx, "rg2", labeledConfig("label2"))
	suite.manager.AddResourceGroup(ctx, "rg3", labeledConfig("label3"))

	// 30 nodes, all labelled label1: 10 fill rg1, the rest stay in default.
	bringUp(1, 30, "label1")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(0, nodeNum("rg2"))
	suite.Equal(0, nodeNum("rg3"))
	suite.Equal(20, nodeNum(DefaultResourceGroupName))

	// test new querynode with label2
	bringUp(31, 40, "label2")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(0, nodeNum("rg3"))
	suite.Equal(20, nodeNum(DefaultResourceGroupName))
	expectLabel("rg2", "label2")

	// test new querynode with label3
	bringUp(41, 50, "label3")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))
	suite.Equal(20, nodeNum(DefaultResourceGroupName))
	expectLabel("rg3", "label3")

	// test swap rg's label
	suite.manager.UpdateResourceGroups(ctx, map[string]*rgpb.ResourceGroupConfig{
		"rg1": labeledConfig("label2"),
		"rg2": labeledConfig("label3"),
		"rg3": labeledConfig("label1"),
	})

	log.Info("test swap rg's label")
	for round := 0; round < 4; round++ {
		for _, rgName := range []string{"rg1", "rg2", "rg3", DefaultResourceGroupName} {
			suite.manager.AutoRecoverResourceGroup(ctx, rgName)
		}
	}
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))
	suite.Equal(20, nodeNum(DefaultResourceGroupName))

	expectLabel("rg1", "label2")
	expectLabel("rg2", "label3")
	expectLabel("rg3", "label1")
}
// TestNodeLabels_NodeDown fills three label-filtered resource groups, takes
// one label1 node down, and checks where replacement nodes with a different
// and with the matching label end up.
func (suite *ResourceManagerSuite) TestNodeLabels_NodeDown() {
	ctx := suite.ctx

	// labeledConfig builds a 10/10 config whose node filter requires the
	// given "dc_name" label.
	labeledConfig := func(label string) *rgpb.ResourceGroupConfig {
		return &rgpb.ResourceGroupConfig{
			Requests: &rgpb.ResourceGroupLimit{NodeNum: 10},
			Limits:   &rgpb.ResourceGroupLimit{NodeNum: 10},
			NodeFilter: &rgpb.ResourceGroupNodeFilter{
				NodeLabels: []*commonpb.KeyValuePair{
					{Key: "dc_name", Value: label},
				},
			},
		}
	}
	// bringUp registers nodes first..last (inclusive) with the given label
	// and reports each of them as up.
	bringUp := func(first, last int, label string) {
		for id := first; id <= last; id++ {
			suite.manager.nodeMgr.Add(session.NewNodeInfo(session.ImmutableNodeInfo{
				NodeID:   int64(id),
				Address:  "localhost",
				Hostname: "localhost",
				Labels: map[string]string{
					"dc_name": label,
				},
			}))
			suite.manager.HandleNodeUp(ctx, int64(id))
		}
	}
	// nodeNum reports how many nodes the named resource group holds.
	nodeNum := func(rgName string) int {
		return suite.manager.GetResourceGroup(ctx, rgName).NodeNum()
	}
	// expectLabel asserts that every node of the group carries the label.
	expectLabel := func(rgName, label string) {
		members, _ := suite.manager.GetNodes(ctx, rgName)
		for _, nodeID := range members {
			suite.Equal(label, suite.manager.nodeMgr.Get(nodeID).Labels()["dc_name"])
		}
	}

	suite.manager.AddResourceGroup(ctx, "rg1", labeledConfig("label1"))
	suite.manager.AddResourceGroup(ctx, "rg2", labeledConfig("label2"))
	suite.manager.AddResourceGroup(ctx, "rg3", labeledConfig("label3"))

	// Ten nodes per label, one batch for each resource group.
	bringUp(1, 10, "label1")
	bringUp(31, 40, "label2")
	bringUp(41, 50, "label3")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))

	// test node down with label1
	suite.manager.HandleNodeDown(ctx, int64(1))
	suite.manager.nodeMgr.Remove(int64(1))
	suite.Equal(9, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))

	// test node up with label2
	bringUp(101, 101, "label2")
	suite.Equal(9, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))
	suite.Equal(1, nodeNum(DefaultResourceGroupName))

	// test node up with label1
	bringUp(102, 102, "label1")
	suite.Equal(10, nodeNum("rg1"))
	suite.Equal(10, nodeNum("rg2"))
	suite.Equal(10, nodeNum("rg3"))
	suite.Equal(1, nodeNum(DefaultResourceGroupName))
	expectLabel("rg1", "label1")

	// After a recovery pass, whatever is in the default group must still be
	// labelled label2.
	for _, rgName := range []string{"rg1", "rg2", "rg3", DefaultResourceGroupName} {
		suite.manager.AutoRecoverResourceGroup(ctx, rgName)
	}
	expectLabel(DefaultResourceGroupName, "label2")
}