# milvus/tests/python_client/resource_group/test_resource_group.py
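"""
End-to-end tests for Milvus resource groups (rgs):

- scale querynodes up/down and watch rg node counts converge to requests/limits
- move querynodes between rgs via transfer_from/transfer_to preferences
- load collection replicas into specific rgs and transfer replicas between rgs
- verify service availability while rgs are scaled or replicas are transferred

A typical invocation (assuming the surrounding test harness defines the
`image_tag` fixture, e.g. in conftest.py):

    pytest test_resource_group.py --image_tag=master-latest
"""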
import pytest
import time
from typing import Union, List
from pymilvus import connections, utility, Collection
from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP
from pymilvus.client.types import ResourceGroupConfig, ResourceGroupInfo
from utils.util_log import test_log as log
from base.client_base import TestcaseBase
from chaos.checker import (InsertChecker,
UpsertChecker,
SearchChecker,
HybridSearchChecker,
QueryChecker,
DeleteChecker,
Op,
ResultAnalyzer
)
from chaos import chaos_commons as cc
from common import common_func as cf
from utils.util_k8s import get_querynode_id_pod_pairs
from common import common_type as ct
from customize.milvus_operator import MilvusOperator
from common.milvus_sys import MilvusSys
from common.common_type import CaseLabel
from chaos.chaos_commons import assert_statistic
from delayed_assert import assert_expectations
from rich.table import Table
from rich.console import Console

namespace = 'chaos-testing'
prefix = "test_rg"

def display_resource_group_info(info: Union[ResourceGroupInfo, List[ResourceGroupInfo]]):
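    """Render one or more ResourceGroupInfo objects as a rich table on stdout."""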
table = Table(title="Resource Group Info")
table.width = 200
table.add_column("Name", style="cyan")
table.add_column("Capacity", style="cyan")
table.add_column("Available Node", style="cyan")
table.add_column("Loaded Replica", style="cyan")
table.add_column("Outgoing Node", style="cyan")
table.add_column("Incoming Node", style="cyan")
table.add_column("Request", style="cyan")
table.add_column("Limit", style="cyan")
table.add_column("Nodes", style="cyan")
if isinstance(info, list):
for i in info:
table.add_row(
i.name,
str(i.capacity),
str(i.num_available_node),
str(i.num_loaded_replica),
str(i.num_outgoing_node),
str(i.num_incoming_node),
str(i.config.requests.node_num),
str(i.config.limits.node_num),
"\n".join([str(node.hostname) for node in i.nodes])
)
else:
table.add_row(
info.name,
str(info.capacity),
str(info.num_available_node),
str(info.num_loaded_replica),
str(info.num_outgoing_node),
str(info.num_incoming_node),
str(info.config.requests.node_num),
str(info.config.limits.node_num),
"\n".join([str(node.hostname) for node in info.nodes])
)
console = Console()
console.width = 300
console.print(table)
def display_segment_distribution_info(collection_name, release_name):
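    """Print the query-segment distribution of a collection, mapping each
    querynode id to its pod name via the k8s API."""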
table = Table(title=f"{collection_name} Segment Distribution Info")
table.width = 200
table.add_column("Segment ID", style="cyan")
table.add_column("Collection ID", style="cyan")
table.add_column("Partition ID", style="cyan")
table.add_column("Num Rows", style="cyan")
table.add_column("State", style="cyan")
table.add_column("Node ID", style="cyan")
table.add_column("Node Name", style="cyan")
res = utility.get_query_segment_info(collection_name)
label = f"app.kubernetes.io/instance={release_name}, app.kubernetes.io/component=querynode"
    querynode_id_pod_pair = get_querynode_id_pod_pairs(namespace, label)
for r in res:
table.add_row(
str(r.segmentID),
str(r.collectionID),
str(r.partitionID),
str(r.num_rows),
str(r.state),
str(r.nodeIds),
str([querynode_id_pod_pair.get(node_id) for node_id in r.nodeIds])
)
console = Console()
console.width = 300
console.print(table)
def list_all_resource_groups():
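    """Describe every resource group in the cluster and display them in one table."""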
rg_names = utility.list_resource_groups()
resource_groups = []
for rg_name in rg_names:
resource_group = utility.describe_resource_group(rg_name)
resource_groups.append(resource_group)
display_resource_group_info(resource_groups)
def _install_milvus(image_tag="master-latest"):
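    """Deploy a Milvus cluster via MilvusOperator and return (release_name, host, port).

    host and port are None if the deployment does not become healthy within 1200s.
    """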
release_name = f"rg-test-{cf.gen_digits_by_length(6)}"
cus_configs = {'spec.mode': 'cluster',
'spec.dependencies.msgStreamType': 'kafka',
'spec.components.image': f'harbor.milvus.io/milvus/milvus:{image_tag}',
'metadata.namespace': namespace,
'metadata.name': release_name,
'spec.components.proxy.serviceType': 'LoadBalancer',
}
milvus_op = MilvusOperator()
log.info(f"install milvus with configs: {cus_configs}")
milvus_op.install(cus_configs)
healthy = milvus_op.wait_for_healthy(release_name, namespace, timeout=1200)
log.info(f"milvus healthy: {healthy}")
if healthy:
endpoint = milvus_op.endpoint(release_name, namespace).split(':')
log.info(f"milvus endpoint: {endpoint}")
host = endpoint[0]
port = endpoint[1]
return release_name, host, port
else:
return release_name, None, None
class TestResourceGroup(TestcaseBase):
def teardown_method(self, method):
log.info(("*" * 35) + " teardown " + ("*" * 35))
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
milvus_op = MilvusOperator()
milvus_op.uninstall(self.release_name, namespace)
connections.disconnect("default")
connections.remove_connection("default")
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_scale_up(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
        # scale querynodes up to 8, one replica at a time
        for replicas in range(1, 9):
milvus_op.scale(release_name, 'queryNode', replicas, namespace)
time.sleep(10)
# get querynode info
qn = mil.query_nodes
log.info(f"query node info: {len(qn)}")
resource_group = self.utility.describe_resource_group(name)
log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}")
list_all_resource_groups()
        # assert the rg holds at least its requested 4 nodes
resource_group = self.utility.describe_resource_group(name)
assert resource_group.num_available_node >= 4
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_scale_down(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 8, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
        # scale querynodes down from 8 to 1, one replica at a time
        for replicas in range(8, 0, -1):
milvus_op.scale(release_name, 'queryNode', replicas, namespace)
time.sleep(10)
resource_group = self.utility.describe_resource_group(name)
log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}")
list_all_resource_groups()
        # assert the rg is left with at most 1 node
resource_group = self.utility.describe_resource_group(name)
assert resource_group.num_available_node <= 1
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_all_querynode_add_into_two_different_config_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 8, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
rg_list = []
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
rg_list.append(name)
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
rg_list.append(name)
        # assert that both rgs satisfy their requested node_num
list_all_resource_groups()
for rg in rg_list:
resource_group = self.utility.describe_resource_group(rg)
assert resource_group.num_available_node >= resource_group.config.requests.node_num
        # scale querynodes down from 8 to 2, one replica at a time
for replicas in range(8, 1, -1):
milvus_op.scale(release_name, 'queryNode', replicas, namespace)
time.sleep(10)
for name in rg_list:
resource_group = self.utility.describe_resource_group(name)
log.info(f"Resource group {name} info:\n {display_resource_group_info(resource_group)}")
list_all_resource_groups()
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_querynode_add_into_two_different_config_rg_one_by_one(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
rg_list = []
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
rg_list.append(name)
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
rg_list.append(name)
for replicas in range(1, 8):
milvus_op.scale(release_name, 'queryNode', replicas, namespace)
time.sleep(10)
list_all_resource_groups()
for rg in rg_list:
resource_group = self.utility.describe_resource_group(rg)
assert resource_group.num_available_node >= resource_group.config.requests.node_num
        # scale querynodes back down to 2, one replica at a time
for replicas in range(8, 1, -1):
milvus_op.scale(release_name, 'queryNode', replicas, namespace)
time.sleep(10)
list_all_resource_groups()
for rg in rg_list:
resource_group = self.utility.describe_resource_group(rg)
assert resource_group.num_available_node >= 1
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_querynode_add_into_new_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
milvus_op.scale(release_name, 'queryNode', 10, namespace)
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
rg_list = []
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
rg_list.append(name)
for rg in rg_list:
resource_group = self.utility.describe_resource_group(rg)
assert resource_group.num_available_node >= resource_group.config.requests.node_num
        # create a new rg with request node_num=3, limit node_num=6;
        # querynodes should move from the default rg into the new rg
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
rg_list.append(name)
list_all_resource_groups()
for rg in rg_list:
resource_group = self.utility.describe_resource_group(rg)
assert resource_group.num_available_node >= resource_group.config.requests.node_num
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_with_two_rg_link_to_each_other_when_all_not_reached_to_request(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
milvus_op.scale(release_name, 'queryNode', 8, namespace)
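        # cap the default rg (requests=0, limits=1) so spare querynodes are
        # free to move into the custom rgs created below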
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 1})})
# create rg1 with request node_num=4, limit node_num=6
name = cf.gen_unique_str("rg")
rg1_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
name = cf.gen_unique_str("rg")
rg2_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 6},
))
list_all_resource_groups()
log.info("update resource group")
utility.update_resource_groups(
{rg1_name: ResourceGroupConfig(requests={"node_num": 6},
limits={"node_num": 8},
transfer_from=[{"resource_group": rg2_name}],
transfer_to=[{"resource_group": rg2_name}], )})
time.sleep(10)
list_all_resource_groups()
utility.update_resource_groups(
{rg2_name: ResourceGroupConfig(requests={"node_num": 6},
limits={"node_num": 8},
transfer_from=[{"resource_group": rg1_name}],
transfer_to=[{"resource_group": rg1_name}], )})
time.sleep(10)
list_all_resource_groups()
        # expect no querynode to have moved between rg1 and rg2
resource_group = self.utility.describe_resource_group(rg1_name)
assert resource_group.num_available_node == 4
resource_group = self.utility.describe_resource_group(rg2_name)
assert resource_group.num_available_node == 4
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_with_rg_transfer_from_non_default_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
milvus_op.scale(release_name, 'queryNode', 15, namespace)
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 3})})
        # create rg1 with request node_num=2, limit node_num=2
name = cf.gen_unique_str("rg")
rg1_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 2},
limits={"node_num": 2},
))
name = cf.gen_unique_str("rg")
rg2_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 6},
limits={"node_num": 10},
))
list_all_resource_groups()
rg2_available_node_before = self.utility.describe_resource_group(rg2_name).num_available_node
log.info("update resource group")
utility.update_resource_groups(
{rg1_name: ResourceGroupConfig(requests={"node_num": 4},
limits={"node_num": 6},
transfer_from=[{"resource_group": rg2_name}],
transfer_to=[{"resource_group": rg2_name}], )})
time.sleep(10)
list_all_resource_groups()
        # expect rg1 to have recruited querynodes from rg2, not from the default rg
rg2_available_node_after = self.utility.describe_resource_group(rg2_name).num_available_node
assert rg2_available_node_before > rg2_available_node_after
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_with_rg_transfer_to_non_default_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
milvus_op.scale(release_name, 'queryNode', 10, namespace)
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})})
        # create rg1 with request node_num=2, limit node_num=10
name = cf.gen_unique_str("rg")
rg1_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 2},
limits={"node_num": 10},
))
name = cf.gen_unique_str("rg")
rg2_name = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 4},
limits={"node_num": 4},
))
list_all_resource_groups()
rg1_node_available_before = self.utility.describe_resource_group(rg1_name).num_available_node
log.info("update resource group")
utility.update_resource_groups(
{rg2_name: ResourceGroupConfig(requests={"node_num": 2},
limits={"node_num": 2},
transfer_from=[{"resource_group": rg1_name}],
transfer_to=[{"resource_group": rg1_name}], )})
time.sleep(10)
list_all_resource_groups()
        # expect rg2 to have handed querynodes to rg1, not to the default rg
rg1_node_available_after = self.utility.describe_resource_group(rg1_name).num_available_node
assert rg1_node_available_after > rg1_node_available_before
@pytest.mark.tags(CaseLabel.L3)
def test_resource_group_with_rg_transfer_with_rg_list(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
milvus_op.scale(release_name, 'queryNode', 12, namespace)
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 1})})
        # create source_rg with request node_num=1, limit node_num=1
name = cf.gen_unique_str("rg")
source_rg = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 1},
limits={"node_num": 1},
))
name = cf.gen_unique_str("rg")
small_rg = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 2},
limits={"node_num": 4},
))
name = cf.gen_unique_str("rg")
big_rg = name
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
list_all_resource_groups()
small_rg_node_available_before = self.utility.describe_resource_group(small_rg).num_available_node
big_rg_node_available_before = self.utility.describe_resource_group(big_rg).num_available_node
log.info("update resource group")
utility.update_resource_groups(
{source_rg: ResourceGroupConfig(requests={"node_num": 6},
limits={"node_num": 6},
transfer_from=[{"resource_group": small_rg}, {"resource_group": big_rg}],
)})
time.sleep(10)
list_all_resource_groups()
        # expect source_rg to have recruited nodes from small_rg and big_rg
small_rg_node_available_after = self.utility.describe_resource_group(small_rg).num_available_node
big_rg_node_available_after = self.utility.describe_resource_group(big_rg).num_available_node
assert (small_rg_node_available_before + big_rg_node_available_before > small_rg_node_available_after +
big_rg_node_available_after)
class TestReplicasManagement(TestcaseBase):
def teardown_method(self, method):
log.info(("*" * 35) + " teardown " + ("*" * 35))
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
milvus_op = MilvusOperator()
milvus_op.uninstall(self.release_name, namespace)
connections.disconnect("default")
connections.remove_connection("default")
@pytest.mark.tags(CaseLabel.L3)
def test_load_replicas_one_collection_multi_replicas_to_multi_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 12, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
resource_groups = []
for i in range(4):
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 2},
limits={"node_num": 6},
))
resource_groups.append(name)
list_all_resource_groups()
        # create a collection and load it with one replica per rg (4 replicas)
self.skip_connection = True
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=True)[0:2]
collection_w.release()
log.info(f"resource groups: {resource_groups}")
collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups)
list_all_resource_groups()
# list replicas
replicas = collection_w.get_replicas()
log.info(f"replicas: {replicas}")
rg_to_scale_down = resource_groups[0]
# scale down a rg to 1 node
self.utility.update_resource_groups(
{rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 1},
limits={"node_num": 1}, )}
)
list_all_resource_groups()
replicas = collection_w.get_replicas()
log.info(f"replicas: {replicas}")
        # scale the rg down to 0 nodes
self.utility.update_resource_groups(
{rg_to_scale_down: ResourceGroupConfig(requests={"node_num": 0},
limits={"node_num": 0}, )}
)
list_all_resource_groups()
replicas = collection_w.get_replicas()
log.info(f"replicas: {replicas}")
@pytest.mark.tags(CaseLabel.L3)
def test_load_multi_collection_multi_replicas_to_multi_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 12, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
        # create three rgs with request node_num=3, limit node_num=6
resource_groups = []
for i in range(3):
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
resource_groups.append(name)
log.info(f"resource groups: {resource_groups}")
list_all_resource_groups()
col_list = []
        # create collections and load each with one replica per rg
self.skip_connection = True
for i in range(3):
prefix = cf.gen_unique_str("test_rg")
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=True)[0:2]
collection_w.release()
col_list.append(collection_w)
collection_w.load(replica_number=len(resource_groups), _resource_groups=resource_groups)
list_all_resource_groups()
# list replicas
for col in col_list:
replicas = col.get_replicas()
log.info(f"replicas: {replicas}")
@pytest.mark.tags(CaseLabel.L3)
def test_load_multi_collection_one_replicas_to_multi_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 12, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
        # create three rgs with request node_num=3, limit node_num=6
resource_groups = []
for i in range(3):
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
resource_groups.append(name)
log.info(f"resource groups: {resource_groups}")
list_all_resource_groups()
col_list = []
        # create collections and load each with a single replica across the rgs
self.skip_connection = True
for i in range(3):
prefix = cf.gen_unique_str("test_rg")
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=True)[0:2]
collection_w.release()
col_list.append(collection_w)
collection_w.load(replica_number=1, _resource_groups=resource_groups)
list_all_resource_groups()
# list replicas
for col in col_list:
replicas = col.get_replicas()
log.info(f"replicas: {replicas}")
@pytest.mark.tags(CaseLabel.L3)
def test_transfer_replicas_to_other_rg(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 12, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
        # create three rgs with request node_num=3, limit node_num=6
resource_groups = []
for i in range(3):
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 3},
limits={"node_num": 6},
))
resource_groups.append(name)
log.info(f"resource groups: {resource_groups}")
list_all_resource_groups()
col_list = []
        # create collections and load each into its own rg with a single replica
self.skip_connection = True
for i in range(3):
prefix = cf.gen_unique_str("test_rg")
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=True)[0:2]
collection_w.release()
col_list.append(collection_w)
collection_w.load(replica_number=1, _resource_groups=[resource_groups[i]])
list_all_resource_groups()
# list replicas
for col in col_list:
replicas = col.get_replicas()
log.info(f"replicas: {replicas}")
# transfer replicas to default rg
self.utility.transfer_replica(source_group=resource_groups[0], target_group=DEFAULT_RESOURCE_GROUP,
collection_name=col_list[0].name, num_replicas=1)
list_all_resource_groups()
# list replicas
for col in col_list:
replicas = col.get_replicas()
log.info(f"replicas: {replicas}")
class TestServiceAvailableDuringScale(TestcaseBase):
def init_health_checkers(self, collection_name=None):
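        """Build insert/upsert/search/hybrid-search/query/delete checkers that
        share one collection, used to probe service availability."""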
c_name = collection_name
shards_num = 5
checkers = {
Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num),
Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num),
Op.search: SearchChecker(collection_name=c_name, shards_num=shards_num),
Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num),
Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num),
Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num),
}
self.health_checkers = checkers
def teardown_method(self, method):
log.info(("*" * 35) + " teardown " + ("*" * 35))
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
milvus_op = MilvusOperator()
milvus_op.uninstall(self.release_name, namespace)
connections.disconnect("default")
connections.remove_connection("default")
def test_service_available_during_scale_up(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 3, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})})
# create rg
resource_groups = []
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 1},
limits={"node_num": 1},
))
resource_groups.append(name)
list_all_resource_groups()
c_name = cf.gen_unique_str("Checker_")
self.init_health_checkers(collection_name=c_name)
        # load the collection into the non-default rg
self.health_checkers[Op.search].c_wrap.release()
self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups)
cc.start_monitor_threads(self.health_checkers)
log.info("*********************Load Start**********************")
request_duration = 360
for i in range(10):
time.sleep(request_duration//10)
for k, v in self.health_checkers.items():
v.check_result()
            # scale up the rg's querynodes when progress reaches 3/10
if i == 3:
utility.update_resource_groups(
{name: ResourceGroupConfig(requests={"node_num": 2}, limits={"node_num": 2})})
log.info(f"scale up querynode in rg {name} from 1 to 2")
list_all_resource_groups()
display_segment_distribution_info(c_name, release_name)
time.sleep(60)
ra = ResultAnalyzer()
ra.get_stage_success_rate()
assert_statistic(self.health_checkers)
for k, v in self.health_checkers.items():
v.terminate()
def test_service_available_during_scale_down(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 3, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 5})})
# create rg
resource_groups = []
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 2},
limits={"node_num": 2},
))
resource_groups.append(name)
list_all_resource_groups()
c_name = cf.gen_unique_str("Checker_")
self.init_health_checkers(collection_name=c_name)
        # load the collection into the non-default rg
self.health_checkers[Op.search].c_wrap.release()
self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups)
cc.start_monitor_threads(self.health_checkers)
list_all_resource_groups()
log.info("*********************Load Start**********************")
request_duration = 360
for i in range(10):
time.sleep(request_duration//10)
for k, v in self.health_checkers.items():
v.check_result()
            # scale down the rg's querynodes when progress reaches 3/10
if i == 3:
list_all_resource_groups()
utility.update_resource_groups(
{name: ResourceGroupConfig(requests={"node_num": 1}, limits={"node_num": 1})})
log.info(f"scale down querynode in rg {name} from 2 to 1")
list_all_resource_groups()
time.sleep(60)
ra = ResultAnalyzer()
ra.get_stage_success_rate()
assert_statistic(self.health_checkers)
for k, v in self.health_checkers.items():
v.terminate()
class TestServiceAvailableDuringTransferReplicas(TestcaseBase):
def init_health_checkers(self, collection_name=None):
c_name = collection_name
shards_num = 5
checkers = {
Op.insert: InsertChecker(collection_name=c_name, shards_num=shards_num),
Op.upsert: UpsertChecker(collection_name=c_name, shards_num=shards_num),
Op.search: SearchChecker(collection_name=c_name, shards_num=shards_num),
Op.hybrid_search: HybridSearchChecker(collection_name=c_name, shards_num=shards_num),
Op.query: QueryChecker(collection_name=c_name, shards_num=shards_num),
Op.delete: DeleteChecker(collection_name=c_name, shards_num=shards_num),
}
self.health_checkers = checkers
def teardown_method(self, method):
log.info(("*" * 35) + " teardown " + ("*" * 35))
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
milvus_op = MilvusOperator()
milvus_op.uninstall(self.release_name, namespace)
connections.disconnect("default")
connections.remove_connection("default")
def test_service_available_during_transfer_replicas(self, image_tag):
"""
steps
"""
milvus_op = MilvusOperator()
release_name, host, port = _install_milvus(image_tag=image_tag)
milvus_op.scale(release_name, 'queryNode', 5, namespace)
self.release_name = release_name
assert host is not None
connections.connect("default", host=host, port=port)
mil = MilvusSys(alias="default")
log.info(f"milvus build version: {mil.build_version}")
utility.update_resource_groups(
{DEFAULT_RESOURCE_GROUP: ResourceGroupConfig(requests={"node_num": 0}, limits={"node_num": 10})})
# create rg
resource_groups = []
for i in range(2):
name = cf.gen_unique_str("rg")
self.utility = utility
self.utility.create_resource_group(name, config=ResourceGroupConfig(
requests={"node_num": 1},
limits={"node_num": 1},
))
resource_groups.append(name)
list_all_resource_groups()
c_name = cf.gen_unique_str("Checker_")
self.init_health_checkers(collection_name=c_name)
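        # load a single replica into the first rg; the test later transfers it
        # to the second rg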
self.health_checkers[Op.search].c_wrap.release()
self.health_checkers[Op.search].c_wrap.load(_resource_groups=resource_groups[0:1])
cc.start_monitor_threads(self.health_checkers)
list_all_resource_groups()
display_segment_distribution_info(c_name, release_name)
log.info("*********************Load Start**********************")
request_duration = 360
for i in range(10):
time.sleep(request_duration//10)
for k, v in self.health_checkers.items():
v.check_result()
            # transfer the replica from the first rg to the second when
            # progress reaches 3/10
            if i == 3:
list_all_resource_groups()
display_segment_distribution_info(c_name, release_name)
self.utility.transfer_replica(source_group=resource_groups[0], target_group=resource_groups[1],
collection_name=c_name, num_replicas=1)
list_all_resource_groups()
display_segment_distribution_info(c_name, release_name)
time.sleep(60)
ra = ResultAnalyzer()
ra.get_stage_success_rate()
assert_statistic(self.health_checkers)
for k, v in self.health_checkers.items():
v.terminate()