mirror of https://github.com/milvus-io/milvus.git
[skip ci] Add more chaos tests (#6062)
Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>pull/6042/head
parent
879c1e6436
commit
4df6168c5e
|
@ -1,9 +1,10 @@
|
|||
import os
|
||||
import threading
|
||||
import glob
|
||||
from delayed_assert import expect
|
||||
import delayed_assert
|
||||
import constants
|
||||
from yaml import full_load
|
||||
from utils.util_log import test_log as log
|
||||
|
||||
|
||||
def check_config(chaos_config):
|
||||
|
@ -31,32 +32,22 @@ def gen_experiment_config(yaml):
|
|||
|
||||
|
||||
def start_monitor_threads(checkers={}):
|
||||
for k in checkers.keys():
|
||||
v = checkers[k]
|
||||
t = threading.Thread(target=v.keep_running, args=())
|
||||
threads = {}
|
||||
for k, ch in checkers.items():
|
||||
t = threading.Thread(target=ch.keep_running, args=())
|
||||
t.start()
|
||||
|
||||
|
||||
def assert_statistic(checkers, expectations={}):
|
||||
for k in checkers.keys():
|
||||
# expect succ if no expectations
|
||||
succ_rate = checkers[k].succ_rate()
|
||||
if expectations.get(k, '') == constants.FAIL:
|
||||
print(f"Expect Fail: {str(checkers[k])} current succ rate {succ_rate}")
|
||||
expect(succ_rate < 0.49)
|
||||
else:
|
||||
print(f"Expect Succ: {str(checkers[k])} current succ rate {succ_rate}")
|
||||
expect(succ_rate > 0.90)
|
||||
threads[k] = t
|
||||
return threads
|
||||
|
||||
|
||||
def get_env_variable_by_name(name):
|
||||
""" get env variable by name"""
|
||||
try:
|
||||
env_var = os.environ[name]
|
||||
print(f"env_variable: {env_var}")
|
||||
log.debug(f"env_variable: {env_var}")
|
||||
return str(env_var)
|
||||
except Exception as e:
|
||||
print(f"fail to get env variables, error: {str(e)}")
|
||||
log.debug(f"fail to get env variables, error: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -64,14 +55,18 @@ def get_chaos_yamls():
|
|||
chaos_env = get_env_variable_by_name(constants.CHAOS_CONFIG_ENV)
|
||||
if chaos_env is not None:
|
||||
if os.path.isdir(chaos_env):
|
||||
print(f"chaos_env is a dir: {chaos_env}")
|
||||
log.debug(f"chaos_env is a dir: {chaos_env}")
|
||||
return glob.glob(chaos_env + 'chaos_*.yaml')
|
||||
elif os.path.isfile(chaos_env):
|
||||
print(f"chaos_env is a file: {chaos_env}")
|
||||
log.debug(f"chaos_env is a file: {chaos_env}")
|
||||
return [chaos_env]
|
||||
else:
|
||||
# not a valid directory, return default
|
||||
pass
|
||||
print("not a valid directory or file, return default")
|
||||
return glob.glob(constants.TESTS_CONFIG_LOCATION + 'chaos_*.yaml')
|
||||
log.debug("not a valid directory or file, return default")
|
||||
return glob.glob(constants.TESTS_CONFIG_LOCATION + constants.ALL_CHAOS_YAMLS)
|
||||
|
||||
|
||||
def reconnect(conn, host, port):
    """Register ``host``/``port`` under the 'default' alias and connect to it.

    :param conn: object exposing ``add_connection(**aliases)`` and ``connect(alias=...)``
    :param host: server host stored under the 'default' alias
    :param port: server port stored under the 'default' alias
    :return: whatever ``conn.connect(alias='default')`` returns
    """
    address = {"host": host, "port": port}
    conn.add_connection(default=address)
    return conn.connect(alias='default')
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=datacoord of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-datacoord-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: datacoord
  scheduler:
    cron: '@every 2s'
|
|
@ -1,16 +1,17 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: test-data-node-pod-kill
|
||||
namespace:
|
||||
name: test-datanode-podkill
|
||||
namespace: chaos-testing
|
||||
spec:
|
||||
action: pod-kill
|
||||
mode: one
|
||||
selector:
|
||||
namespaces:
|
||||
- default # target namespace of milvus deployment
|
||||
- chaos-testing # target namespace of milvus deployment
|
||||
labelSelectors:
|
||||
app.kubernetes.io/name: milvus-ha
|
||||
component: 'datanode'
|
||||
app.kubernetes.io/instance: milvus-chaos
|
||||
app.kubernetes.io/name: milvus
|
||||
component: datanode
|
||||
scheduler:
|
||||
cron: '@every 20s'
|
||||
cron: '@every 2s'
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=indexcoord of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-indexcoord-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: indexcoord
  scheduler:
    cron: '@every 2s'
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=indexnode of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-indexnode-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: indexnode
  scheduler:
    cron: '@every 2s'
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=proxy of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-proxy-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: proxy
  scheduler:
    cron: '@every 2s'
|
|
@ -1,16 +0,0 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: test-proxy-node-pod-kill
|
||||
namespace:
|
||||
spec:
|
||||
action: pod-kill
|
||||
mode: one
|
||||
selector:
|
||||
namespaces:
|
||||
- default # target namespace of milvus deployment
|
||||
labelSelectors:
|
||||
app.kubernetes.io/name: milvus-ha
|
||||
component: 'proxynode'
|
||||
scheduler:
|
||||
cron: '@every 20s'
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=querycoord of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-querycoord-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: querycoord
  scheduler:
    cron: '@every 2s'
|
|
@ -1,16 +1,17 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: test-query-node-pod-kill
|
||||
namespace:
|
||||
name: test-querynode-podkill
|
||||
namespace: chaos-testing
|
||||
spec:
|
||||
action: pod-kill
|
||||
mode: one
|
||||
selector:
|
||||
namespaces:
|
||||
- default # target namespace of milvus deployment
|
||||
- chaos-testing # target namespace of milvus deployment
|
||||
labelSelectors:
|
||||
app.kubernetes.io/name: milvus-ha
|
||||
component: 'querynode'
|
||||
app.kubernetes.io/instance: milvus-chaos
|
||||
app.kubernetes.io/name: milvus
|
||||
component: querynode
|
||||
scheduler:
|
||||
cron: '@every 20s'
|
||||
cron: '@every 2s'
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# PodChaos experiment: pod-kill (mode "one") against pods labelled
# component=rootcoord of the milvus-chaos instance, scheduled '@every 2s'.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: test-rootcoord-podkill
  namespace: chaos-testing
spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
      - chaos-testing  # target namespace of milvus deployment
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: milvus
      component: rootcoord
  scheduler:
    cron: '@every 2s'
|
|
@ -1,15 +1,17 @@
|
|||
apiVersion: chaos-mesh.org/v1alpha1
|
||||
kind: PodChaos
|
||||
metadata:
|
||||
name: test-standalone-pod-kill
|
||||
namespace:
|
||||
name: test-standalone-podkill
|
||||
namespace: chaos-testing
|
||||
spec:
|
||||
action: pod-kill
|
||||
mode: one
|
||||
selector:
|
||||
namespaces:
|
||||
- default # target namespace of milvus deployment
|
||||
- chaos-testing # target namespace of milvus deployment
|
||||
labelSelectors:
|
||||
app.kubernetes.io/name: milvus-ha # pod of standalone milvus
|
||||
app.kubernetes.io/instance: milvus-chaos
|
||||
app.kubernetes.io/name: milvus
|
||||
component: standalone
|
||||
scheduler:
|
||||
cron: '@every 20s'
|
||||
cron: '@every 10s'
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
# standalone
|
||||
# 3 pods(standalone-ha-blabla, etcd, minio)
|
||||
# cluster-1-node
|
||||
# 11 pods(proxy, master, query node, query service, data node, data service,
|
||||
# index node, index service, pulsar, etcd, minio)
|
||||
# 11 pods(proxy, rootcoord, querynode, querycoord, datanode, datacoord,
|
||||
# indexnode, indexcoord, pulsar, etcd, minio)
|
||||
# cluster-n-nodes
|
||||
# 11 pods* n: kill one and kill all
|
||||
|
||||
|
@ -33,12 +33,19 @@ Collections:
|
|||
search: fail
|
||||
query: fail
|
||||
cluster_n_nodes:
|
||||
search: degrade # keep functional, but performance degraded
|
||||
search: degrade
|
||||
query: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_queryservice_podkill
|
||||
chaos: chaos_queryservice_podkill.yaml
|
||||
name: test_querycoord_podkill
|
||||
chaos: chaos_querycoord_podkill.yaml
|
||||
expectation:
|
||||
cluster_1_node:
|
||||
search: fail
|
||||
query: fail
|
||||
cluster_n_nodes:
|
||||
search: degrade
|
||||
query: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_datanode_podkill
|
||||
|
@ -51,16 +58,32 @@ Collections:
|
|||
insert: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_dataservice_podkill
|
||||
chaos: chaos_dataservice_podkill.yaml
|
||||
name: test_datacoord_podkill
|
||||
chaos: chaos_datacoord_podkill.yaml
|
||||
expectation:
|
||||
cluster_1_node:
|
||||
insert: succ
|
||||
flush: fail
|
||||
cluster_n_nodes:
|
||||
insert: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_indexnode_podkill
|
||||
chaos: chaos_indexnode_podkill.yaml
|
||||
expectation:
|
||||
cluster_1_node:
|
||||
index: fail
|
||||
cluster_n_nodes:
|
||||
index: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_indexservice_podkill
|
||||
chaos: chaos_indexservice_podkill.yaml
|
||||
name: test_indexcoord_podkill
|
||||
chaos: chaos_indexcoord_podkill.yaml
|
||||
expectation:
|
||||
cluster_1_node:
|
||||
index: fail
|
||||
cluster_n_nodes:
|
||||
insert: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_proxy_podkill
|
||||
|
@ -74,11 +97,21 @@ Collections:
|
|||
search: fail
|
||||
query: fail
|
||||
cluster_n_nodes:
|
||||
insert: degrade
|
||||
insert: fail
|
||||
-
|
||||
testcase:
|
||||
name: test_master_podkill
|
||||
chaos: chaos_master_podkill.yaml
|
||||
name: test_rootcoord_podkill
|
||||
chaos: chaos_rootcoord_podkill.yaml
|
||||
expectation:
|
||||
cluster_1_node:
|
||||
create: fail
|
||||
insert: fail
|
||||
flush: fail
|
||||
index: fail
|
||||
search: fail
|
||||
query: fail
|
||||
cluster_n_nodes:
|
||||
insert: degrade
|
||||
-
|
||||
testcase:
|
||||
name: test_etcd_podkill
|
||||
|
@ -87,6 +120,10 @@ Collections:
|
|||
testcase:
|
||||
name: test_minio_podkill
|
||||
chaos: chaos_minio_podkill.yaml
|
||||
-
|
||||
testcase:
|
||||
name: test_pulsar_podkill
|
||||
chaos: chaos_pulsar_podkill.yaml
|
||||
-
|
||||
testcase:
|
||||
name: test_querynode_cpu100p
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
from __future__ import print_function
|
||||
import logging
|
||||
from kubernetes import client, config
|
||||
from kubernetes.client.rest import ApiException
|
||||
import constants as cf
|
||||
|
||||
|
||||
logger = logging.getLogger("milvus_benchmark.chaos.chaosOpt")
|
||||
from utils.util_log import test_log as log
|
||||
|
||||
|
||||
class ChaosOpt(object):
|
||||
|
@ -15,35 +12,30 @@ class ChaosOpt(object):
|
|||
self.namespace = namespace
|
||||
self.plural = kind.lower()
|
||||
|
||||
# def get_metadata_name(self):
|
||||
# return self.metadata_name
|
||||
|
||||
def create_chaos_object(self, body):
|
||||
# body = create_chaos_config(self.plural, self.metadata_name, spec_params)
|
||||
# logger.info(body)
|
||||
pretty = 'true'
|
||||
config.load_kube_config()
|
||||
api_instance = client.CustomObjectsApi()
|
||||
try:
|
||||
api_response = api_instance.create_namespaced_custom_object(self.group, self.version, self.namespace,
|
||||
plural=self.plural, body=body, pretty=pretty)
|
||||
print(api_response)
|
||||
logging.getLogger().info(api_instance)
|
||||
log.debug(f"create chaos response: {api_response}")
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->create_namespaced_custom_object: %s\n" % e)
|
||||
log.error("Exception when calling CustomObjectsApi->create_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
|
||||
def delete_chaos_object(self, metadata_name):
|
||||
def delete_chaos_object(self, metadata_name, raise_ex=True):
|
||||
print(metadata_name)
|
||||
try:
|
||||
config.load_kube_config()
|
||||
api_instance = client.CustomObjectsApi()
|
||||
data = api_instance.delete_namespaced_custom_object(self.group, self.version, self.namespace, self.plural,
|
||||
metadata_name)
|
||||
logger.info(data)
|
||||
log.debug(f"delete chaos response: {data}")
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
log.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
|
||||
if raise_ex:
|
||||
raise Exception(str(e))
|
||||
|
||||
def list_chaos_object(self):
|
||||
try:
|
||||
|
@ -51,9 +43,8 @@ class ChaosOpt(object):
|
|||
api_instance = client.CustomObjectsApi()
|
||||
data = api_instance.list_namespaced_custom_object(self.group, self.version, self.namespace,
|
||||
plural=self.plural)
|
||||
# pprint(data)
|
||||
except ApiException as e:
|
||||
logger.error("Exception when calling CustomObjectsApi->list_namespaced_custom_object: %s\n" % e)
|
||||
log.error("Exception when calling CustomObjectsApi->list_namespaced_custom_object: %s\n" % e)
|
||||
raise Exception(str(e))
|
||||
return data
|
||||
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
import datetime
|
||||
from enum import Enum
|
||||
from random import randint
|
||||
|
||||
from time import sleep
|
||||
from base.collection_wrapper import ApiCollectionWrapper
|
||||
from common import common_func as cf
|
||||
from common import common_type as ct
|
||||
import constants
|
||||
from utils.util_log import test_log as log
|
||||
|
||||
|
||||
class Op(Enum):
|
||||
|
@ -23,6 +26,12 @@ class Checker:
|
|||
self._succ = 0
|
||||
self._fail = 0
|
||||
self._running = True
|
||||
self.c_wrap = ApiCollectionWrapper()
|
||||
self.c_wrap.init_collection(name=cf.gen_unique_str('Checker_'),
|
||||
schema=cf.gen_default_collection_schema())
|
||||
self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH),
|
||||
check_task='check_nothing')
|
||||
self.initial_entities = self.c_wrap.num_entities # do as a flush
|
||||
|
||||
def total(self):
|
||||
return self._succ + self._fail
|
||||
|
@ -39,47 +48,45 @@ class Checker:
|
|||
|
||||
|
||||
class SearchChecker(Checker):
|
||||
def __init__(self, collection_wrap):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.c_wrap = collection_wrap
|
||||
self.c_wrap.load() # do load before search
|
||||
|
||||
def keep_running(self):
|
||||
while self._running is True:
|
||||
search_vec = cf.gen_vectors(5, ct.default_dim)
|
||||
_, result = self.c_wrap.search(
|
||||
data=search_vec,
|
||||
params={"nprobe": 32},
|
||||
limit=1,
|
||||
check_task="nothing"
|
||||
anns_field=ct.default_float_vec_field_name,
|
||||
param={"nprobe": 32},
|
||||
limit=1, check_task='check_nothing'
|
||||
)
|
||||
if result is True:
|
||||
if result:
|
||||
self._succ += 1
|
||||
else:
|
||||
self._fail += 1
|
||||
sleep(constants.WAIT_PER_OP / 10)
|
||||
|
||||
|
||||
class InsertFlushChecker(Checker):
|
||||
def __init__(self, connection, collection_wrap, do_flush=False):
|
||||
def __init__(self, flush=False):
|
||||
super().__init__()
|
||||
self.conn = connection
|
||||
self.c_wrap = collection_wrap
|
||||
self._do_flush = do_flush
|
||||
self._flush = flush
|
||||
|
||||
def keep_running(self):
|
||||
while self._running is True:
|
||||
_, insert_result = self.c_wrap.insert(
|
||||
data=cf.gen_default_dataframe_data(nb=constants.DELTA_PER_INS)
|
||||
)
|
||||
if self._do_flush is False:
|
||||
if insert_result is True:
|
||||
while self._running:
|
||||
init_entities = self.c_wrap.num_entities
|
||||
_, insert_result = \
|
||||
self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS),
|
||||
check_task='check_nothing')
|
||||
if not self._flush:
|
||||
if insert_result:
|
||||
self._succ += 1
|
||||
else:
|
||||
self._fail += 1
|
||||
sleep(constants.WAIT_PER_OP / 10)
|
||||
else:
|
||||
entities_1 = self.c_wrap.num_entities
|
||||
self.conn.flush([self.c_wrap.name])
|
||||
entities_2 = self.c_wrap.num_entities
|
||||
if entities_2 == (entities_1 + constants.DELTA_PER_INS):
|
||||
if self.c_wrap.num_entities == (init_entities + constants.DELTA_PER_INS):
|
||||
self._succ += 1
|
||||
else:
|
||||
self._fail += 1
|
||||
|
@ -88,35 +95,81 @@ class InsertFlushChecker(Checker):
|
|||
class CreateChecker(Checker):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.c_wrapper = ApiCollectionWrapper()
|
||||
|
||||
def keep_running(self):
|
||||
while self._running is True:
|
||||
sleep(2)
|
||||
collection, result = self.c_wrapper.init_collection(
|
||||
_, result = self.c_wrap.init_collection(
|
||||
name=cf.gen_unique_str("CreateChecker_"),
|
||||
schema=cf.gen_default_collection_schema(),
|
||||
check_task="check_nothing"
|
||||
check_task='check_nothing'
|
||||
)
|
||||
if result is True:
|
||||
if result:
|
||||
self._succ += 1
|
||||
self.c_wrapper.drop(check_task="check_nothing")
|
||||
self.c_wrap.drop(check_task="check_nothing")
|
||||
else:
|
||||
self._fail += 1
|
||||
sleep(constants.WAIT_PER_OP / 10)
|
||||
|
||||
|
||||
class IndexChecker(Checker):
    """Checker whose loop creates an index on the collection and drops it again."""

    def __init__(self):
        super().__init__()
        # insert 5 * ENTITIES_FOR_SEARCH rows so there is data to index
        bulk_rows = cf.gen_default_list_data(nb=5*constants.ENTITIES_FOR_SEARCH)
        self.c_wrap.insert(data=bulk_rows, check_task='check_nothing')
        # reading num_entities is used here as a flush before indexing
        log.debug(f"Index ready entities: {self.c_wrap.num_entities }")

    def keep_running(self):
        """Loop while ``_running`` is truthy, counting create_index successes and failures."""
        while self._running:
            _res, created = self.c_wrap.create_index(ct.default_float_vec_field_name,
                                                     constants.DEFAULT_INDEX_PARAM,
                                                     name=cf.gen_unique_str('index_'),
                                                     check_task='check_nothing')
            if not created:
                self._fail += 1
                continue
            self._succ += 1
            # drop the index so the next iteration can create it again
            self.c_wrap.drop_index(check_task='check_nothing')
|
||||
|
||||
|
||||
class QueryChecker(Checker):
    """Checker for the query operation; the actual query call is currently disabled."""

    def __init__(self):
        super().__init__()
        # load before query
        self.c_wrap.load()

    def keep_running(self):
        """Loop while ``_running`` is truthy, recording one outcome per iteration."""
        while self._running:
            int_values = [randint(0, constants.ENTITIES_FOR_SEARCH) for _ in range(5)]
            # The query itself is commented out, so ``result`` is always False
            # and every iteration is counted as a failure.
            # term_expr = f'{ct.default_int64_field_name} in {int_values}'
            # _, result = self.c_wrap.query(term_expr, check_task='check_nothing')
            result = False
            sleep(constants.WAIT_PER_OP/10)
            if not result:
                self._fail += 1
            else:
                self._succ += 1
|
||||
|
||||
#
|
||||
# if __name__ == '__main__':
|
||||
# from pymilvus_orm import connections
|
||||
# connections.add_connection(default={"host": '10.98.0.7', "port": 19530})
|
||||
# conn = connections.connect(alias='default')
|
||||
# c_w = ApiCollectionWrapper()
|
||||
# c_w.init_collection(name=cf.gen_unique_str("collection_4_search_"),
|
||||
# schema=cf.gen_default_collection_schema())
|
||||
# c_w.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH))
|
||||
# log.debug(f"nums: {c_w.num_entities}")
|
||||
# # c_w.load()
|
||||
# # # int_values = []
|
||||
# # # for _ in range(5):
|
||||
# # # int_values.append(randint(0, constants.ENTITIES_FOR_SEARCH))
|
||||
# # term_expr = f'{ct.default_int64_field_name} in [1,2,3,4,5]'
|
||||
# # log.debug(term_expr)
|
||||
# # res, result = c_w.query(term_expr)
|
||||
#
|
||||
# res, result = c_w.create_index(ct.default_float_vec_field_name,
|
||||
# constants.DEFAULT_INDEX_PARAM,
|
||||
# name=cf.gen_unique_str('index_'),
|
||||
# check_task='check_nothing')
|
||||
# log.debug(res)
|
||||
|
|
|
@ -11,25 +11,27 @@ SERVER_HOST_DEFAULT = "127.0.0.1"
|
|||
SERVER_PORT_DEFAULT = 19530
|
||||
SERVER_VERSION = "2.0"
|
||||
|
||||
HELM_NAMESPACE = "milvus"
|
||||
BRANCH = "master"
|
||||
|
||||
DEFAULT_CPUS = 48
|
||||
|
||||
RAW_DATA_DIR = "/test/milvus/raw_data/"
|
||||
|
||||
# nars log
|
||||
LOG_PATH = "/test/milvus/benchmark/logs/{}/".format(BRANCH)
|
||||
# LOG_PATH = "/test/milvus/benchmark/logs/{}/".format(BRANCH)
|
||||
|
||||
DEFAULT_DEPLOY_MODE = "single"
|
||||
|
||||
NAMESPACE = "default"
|
||||
NAMESPACE = "chaos-testing"
|
||||
DEFAULT_API_VERSION = 'chaos-mesh.org/v1alpha1'
|
||||
DEFAULT_GROUP = 'chaos-mesh.org'
|
||||
DEFAULT_VERSION = 'v1alpha1'
|
||||
SUCC = 'succ'
|
||||
FAIL = 'fail'
|
||||
DELTA_PER_INS = 10
|
||||
ENTITIES_FOR_SEARCH = 1000
|
||||
|
||||
CHAOS_CONFIG_ENV = 'CHAOS_CONFIG_PATH' # env variable for the chaos config path
|
||||
TESTS_CONFIG_LOCATION = 'chaos_objects/'
|
||||
ALL_CHAOS_YAMLS = 'chaos_*.yaml'
|
||||
WAIT_PER_OP = 10
|
||||
DEFAULT_INDEX_PARAM = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
|
||||
|
|
|
@ -2,7 +2,8 @@ import pytest
|
|||
from time import sleep
|
||||
|
||||
from pymilvus_orm import connections
|
||||
from checker import CreateChecker, Op
|
||||
from checker import CreateChecker, InsertFlushChecker, \
|
||||
SearchChecker, QueryChecker, IndexChecker, Op
|
||||
from chaos_opt import ChaosOpt
|
||||
from utils.util_log import test_log as log
|
||||
from base.collection_wrapper import ApiCollectionWrapper
|
||||
|
@ -12,6 +13,18 @@ from common.common_type import CaseLabel
|
|||
import constants
|
||||
|
||||
|
||||
def assert_statistic(checkers, expectations=None):
    """Record a delayed expectation on the success rate of every checker.

    :param checkers: mapping of operation key -> checker exposing
        ``succ_rate()`` and ``total()``
    :param expectations: optional mapping of operation key -> expected
        outcome; a key mapped to ``constants.FAIL`` is expected to have a
        success rate below 0.49, every other key (including missing ones)
        is expected to have a success rate above 0.90
    """
    # None instead of a shared mutable ``{}`` default
    expectations = {} if expectations is None else expectations
    for op, checker in checkers.items():
        succ_rate = checker.succ_rate()
        # expect succ if no expectations
        if expectations.get(op, '') == constants.FAIL:
            log.debug(f"Expect Fail: {str(op)} succ rate {succ_rate}, total: {checker.total()}")
            delayed_assert.expect(succ_rate < 0.49)
        else:
            log.debug(f"Expect Succ: {str(op)} succ rate {succ_rate}, total: {checker.total()}")
            delayed_assert.expect(succ_rate > 0.90)
|
||||
|
||||
|
||||
class TestChaosBase:
|
||||
expect_create = constants.SUCC
|
||||
expect_insert = constants.SUCC
|
||||
|
@ -19,8 +32,11 @@ class TestChaosBase:
|
|||
expect_index = constants.SUCC
|
||||
expect_search = constants.SUCC
|
||||
expect_query = constants.SUCC
|
||||
chaos_location = ''
|
||||
host = 'localhost'
|
||||
port = 19530
|
||||
_chaos_config = None
|
||||
health_checkers = {}
|
||||
checker_threads = {}
|
||||
|
||||
def parser_testcase_config(self, chaos_yaml):
|
||||
tests_yaml = constants.TESTS_CONFIG_LOCATION + 'testcases.yaml'
|
||||
|
@ -31,12 +47,16 @@ class TestChaosBase:
|
|||
if test_chaos in chaos_yaml:
|
||||
expects = t.get('testcase', {}).get('expectation', {}) \
|
||||
.get('cluster_1_node', {})
|
||||
self.expect_create = expects.get(Op.create, constants.SUCC)
|
||||
self.expect_insert = expects.get(Op.insert, constants.SUCC)
|
||||
self.expect_flush = expects.get(Op.flush, constants.SUCC)
|
||||
self.expect_index = expects.get(Op.index, constants.SUCC)
|
||||
self.expect_search = expects.get(Op.search, constants.SUCC)
|
||||
self.expect_query = expects.get(Op.query, constants.SUCC)
|
||||
log.debug(f"yaml.expects: {expects}")
|
||||
self.expect_create = expects.get(Op.create.value, constants.SUCC)
|
||||
self.expect_insert = expects.get(Op.insert.value, constants.SUCC)
|
||||
self.expect_flush = expects.get(Op.flush.value, constants.SUCC)
|
||||
self.expect_index = expects.get(Op.index.value, constants.SUCC)
|
||||
self.expect_search = expects.get(Op.search.value, constants.SUCC)
|
||||
self.expect_query = expects.get(Op.query.value, constants.SUCC)
|
||||
log.debug(f"self.expects: create:{self.expect_create}, insert:{self.expect_insert}, "
|
||||
f"flush:{self.expect_flush}, index:{self.expect_index}, "
|
||||
f"search:{self.expect_search}, query:{self.expect_query}")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
@ -44,117 +64,110 @@ class TestChaosBase:
|
|||
|
||||
class TestChaos(TestChaosBase):
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L3)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def connection(self):
|
||||
connections.add_connection(default={"host": "192.168.1.239", "port": 19530})
|
||||
def connection(self, host, port):
|
||||
connections.add_connection(default={"host": host, "port": port})
|
||||
conn = connections.connect(alias='default')
|
||||
if conn is None:
|
||||
raise Exception("no connections")
|
||||
self.host = host
|
||||
self.port = port
|
||||
return conn
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def collection_wrap_4_insert(self, connection):
|
||||
c_wrap = ApiCollectionWrapper()
|
||||
c_wrap.init_collection(name=cf.gen_unique_str("collection_4_insert"),
|
||||
schema=cf.gen_default_collection_schema(),
|
||||
check_task="check_nothing")
|
||||
return c_wrap
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def collection_wrap_4_flush(self, connection):
|
||||
c_wrap = ApiCollectionWrapper()
|
||||
c_wrap.init_collection(name=cf.gen_unique_str("collection_4_insert"),
|
||||
schema=cf.gen_default_collection_schema(),
|
||||
check_task="check_nothing")
|
||||
return c_wrap
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def collection_wrap_4_search(self, connection):
|
||||
c_wrap = ApiCollectionWrapper()
|
||||
c_wrap.init_collection(name=cf.gen_unique_str("collection_4_search_"),
|
||||
schema=cf.gen_default_collection_schema(),
|
||||
check_task="check_nothing")
|
||||
c_wrap.insert(data=cf.gen_default_dataframe_data(nb=10000))
|
||||
return c_wrap
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def init_health_checkers(self, connection, collection_wrap_4_insert,
|
||||
collection_wrap_4_flush, collection_wrap_4_search):
|
||||
checkers = {}
|
||||
# search_ch = SearchChecker(collection_wrap=collection_wrap_4_search)
|
||||
# checkers[Op.search] = search_ch
|
||||
# insert_ch = InsertFlushChecker(connection=connection,
|
||||
# collection_wrap=collection_wrap_4_insert)
|
||||
# checkers[Op.insert] = insert_ch
|
||||
# flush_ch = InsertFlushChecker(connection=connection,
|
||||
# collection_wrap=collection_wrap_4_flush,
|
||||
# do_flush=True)
|
||||
# checkers[Op.flush] = flush_ch
|
||||
create_ch = CreateChecker()
|
||||
checkers[Op.create] = create_ch
|
||||
|
||||
def init_health_checkers(self, connection):
|
||||
checkers = {
|
||||
Op.create: CreateChecker(),
|
||||
Op.insert: InsertFlushChecker(),
|
||||
Op.flush: InsertFlushChecker(flush=True),
|
||||
Op.index: IndexChecker(),
|
||||
Op.search: SearchChecker(),
|
||||
Op.query: QueryChecker()
|
||||
}
|
||||
self.health_checkers = checkers
|
||||
|
||||
def teardown(self):
|
||||
for ch in self.health_checkers.values():
|
||||
chaos_opt = ChaosOpt(self._chaos_config['kind'])
|
||||
meta_name = self._chaos_config.get('metadata', None).get('name', None)
|
||||
chaos_opt.delete_chaos_object(meta_name, raise_ex=False)
|
||||
for k, ch in self.health_checkers.items():
|
||||
ch.terminate()
|
||||
pass
|
||||
log.debug(f"tear down: checker {k} terminated")
|
||||
sleep(2)
|
||||
for k, t in self.checker_threads.items():
|
||||
log.debug(f"Thread {k} is_alive(): {t.is_alive()}")
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L3)
|
||||
@pytest.mark.parametrize('chaos_yaml', get_chaos_yamls())
|
||||
def test_chaos(self, chaos_yaml):
|
||||
# start the monitor threads to check the milvus ops
|
||||
start_monitor_threads(self.health_checkers)
|
||||
log.debug("*********************Chaos Test Start**********************")
|
||||
log.debug(connections.get_connection_addr('default'))
|
||||
self.checker_threads = start_monitor_threads(self.health_checkers)
|
||||
|
||||
# parse chaos object
|
||||
print("test.start")
|
||||
chaos_config = gen_experiment_config(chaos_yaml)
|
||||
self._chaos_config = chaos_config # cache the chaos config for tear down
|
||||
log.debug(chaos_config)
|
||||
|
||||
# parse the test expectations in testcases.yaml
|
||||
self.parser_testcase_config(chaos_yaml)
|
||||
if self.parser_testcase_config(chaos_yaml) is False:
|
||||
log.error("Fail to get the testcase info in testcases.yaml")
|
||||
assert False
|
||||
|
||||
# wait 2 * WAIT_PER_OP seconds so the checkers can collect a baseline before chaos
|
||||
sleep(1)
|
||||
sleep(constants.WAIT_PER_OP*2)
|
||||
|
||||
# assert statistic: all ops are expected to succeed (succ rate > 0.90)
|
||||
log.debug("******1st assert before chaos: ")
|
||||
assert_statistic(self.health_checkers)
|
||||
|
||||
# reset counting
|
||||
reset_counting(self.health_checkers)
|
||||
|
||||
# apply chaos object
|
||||
# chaos_opt = ChaosOpt(chaos_config['kind'])
|
||||
# chaos_opt.create_chaos_object(chaos_config)
|
||||
chaos_opt = ChaosOpt(chaos_config['kind'])
|
||||
chaos_opt.create_chaos_object(chaos_config)
|
||||
log.debug("chaos injected")
|
||||
|
||||
# wait 4 * WAIT_PER_OP seconds while the chaos is in effect
|
||||
sleep(1)
|
||||
sleep(constants.WAIT_PER_OP*4)
|
||||
|
||||
for k, t in self.checker_threads.items():
|
||||
log.debug(f"10s later: Thread {k} is_alive(): {t.is_alive()}")
|
||||
|
||||
# assert statistic
|
||||
assert_statistic(self.health_checkers, expectations={Op.create: self.expect_create,
|
||||
Op.insert: self.expect_insert,
|
||||
Op.flush: self.expect_flush,
|
||||
Op.index: self.expect_index,
|
||||
Op.search: self.expect_search,
|
||||
Op.query: self.expect_query
|
||||
})
|
||||
#
|
||||
log.debug("******2nd assert after chaos injected: ")
|
||||
assert_statistic(self.health_checkers,
|
||||
expectations={Op.create: self.expect_create,
|
||||
Op.insert: self.expect_insert,
|
||||
Op.flush: self.expect_flush,
|
||||
Op.index: self.expect_index,
|
||||
Op.search: self.expect_search,
|
||||
Op.query: self.expect_query
|
||||
})
|
||||
|
||||
# delete chaos
|
||||
# meta_name = chaos_config.get('metadata', None).get('name', None)
|
||||
# chaos_opt.delete_chaos_object(meta_name)
|
||||
meta_name = chaos_config.get('metadata', None).get('name', None)
|
||||
chaos_opt.delete_chaos_object(meta_name)
|
||||
log.debug("chaos deleted")
|
||||
for k, t in self.checker_threads.items():
|
||||
log.debug(f"Thread {k} is_alive(): {t.is_alive()}")
|
||||
sleep(2)
|
||||
# reconnect if needed
|
||||
sleep(constants.WAIT_PER_OP)
|
||||
reconnect(connections, self.host, self.port)
|
||||
|
||||
# reset counting again
|
||||
reset_counting(self.health_checkers)
|
||||
|
||||
# wait 1.5 * WAIT_PER_OP seconds for recovery (the time needed varies by feature)
|
||||
sleep(1)
|
||||
sleep(constants.WAIT_PER_OP*1.5)
|
||||
|
||||
# assert statistic: all ops success again
|
||||
log.debug("******3rd assert after chaos deleted: ")
|
||||
assert_statistic(self.health_checkers)
|
||||
|
||||
# terminate thread
|
||||
for ch in self.health_checkers.values():
|
||||
ch.terminate()
|
||||
# log.debug("*******************Test Completed.*******************")
|
||||
log.debug("*********************Chaos Test Completed**********************")
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
import pytest
|
||||
|
||||
from base.client_base import TestcaseBase
|
||||
from common import common_func as cf
|
||||
from common import common_type as ct
|
||||
from common.common_type import CaseLabel
|
||||
|
||||
prefix = "e2e_"
|
||||
|
||||
|
||||
class TestE2e(TestcaseBase):
    """End-to-end test case: create, insert, flush, search and index on one collection."""

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("name", [cf.gen_unique_str(prefix)])
    def test_milvus_default(self, name):
        """Run the default create / insert / flush / search / index flow."""
        from utils.util_log import test_log as log

        # create
        collection_w = self.init_collection_wrap(name=name)
        log.debug("assert create")
        assert collection_w.name == name

        # insert
        data = cf.gen_default_list_data()
        first_batch = len(data[0])
        _, inserted = collection_w.insert(data)
        log.debug("assert insert")
        assert inserted

        # flush (num_entities is compared against the rows just inserted)
        log.debug("assert flush")
        assert collection_w.num_entities == first_batch

        # search
        collection_w.load()
        query_vectors = cf.gen_vectors(1, ct.default_dim)
        hits, _ = collection_w.search(
            data=query_vectors,
            anns_field=ct.default_float_vec_field_name,
            param={"nprobe": 16},
            limit=1,
        )
        log.debug("assert search")
        assert len(hits) == 1

        # index
        collection_w.insert(cf.gen_default_dataframe_data(nb=4000))
        assert collection_w.num_entities == first_batch + 4000
        ivf_sq8_params = {
            "index_type": "IVF_SQ8",
            "metric_type": "L2",
            "params": {"nlist": 64},
        }
        _, _ = collection_w.create_index(
            field_name=ct.default_float_vec_field_name,
            index_params=ivf_sq8_params,
            name=cf.gen_unique_str(),
        )
        log.debug("assert index")
        assert len(collection_w.indexes) == 1

        # # query
        # term_expr = f'{ct.default_int64_field_name} in [1,2,3,4]'
        # res, _ = collection_w.query(term_expr)
        # assert len(res) == 4
|
|
@ -308,13 +308,13 @@ class TestPartitionParams(TestcaseBase):
|
|||
|
||||
# insert data
|
||||
partition_w.insert(data)
|
||||
self._connect().flush([collection_w.name])
|
||||
# self._connect().flush([collection_w.name]) # don't need flush for issue #5737
|
||||
assert not partition_w.is_empty
|
||||
assert partition_w.num_entities == nums
|
||||
|
||||
# insert data
|
||||
partition_w.insert(data)
|
||||
self._connect().flush([collection_w.name])
|
||||
# self._connect().flush([collection_w.name])
|
||||
assert not partition_w.is_empty
|
||||
assert partition_w.num_entities == (nums + nums)
|
||||
|
||||
|
@ -481,9 +481,9 @@ class TestPartitionOperations(TestcaseBase):
|
|||
assert not collection_w.has_partition(partition_name)[0]
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("flush", [True, False])
|
||||
# @pytest.mark.parametrize("flush", [True, False])
|
||||
@pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
|
||||
def test_partition_drop_non_empty_partition(self, flush, partition_name):
|
||||
def test_partition_drop_non_empty_partition(self, partition_name):
|
||||
"""
|
||||
target: verify drop a partition which has data inserted
|
||||
method: 1.create a partition with default schema
|
||||
|
@ -502,25 +502,25 @@ class TestPartitionOperations(TestcaseBase):
|
|||
# insert data to partition
|
||||
partition_w.insert(cf.gen_default_dataframe_data())
|
||||
|
||||
# flush
|
||||
if flush:
|
||||
self._connect().flush([collection_w.name])
|
||||
# # flush remove flush for issue #5837
|
||||
# if flush:
|
||||
# self._connect().flush([collection_w.name])
|
||||
|
||||
# drop partition
|
||||
partition_w.drop()
|
||||
assert not collection_w.has_partition(partition_name)[0]
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("flush", [True, False])
|
||||
# @pytest.mark.parametrize("flush", [True, False])
|
||||
@pytest.mark.parametrize("partition_name, data", [(cf.gen_unique_str(prefix), cf.gen_default_list_data(nb=10))])
|
||||
@pytest.mark.parametrize("index_param", cf.gen_simple_index())
|
||||
def test_partition_drop_indexed_partition(self, flush, partition_name, data, index_param):
|
||||
def test_partition_drop_indexed_partition(self, partition_name, data, index_param):
|
||||
"""
|
||||
target: verify drop an indexed partition
|
||||
method: 1.create a partition
|
||||
2. insert same data
|
||||
3. create an index
|
||||
4. flush or not flush
|
||||
4. flush or not flush (remove flush step for issue # 5837)
|
||||
5. drop the partition
|
||||
expected: drop successfully
|
||||
"""
|
||||
|
@ -537,9 +537,9 @@ class TestPartitionOperations(TestcaseBase):
|
|||
# create index of collection
|
||||
collection_w.create_index(ct.default_float_vec_field_name, index_param)
|
||||
|
||||
# flush
|
||||
if flush:
|
||||
self._connect().flush([collection_w.name])
|
||||
# # flush
|
||||
# if flush:
|
||||
# self._connect().flush([collection_w.name])
|
||||
|
||||
# drop partition
|
||||
partition_w.drop()
|
||||
|
@ -624,7 +624,10 @@ class TestPartitionOperations(TestcaseBase):
|
|||
assert collection_w.has_partition(partition_name)[0]
|
||||
|
||||
# insert data to partition
|
||||
partition_w.insert(cf.gen_default_list_data())
|
||||
data = cf.gen_default_list_data()
|
||||
partition_w.insert(data)
|
||||
assert partition_w.num_entities == len(data[0])
|
||||
assert collection_w.num_entities == len(data[0])
|
||||
|
||||
# load partition
|
||||
partition_w.load()
|
||||
|
@ -635,6 +638,7 @@ class TestPartitionOperations(TestcaseBase):
|
|||
params={"nprobe": 32}, limit=1)
|
||||
assert len(res_1) == 1
|
||||
|
||||
|
||||
# release collection
|
||||
collection_w.release()
|
||||
|
||||
|
@ -643,8 +647,8 @@ class TestPartitionOperations(TestcaseBase):
|
|||
anns_field=ct.default_float_vec_field_name,
|
||||
params={"nprobe": 32}, limit=1,
|
||||
check_task=ct.CheckTasks.err_res,
|
||||
check_items={ct.err_code: 1,
|
||||
ct.err_msg: "collection has been released"})
|
||||
check_items={ct.err_code: 0,
|
||||
ct.err_msg: "not loaded into memory"})
|
||||
# release partition
|
||||
partition_w.release()
|
||||
|
||||
|
@ -666,7 +670,7 @@ class TestPartitionOperations(TestcaseBase):
|
|||
|
||||
# insert data to partition
|
||||
partition_w.insert(data)
|
||||
self._connect().flush([collection_w.name])
|
||||
# self._connect().flush([collection_w.name])
|
||||
assert partition_w.num_entities == len(data)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
|
@ -730,7 +734,7 @@ class TestPartitionOperations(TestcaseBase):
|
|||
# insert data to partition
|
||||
max_size = 100000 # TODO: clarify the max size of data
|
||||
partition_w.insert(cf.gen_default_dataframe_data(max_size))
|
||||
self._connect().flush([collection_w.name])
|
||||
# self._connect().flush([collection_w.name])
|
||||
assert partition_w.num_entities == max_size
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
|
|
|
@ -16,7 +16,7 @@ def api_request_catch():
|
|||
def inner_wrapper(*args, **kwargs):
|
||||
try:
|
||||
res = func(*args, **kwargs)
|
||||
log.debug("(api_response) Response : %s " % str(res)[0:log_row_length])
|
||||
# log.debug("(api_response) Response : %s " % str(res)[0:log_row_length])
|
||||
return res, True
|
||||
except Exception as e:
|
||||
log.error(traceback.format_exc())
|
||||
|
@ -36,8 +36,8 @@ def api_request(_list, **kwargs):
|
|||
if len(_list) > 1:
|
||||
for a in _list[1:]:
|
||||
arg.append(a)
|
||||
log.debug("(api_request) Request: [%s] args: %s, kwargs: %s"
|
||||
% (str(func), str(arg)[0:log_row_length], str(kwargs)))
|
||||
# log.debug("(api_request) Request: [%s] args: %s, kwargs: %s"
|
||||
# % (str(func), str(arg)[0:log_row_length], str(kwargs)))
|
||||
return func(*arg, **kwargs)
|
||||
return False, False
|
||||
|
||||
|
|
Loading…
Reference in New Issue