mirror of https://github.com/milvus-io/milvus.git
[skip ci] Add chaos tests for etcd and minio (#6159)
* [skip ci] Add chaos tests for etcd and minio
  Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
* Update timeout for nightly
  Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
* Add more mins for CI and nightly to workaround #6164
  Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>

pull/6168/head
parent c468ff30cf
commit 6f4ad331c8
```diff
@@ -13,7 +13,7 @@ pipeline {
     }
     options {
         timestamps()
-        timeout(time: 1, unit: 'HOURS')
+        timeout(time: 90, unit: 'MINUTES')
         buildDiscarder logRotator(artifactDaysToKeepStr: '30')
         // parallelsAlwaysFailFast()
     }
```
```diff
@@ -15,7 +15,7 @@ pipeline {
 
     options {
         timestamps()
-        timeout(time: 30, unit: 'MINUTES')
+        timeout(time: 36, unit: 'MINUTES')
         // parallelsAlwaysFailFast()
     }
 
```
```diff
@@ -0,0 +1,16 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-etcd-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing  # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: etcd
+  scheduler:
+    cron: '@every 5s'
```
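For orientation, a minimal sketch of how a manifest like this can be applied programmatically with the official Kubernetes Python client — the same mechanism the test suite's `ChaosOpt` wrapper builds on. The CRD plural `podchaos` is an assumption based on Chaos Mesh's CRD naming, and the local file name is hypothetical:

```python
import yaml
from kubernetes import client, config

config.load_kube_config()  # assumes a reachable cluster with Chaos Mesh installed
api = client.CustomObjectsApi()

with open("chaos_etcd_podkill.yaml") as f:  # hypothetical local copy of the manifest
    chaos_config = yaml.safe_load(f)

# group/version come from the manifest's apiVersion; "podchaos" is the assumed CRD plural
api.create_namespaced_custom_object(group="chaos-mesh.org",
                                    version="v1alpha1",
                                    namespace="chaos-testing",
                                    plural="podchaos",
                                    body=chaos_config)
```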
```diff
@@ -0,0 +1,16 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-minio-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing  # target namespace of milvus deployment
+    labelSelectors:
+      release: milvus-chaos
+      app: minio
+  scheduler:
+    cron: '@every 5s'
```
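Note the two experiments select their victims differently: the etcd manifest matches `app.kubernetes.io/*` labels while the minio one matches `release`/`app`, presumably because the two dependency charts label their pods differently. A quick way to confirm a selector actually matches the intended pods before the cron starts firing, sketched with the Kubernetes Python client (label keys copied from the manifest above):

```python
from kubernetes import client, config

config.load_kube_config()
core = client.CoreV1Api()

# The selector string mirrors spec.selector.labelSelectors above.
pods = core.list_namespaced_pod(namespace="chaos-testing",
                                label_selector="release=milvus-chaos,app=minio")
print([p.metadata.name for p in pods.items])  # should list the minio pod(s)
```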
```diff
@@ -14,4 +14,4 @@ spec:
       app.kubernetes.io/name: milvus
       component: standalone
   scheduler:
-    cron: '@every 10s'
+    cron: '@every 3s'
```
```diff
@@ -33,8 +33,8 @@ class ChaosOpt(object):
                                                                 metadata_name)
             log.debug(f"delete chaos response: {data}")
         except ApiException as e:
-            log.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
-            raise Exception(str(e))
+            if raise_ex:
+                log.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
+                raise Exception(str(e))
 
     def list_chaos_object(self):
```
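The new `raise_ex` guard (the parameter name is inferred from the `if raise_ex:` branch; the changed signature itself is outside this hunk) lets callers choose between failing fast and best-effort cleanup:

```python
# during a test body: surface deletion failures immediately
chaos_opt.delete_chaos_object(meta_name)

# during teardown: assumed best-effort path that skips logging and raising,
# e.g. when the experiment may already have been removed
chaos_opt.delete_chaos_object(meta_name, raise_ex=False)
```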
```diff
@@ -141,14 +141,13 @@ class QueryChecker(Checker):
             int_values = []
             for _ in range(5):
                 int_values.append(randint(0, constants.ENTITIES_FOR_SEARCH))
-            # term_expr = f'{ct.default_int64_field_name} in {int_values}'
-            # _, result = self.c_wrap.query(term_expr, check_task='check_nothing')
-            result = False
-            sleep(constants.WAIT_PER_OP/10)
+            term_expr = f'{ct.default_int64_field_name} in {int_values}'
+            _, result = self.c_wrap.query(term_expr, check_task='check_nothing')
             if result:
                 self._succ += 1
             else:
                 self._fail += 1
+            sleep(constants.WAIT_PER_OP / 10)
 
 #
 # if __name__ == '__main__':
```
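Condensed, the checker now issues the real query and scores it, instead of the commented-out call plus a hard-coded `result = False` that could only ever increment `_fail`; the sleep also moves after the query, so each round is paced by the wait. A self-contained sketch of the pattern (the `Checker` base class, `c_wrap`, and constants here are stand-ins, not the framework's real objects):

```python
from random import randint
from time import sleep

def run_query_checker(checker, n_rounds=10, wait=0.5):
    # stand-in for QueryChecker.keep_running: query 5 random ids per round
    for _ in range(n_rounds):
        int_values = [randint(0, 3000) for _ in range(5)]
        term_expr = f'int64 in {int_values}'
        _, result = checker.c_wrap.query(term_expr, check_task='check_nothing')
        if result:
            checker._succ += 1
        else:
            checker._fail += 1
        sleep(wait)  # pace the load, as WAIT_PER_OP/10 does above
```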
```diff
@@ -6,7 +6,6 @@ from checker import CreateChecker, InsertFlushChecker, \
     SearchChecker, QueryChecker, IndexChecker, Op
 from chaos_opt import ChaosOpt
 from utils.util_log import test_log as log
-from base.collection_wrapper import ApiCollectionWrapper
 from common import common_func as cf
 from chaos_commons import *
 from common.common_type import CaseLabel
```
```diff
@@ -122,14 +121,14 @@ class TestChaos(TestChaosBase):
         log.debug("******1st assert before chaos: ")
         assert_statistic(self.health_checkers)
 
-        # reset counting
-        reset_counting(self.health_checkers)
-
         # apply chaos object
         chaos_opt = ChaosOpt(chaos_config['kind'])
         chaos_opt.create_chaos_object(chaos_config)
         log.debug("chaos injected")
 
+        # reset counting
+        reset_counting(self.health_checkers)
+
         # wait 120s
         sleep(constants.WAIT_PER_OP*4)
```
```diff
@@ -155,19 +154,17 @@ class TestChaos(TestChaosBase):
             log.debug(f"Thread {k} is_alive(): {t.is_alive()}")
         sleep(2)
         # reconnect if needed
-        sleep(constants.WAIT_PER_OP)
+        sleep(constants.WAIT_PER_OP*2)
         reconnect(connections, self.host, self.port)
 
         # reset counting again
         reset_counting(self.health_checkers)
 
         # wait 300s (varies by feature)
-        sleep(constants.WAIT_PER_OP*1.5)
+        sleep(constants.WAIT_PER_OP*2.5)
 
         # assert statistic: all ops success again
         log.debug("******3rd assert after chaos deleted: ")
         assert_statistic(self.health_checkers)
 
         log.debug("*********************Chaos Test Completed**********************")
-
-
```
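The `# wait 120s` comment together with `WAIT_PER_OP*4` implies `constants.WAIT_PER_OP` is 30 seconds — an assumption worth checking in constants.py before tuning these sleeps:

```python
WAIT_PER_OP = 30  # assumed from the "# wait 120s" comment; see constants.py for the real value
assert WAIT_PER_OP * 4 == 120    # the post-injection settle window
assert WAIT_PER_OP * 2.5 == 75   # at this value the "# wait 300s" comment above looks stale
```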
```diff
@@ -0,0 +1,127 @@
+import pytest
+import datetime
+from time import sleep
+
+from pymilvus_orm import connections, utility
+from base.collection_wrapper import ApiCollectionWrapper
+from chaos_opt import ChaosOpt
+from common import common_func as cf
+from common import common_type as ct
+from chaos_commons import *
+from common.common_type import CaseLabel, CheckTasks
+import constants
+
+
+def reboot_pod(chaos_yaml):
+    # parse chaos object
+    chaos_config = gen_experiment_config(chaos_yaml)
+    log.debug(chaos_config)
+    # inject chaos
+    chaos_opt = ChaosOpt(chaos_config['kind'])
+    chaos_opt.create_chaos_object(chaos_config)
+    log.debug("chaos injected")
+    sleep(1)
+    # delete chaos
+    meta_name = chaos_config.get('metadata', None).get('name', None)
+    chaos_opt.delete_chaos_object(meta_name)
+    log.debug("chaos deleted")
+
+
+class TestChaosData:
+    host = 'localhost'
+    port = 19530
+
+    @pytest.fixture(scope="function", autouse=True)
+    def connection(self, host, port):
+        connections.add_connection(default={"host": host, "port": port})
+        conn = connections.connect(alias='default')
+        if conn is None:
+            raise Exception("no connections")
+        self.host = host
+        self.port = port
+        return conn
+
+    @pytest.mark.tags(CaseLabel.L3)
+    @pytest.mark.parametrize('chaos_yaml', get_chaos_yamls())
+    def test_chaos_data_consist(self, connection, chaos_yaml):
+        c_name = cf.gen_unique_str('chaos_collection_')
+        nb = 5000
+        i_name = cf.gen_unique_str('chaos_index_')
+        index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+
+        # create
+        t0 = datetime.datetime.now()
+        collection_w = ApiCollectionWrapper()
+        collection_w.init_collection(name=c_name,
+                                     schema=cf.gen_default_collection_schema())
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert create: {tt}")
+        assert collection_w.name == c_name
+
+        # insert
+        data = cf.gen_default_list_data(nb=nb)
+        t0 = datetime.datetime.now()
+        _, res = collection_w.insert(data)
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert insert: {tt}")
+        assert res
+
+        # flush
+        t0 = datetime.datetime.now()
+        assert collection_w.num_entities == nb
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert flush: {tt}")
+
+        # search
+        collection_w.load()
+        search_vectors = cf.gen_vectors(1, ct.default_dim)
+        t0 = datetime.datetime.now()
+        search_res, _ = collection_w.search(data=search_vectors,
+                                            anns_field=ct.default_float_vec_field_name,
+                                            param={"nprobe": 16}, limit=1)
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert search: {tt}")
+        assert len(search_res) == 1
+
+        # index
+        t0 = datetime.datetime.now()
+        index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
+                                             index_params=index_params,
+                                             name=i_name)
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert index: {tt}")
+        assert len(collection_w.indexes) == 1
+
+        # query
+        term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
+        t0 = datetime.datetime.now()
+        query_res, _ = collection_w.query(term_expr)
+        tt = datetime.datetime.now() - t0
+        log.debug(f"assert query: {tt}")
+        assert len(query_res) == 4
+
+        # reboot a pod
+        reboot_pod(chaos_yaml)
+
+        # reconnect if needed
+        sleep(constants.WAIT_PER_OP * 4)
+        reconnect(connections, self.host, self.port)
+
+        # verify collection persists
+        assert utility.has_collection(c_name)
+        log.debug("assert collection persists")
+        collection_w2 = ApiCollectionWrapper()
+        collection_w2.init_collection(c_name)
+        # verify data persist
+        assert collection_w2.num_entities == nb
+        log.debug("assert data persists")
+        # verify index persists
+        assert collection_w2.has_index(i_name)
+        log.debug("assert index persists")
+        # verify search results persist
+
+        # verify query results persist
+        query_res2, _ = collection_w2.query(term_expr)
+        assert query_res2 == query_res
+        log.debug("assert query result persists")
```
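The `# verify search results persist` step is left empty in this revision. A sketch of what it could look like at that point in the test body, assuming the same search call used earlier in the file:

```python
# hypothetical completion of the empty "verify search results persist" step;
# continues the test body above, reusing search_vectors and collection_w2
search_res2, _ = collection_w2.search(data=search_vectors,
                                      anns_field=ct.default_float_vec_field_name,
                                      param={"nprobe": 16}, limit=1)
assert len(search_res2) == 1
log.debug("assert search result persists")
```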
```diff
@@ -37,7 +37,7 @@ class TestPartitionParams(TestcaseBase):
         assert collection_w.has_partition(partition_name)[0]
 
     @pytest.mark.tags(CaseLabel.L1)
-    @pytest.mark.xfail(reason="issue #5375")
+    # @pytest.mark.xfail(reason="issue #5375")
     @pytest.mark.parametrize("partition_name", [""])
     def test_partition_empty_name(self, partition_name):
         """
```
```diff
@@ -455,7 +455,7 @@ class TestPartitionOperations(TestcaseBase):
 
         # verify that drop the partition again with exception
         partition_w.drop(check_task=CheckTasks.err_res,
-                         check_items={ct.err_code: 1, ct.err_msg: "None Type"})
+                         check_items={ct.err_code: 1, ct.err_msg: "Partition doesn't exist"})
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
```
```diff
@@ -578,7 +578,7 @@ class TestPartitionOperations(TestcaseBase):
 
         # release the dropped partition and check err response
         partition_w.release(check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: "None Type"})
+                            check_items={ct.err_code: 1, ct.err_msg: "Partition doesn't exist"})
 
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
```
```diff
@@ -690,7 +690,7 @@ class TestPartitionOperations(TestcaseBase):
         # insert data to partition
         partition_w.insert(cf.gen_default_dataframe_data(),
                            check_task=CheckTasks.err_res,
-                           check_items={ct.err_code: 1, ct.err_msg: "can not be find"})
+                           check_items={ct.err_code: 1, ct.err_msg: "Partition doesn't exist"})
         # TODO: update the assert error
 
     @pytest.mark.tags(CaseLabel.L1)
```
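These three hunks align the partition-operation tests on one expected message, "Partition doesn't exist". A self-contained sketch of the substring-style check an `err_res` task plausibly performs — the real helper lives in the framework's check utilities, and every name here is hypothetical:

```python
class MilvusError(Exception):
    def __init__(self, code, message):
        super().__init__(message)
        self.code, self.message = code, message

def check_err_res(error, expected_code, expected_msg):
    # exact match on the code, substring match on the message, so fuller
    # server-side messages still satisfy the expectation
    assert error.code == expected_code
    assert expected_msg in error.message

check_err_res(MilvusError(1, "DropPartition failed: Partition doesn't exist"),
              1, "Partition doesn't exist")
```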