diff --git a/tests20/python_client/chaos/chaos_objects/chaos_datanode_container_kill.yaml b/tests20/python_client/chaos/chaos_objects/chaos_datanode_container_kill.yaml new file mode 100644 index 0000000000..e6061e6aff --- /dev/null +++ b/tests20/python_client/chaos/chaos_objects/chaos_datanode_container_kill.yaml @@ -0,0 +1,18 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: test-datanode-container-kill + namespace: chaos-testing +spec: + action: container-kill + mode: one + containerName: 'datanode' + selector: + namespaces: + - chaos-testing # target namespace of milvus deployment + labelSelectors: + app.kubernetes.io/instance: milvus-chaos + app.kubernetes.io/name: milvus + component: datanode + scheduler: + cron: '@every 2s' diff --git a/tests20/python_client/chaos/chaos_objects/chaos_datanode_pod_failure.yaml b/tests20/python_client/chaos/chaos_objects/chaos_datanode_pod_failure.yaml new file mode 100644 index 0000000000..5a004f7e06 --- /dev/null +++ b/tests20/python_client/chaos/chaos_objects/chaos_datanode_pod_failure.yaml @@ -0,0 +1,19 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: test-datanode-pod-failure + namespace: chaos-testing +spec: + action: pod-failure + mode: one + value: '' + duration: '20s' + selector: + namespaces: + - chaos-testing # target namespace of milvus deployment + labelSelectors: + app.kubernetes.io/instance: milvus-chaos + app.kubernetes.io/name: milvus + component: datanode + scheduler: + cron: '@every 30s' diff --git a/tests20/python_client/chaos/chaos_objects/chaos_querynode_pod_failure.yaml b/tests20/python_client/chaos/chaos_objects/chaos_querynode_pod_failure.yaml new file mode 100644 index 0000000000..ece92b6fc7 --- /dev/null +++ b/tests20/python_client/chaos/chaos_objects/chaos_querynode_pod_failure.yaml @@ -0,0 +1,19 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + name: test-querynode-pod-failure + namespace: chaos-testing +spec: + action: 
pod-failure + mode: one + value: '' + duration: '20s' + selector: + namespaces: + - chaos-testing # target namespace of milvus deployment + labelSelectors: + app.kubernetes.io/instance: milvus-chaos + app.kubernetes.io/name: milvus + component: querynode + scheduler: + cron: '@every 30s' diff --git a/tests20/python_client/chaos/chaos_objects/testcases.yaml b/tests20/python_client/chaos/chaos_objects/testcases.yaml index 0c7152b829..cb4bdf742d 100644 --- a/tests20/python_client/chaos/chaos_objects/testcases.yaml +++ b/tests20/python_client/chaos/chaos_objects/testcases.yaml @@ -140,4 +140,26 @@ Collections: testcase: name: test_querynode_network_isolation chaos: chaos_querynode_network_isolation.yaml - # and 10 more for the other pods \ No newline at end of file + # and 10 more for the other pods + + - + testcase: + name: test_datanode_container_kill + chaos: chaos_datanode_container_kill.yaml + expectation: + cluster_1_node: + insert: succ + flush: fail + cluster_n_nodes: + insert: degrade + + - + testcase: + name: test_datanode_pod_failure + chaos: chaos_datanode_pod_failure.yaml + expectation: + cluster_1_node: + insert: succ + flush: fail + cluster_n_nodes: + insert: degrade \ No newline at end of file diff --git a/tests20/python_client/chaos/checker.py b/tests20/python_client/chaos/checker.py index f74aefc291..2dd52eda2b 100644 --- a/tests20/python_client/chaos/checker.py +++ b/tests20/python_client/chaos/checker.py @@ -34,7 +34,7 @@ class Checker: schema=cf.gen_default_collection_schema(), timeout=timeout) self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH), - timeout=timeout, check_task='check_nothing') + timeout=timeout) self.initial_entities = self.c_wrap.num_entities # do as a flush def total(self): @@ -63,7 +63,7 @@ class SearchChecker(Checker): data=search_vec, anns_field=ct.default_float_vec_field_name, param={"nprobe": 32}, - limit=1, timeout=timeout, check_task='check_nothing' + limit=1, timeout=timeout ) if result: 
self._succ += 1 @@ -82,7 +82,7 @@ class InsertFlushChecker(Checker): while self._running: _, insert_result = \ self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS), - timeout=timeout, check_task='check_nothing') + timeout=timeout) if not self._flush: if insert_result: self._succ += 1 @@ -106,11 +106,11 @@ class CreateChecker(Checker): _, result = self.c_wrap.init_collection( name=cf.gen_unique_str("CreateChecker_"), schema=cf.gen_default_collection_schema(), - timeout=timeout, check_task='check_nothing' + timeout=timeout ) if result: self._succ += 1 - self.c_wrap.drop(timeout=timeout, check_task="check_nothing") + self.c_wrap.drop(timeout=timeout) else: self._fail += 1 sleep(constants.WAIT_PER_OP / 10) @@ -120,7 +120,7 @@ class IndexChecker(Checker): def __init__(self): super().__init__() self.c_wrap.insert(data=cf.gen_default_list_data(nb=5*constants.ENTITIES_FOR_SEARCH), - timeout=timeout, check_task='check_nothing') + timeout=timeout) log.debug(f"Index ready entities: {self.c_wrap.num_entities }") # do as a flush before indexing def keep_running(self): @@ -128,10 +128,10 @@ class IndexChecker(Checker): _, result = self.c_wrap.create_index(ct.default_float_vec_field_name, constants.DEFAULT_INDEX_PARAM, name=cf.gen_unique_str('index_'), - timeout=timeout, check_task='check_nothing') + timeout=timeout) if result: self._succ += 1 - self.c_wrap.drop_index(timeout=timeout, check_task='check_nothing') + self.c_wrap.drop_index(timeout=timeout) else: self._fail += 1 @@ -147,7 +147,7 @@ class QueryChecker(Checker): for _ in range(5): int_values.append(randint(0, constants.ENTITIES_FOR_SEARCH)) term_expr = f'{ct.default_int64_field_name} in {int_values}' - _, result = self.c_wrap.query(term_expr, timeout=timeout, check_task='check_nothing') + _, result = self.c_wrap.query(term_expr, timeout=timeout) if result: self._succ += 1 else: diff --git a/tests20/python_client/chaos/test_chaos.py b/tests20/python_client/chaos/test_chaos.py index 
def export_all_logs(self):
    """
    Export the logs of every pod of this release to constants.MILVUS_LOGS_PATH.

    One file named ``<pod>.log`` is written per pod returned by
    ``list_all_pods``. stdout and stderr of ``kubectl logs`` are both
    redirected into that file, so a failed fetch leaves its error text there.
    NOTE: the original author remarks that minio pod logs are temporarily missing.
    :return: None
    """
    import os
    import subprocess

    # The target directory may not exist yet on a fresh machine.
    os.makedirs(constants.MILVUS_LOGS_PATH, exist_ok=True)

    for pod in self.list_all_pods():
        log_file = os.path.join(constants.MILVUS_LOGS_PATH, f'{pod}.log')
        with open(log_file, 'w') as f:
            try:
                # Pods are listed from constants.NAMESPACE, so the logs must be
                # read from that same namespace rather than the kubeconfig default.
                # An argument list (no shell) keeps pod names from being shell-interpreted.
                subprocess.run(['kubectl', 'logs', pod, '-n', constants.NAMESPACE],
                               stdout=f, stderr=subprocess.STDOUT, check=False)
            except OSError as e:
                # Keep the export best-effort (e.g. kubectl not installed):
                # record the failure in the log file and move on to the next pod.
                f.write(f'failed to run kubectl logs for {pod}: {e}\n')


def list_all_pods(self):
    """
    List the names of all pods that belong to this helm release.

    Pods are looked up in constants.NAMESPACE and filtered by the label
    ``app.kubernetes.io/instance=<self.release_name>``.
    :return: list of pod names (str)
    """
    from kubernetes import client, config

    config.load_kube_config()
    v1 = client.CoreV1Api()
    label_selector = f'app.kubernetes.io/instance={self.release_name}'
    ret = v1.list_namespaced_pod(namespace=constants.NAMESPACE, label_selector=label_selector)
    return [item.metadata.name for item in ret.items]