[skip ci] Add more chaos tests (#6062)

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
2021-06-24 11:56:08 +08:00 · 2021-06-24 11:56:08 +08:00 · 4df6168c5e
parent 879c1e6436
commit 4df6168c5e
19 changed files with 452 additions and 213 deletions
--- a/tests20/python_client/chaos/chaos_commons.py
+++ b/tests20/python_client/chaos/chaos_commons.py
@ -1,9 +1,10 @@
 import os
 import threading
 import glob
-from delayed_assert import expect
+import delayed_assert
 import constants
 from yaml import full_load
+from utils.util_log import test_log as log


 def check_config(chaos_config):
@ -31,32 +32,22 @@ def gen_experiment_config(yaml):


 def start_monitor_threads(checkers={}):
-    for k in checkers.keys():
-        v = checkers[k]
-        t = threading.Thread(target=v.keep_running, args=())
+    threads = {}
+    for k, ch in checkers.items():
+        t = threading.Thread(target=ch.keep_running, args=())
        t.start()
-
-
-def assert_statistic(checkers, expectations={}):
-    for k in checkers.keys():
-        # expect succ if no expectations
-        succ_rate = checkers[k].succ_rate()
-        if expectations.get(k, '') == constants.FAIL:
-            print(f"Expect Fail: {str(checkers[k])} current succ rate {succ_rate}")
-            expect(succ_rate < 0.49)
-        else:
-            print(f"Expect Succ: {str(checkers[k])} current succ rate {succ_rate}")
-            expect(succ_rate > 0.90)
+        threads[k] = t
+    return threads


 def get_env_variable_by_name(name):
    """ get env variable by name"""
    try:
        env_var = os.environ[name]
-        print(f"env_variable: {env_var}")
+        log.debug(f"env_variable: {env_var}")
        return str(env_var)
    except Exception as e:
-        print(f"fail to get env variables, error: {str(e)}")
+        log.debug(f"fail to get env variables, error: {str(e)}")
        return None


@ -64,14 +55,18 @@ def get_chaos_yamls():
    chaos_env = get_env_variable_by_name(constants.CHAOS_CONFIG_ENV)
    if chaos_env is not None:
        if os.path.isdir(chaos_env):
-            print(f"chaos_env is a dir: {chaos_env}")
+            log.debug(f"chaos_env is a dir: {chaos_env}")
            return glob.glob(chaos_env + 'chaos_*.yaml')
        elif os.path.isfile(chaos_env):
-            print(f"chaos_env is a file: {chaos_env}")
+            log.debug(f"chaos_env is a file: {chaos_env}")
            return [chaos_env]
        else:
            # not a valid directory, return default
            pass
-    print("not a valid directory or file, return default")
-    return glob.glob(constants.TESTS_CONFIG_LOCATION + 'chaos_*.yaml')
+    log.debug("not a valid directory or file, return default")
+    return glob.glob(constants.TESTS_CONFIG_LOCATION + constants.ALL_CHAOS_YAMLS)

+
+def reconnect(conn, host, port):
+    conn.add_connection(default={"host": host, "port": port})
+    return conn.connect(alias='default')
--- a/tests20/python_client/chaos/chaos_objects/chaos_datacoord_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_datacoord_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-datacoord-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: datacoord
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_datanode_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_datanode_podkill.yaml
@ -1,16 +1,17 @@
 apiVersion: chaos-mesh.org/v1alpha1
 kind: PodChaos
 metadata:
-  name: test-data-node-pod-kill
-  namespace:
+  name: test-datanode-podkill
+  namespace: chaos-testing
 spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
-      - default         # target namespace of milvus deployment
+      - chaos-testing         # target namespace of milvus deployment
    labelSelectors:
-      app.kubernetes.io/name: milvus-ha
-      component: 'datanode'
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: datanode
  scheduler:
-    cron: '@every 20s'
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_indexcoord_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_indexcoord_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-indexcoord-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: indexcoord
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_indexnode_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_indexnode_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-indexnode-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: indexnode
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_proxy_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_proxy_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-proxy-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: proxy
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_proxynode_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_proxynode_podkill.yaml
@ -1,16 +0,0 @@
-apiVersion: chaos-mesh.org/v1alpha1
-kind: PodChaos
-metadata:
-  name: test-proxy-node-pod-kill
-  namespace:
-spec:
-  action: pod-kill
-  mode: one
-  selector:
-    namespaces:
-      - default         # target namespace of milvus deployment
-    labelSelectors:
-      app.kubernetes.io/name: milvus-ha
-      component: 'proxynode'
-  scheduler:
-    cron: '@every 20s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_querycoord_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_querycoord_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-querycoord-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: querycoord
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_querynode_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_querynode_podkill.yaml
@ -1,16 +1,17 @@
 apiVersion: chaos-mesh.org/v1alpha1
 kind: PodChaos
 metadata:
-  name: test-query-node-pod-kill
-  namespace:
+  name: test-querynode-podkill
+  namespace: chaos-testing
 spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
-      - default         # target namespace of milvus deployment
+      - chaos-testing         # target namespace of milvus deployment
    labelSelectors:
-      app.kubernetes.io/name: milvus-ha
-      component: 'querynode'
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: querynode
  scheduler:
-    cron: '@every 20s'
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_rootcoord_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_rootcoord_podkill.yaml
@ -0,0 +1,17 @@
+apiVersion: chaos-mesh.org/v1alpha1
+kind: PodChaos
+metadata:
+  name: test-rootcoord-podkill
+  namespace: chaos-testing
+spec:
+  action: pod-kill
+  mode: one
+  selector:
+    namespaces:
+      - chaos-testing         # target namespace of milvus deployment
+    labelSelectors:
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: rootcoord
+  scheduler:
+    cron: '@every 2s'
--- a/tests20/python_client/chaos/chaos_objects/chaos_standalone_podkill.yaml
+++ b/tests20/python_client/chaos/chaos_objects/chaos_standalone_podkill.yaml
@ -1,15 +1,17 @@
 apiVersion: chaos-mesh.org/v1alpha1
 kind: PodChaos
 metadata:
-  name: test-standalone-pod-kill
-  namespace:
+  name: test-standalone-podkill
+  namespace: chaos-testing
 spec:
  action: pod-kill
  mode: one
  selector:
    namespaces:
-      - default         # target namespace of milvus deployment
+      - chaos-testing         # target namespace of milvus deployment
    labelSelectors:
-      app.kubernetes.io/name: milvus-ha     # pod of standalone milvus
+      app.kubernetes.io/instance: milvus-chaos
+      app.kubernetes.io/name: milvus
+      component: standalone
  scheduler:
-    cron: '@every 20s'
+    cron: '@every 10s'
--- a/tests20/python_client/chaos/chaos_objects/testcases.yaml
+++ b/tests20/python_client/chaos/chaos_objects/testcases.yaml
@ -3,8 +3,8 @@
 #     standalone
 #       3 pods(standalone-ha-blabla, etcd, minio)
 #     cluster-1-node
-#       11 pods(proxy, master, query node, query service, data node, data service,
-#               index node, index service, pulsar, etcd, minio)
+#       11 pods(proxy, rootcoord, querynode, querycoord, datanode, datacoord,
+#               indexnode, indexcoord, pulsar, etcd, minio)
 #     cluster-n-nodes
 #       11 pods* n: kill one and kill all

@ -33,12 +33,19 @@ Collections:
            search: fail
            query: fail
          cluster_n_nodes:
-            search: degrade    # keep functional, but performance degraded
+            search: degrade
            query: degrade
  -
    testcase:
-      name: test_queryservice_podkill
-      chaos: chaos_queryservice_podkill.yaml
+      name: test_querycoord_podkill
+      chaos: chaos_querycoord_podkill.yaml
+      expectation:
+        cluster_1_node:
+          search: fail
+          query: fail
+        cluster_n_nodes:
+          search: degrade
+          query: degrade
  -
    testcase:
      name: test_datanode_podkill
@ -51,16 +58,32 @@ Collections:
          insert: degrade
  -
    testcase:
-      name: test_dataservice_podkill
-      chaos: chaos_dataservice_podkill.yaml
+      name: test_datascoord_podkill
+      chaos: chaos_datacoord_podkill.yaml
+      expectation:
+        cluster_1_node:
+          insert: succ
+          flush: fail
+        cluster_n_nodes:
+          insert: degrade
  -
    testcase:
      name: test_indexnode_podkill
      chaos: chaos_indexnode_podkill.yaml
+      expectation:
+        cluster_1_node:
+          index: fail
+        cluster_n_nodes:
+          index: degrade
  -
    testcase:
-      name: test_indexservice_podkill
-      chaos: chaos_indexservice_podkill.yaml
+      name: test_indexcoord_podkill
+      chaos: chaos_indexcoord_podkill.yaml
+      expectation:
+        cluster_1_node:
+          index: fail
+        cluster_n_nodes:
+          insert: degrade
  -
    testcase:
      name: test_proxy_podkill
@ -74,11 +97,21 @@ Collections:
          search: fail
          query: fail
        cluster_n_nodes:
-          insert: degrade
+          insert: fail
  -
    testcase:
-      name: test_master_podkill
-      chaos: chaos_master_podkill.yaml
+      name: test_rootcoord_podkill
+      chaos: chaos_rootcoord_podkill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
+        cluster_n_nodes:
+          insert: degrade
  -
    testcase:
      name: test_etcd_podkill
@ -87,6 +120,10 @@ Collections:
    testcase:
      name: test_minio_podkill
      chaos: chaos_minio_podkill.yaml
+  -
+    testcase:
+      name: test_pulsar_podkill
+      chaos: chaos_minio_podkill.yaml
  -
    testcase:
      name: test_querynode_cpu100p
--- a/tests20/python_client/chaos/chaos_opt.py
+++ b/tests20/python_client/chaos/chaos_opt.py
@ -1,11 +1,8 @@
 from __future__ import print_function
-import logging
 from kubernetes import client, config
 from kubernetes.client.rest import ApiException
 import constants as cf
-
-
-logger = logging.getLogger("milvus_benchmark.chaos.chaosOpt")
+from utils.util_log import test_log as log


 class ChaosOpt(object):
@ -15,35 +12,30 @@ class ChaosOpt(object):
        self.namespace = namespace
        self.plural = kind.lower()

-    # def get_metadata_name(self):
-    #     return self.metadata_name
-
    def create_chaos_object(self, body):
-        # body = create_chaos_config(self.plural, self.metadata_name, spec_params)
-        # logger.info(body)
        pretty = 'true'
        config.load_kube_config()
        api_instance = client.CustomObjectsApi()
        try:
            api_response = api_instance.create_namespaced_custom_object(self.group, self.version, self.namespace,
                                                                        plural=self.plural, body=body, pretty=pretty)
-            print(api_response)
-            logging.getLogger().info(api_instance)
+            log.debug(f"create chaos response: {api_response}")
        except ApiException as e:
-            logger.error("Exception when calling CustomObjectsApi->create_namespaced_custom_object: %s\n" % e)
+            log.error("Exception when calling CustomObjectsApi->create_namespaced_custom_object: %s\n" % e)
            raise Exception(str(e))

-    def delete_chaos_object(self, metadata_name):
+    def delete_chaos_object(self, metadata_name, raise_ex=True):
        print(metadata_name)
        try:
            config.load_kube_config()
            api_instance = client.CustomObjectsApi()
            data = api_instance.delete_namespaced_custom_object(self.group, self.version, self.namespace, self.plural,
                                                                metadata_name)
-            logger.info(data)
+            log.debug(f"delete chaos response: {data}")
        except ApiException as e:
-            logger.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
-            raise Exception(str(e))
+            log.error("Exception when calling CustomObjectsApi->delete_namespaced_custom_object: %s\n" % e)
+            if raise_ex:
+                raise Exception(str(e))

    def list_chaos_object(self):
        try:
@ -51,9 +43,8 @@ class ChaosOpt(object):
            api_instance = client.CustomObjectsApi()
            data = api_instance.list_namespaced_custom_object(self.group, self.version, self.namespace,
                                                              plural=self.plural)
-            # pprint(data)
        except ApiException as e:
-            logger.error("Exception when calling CustomObjectsApi->list_namespaced_custom_object: %s\n" % e)
+            log.error("Exception when calling CustomObjectsApi->list_namespaced_custom_object: %s\n" % e)
            raise Exception(str(e))
        return data

--- a/tests20/python_client/chaos/checker.py
+++ b/tests20/python_client/chaos/checker.py
@ -1,10 +1,13 @@
+import datetime
 from enum import Enum
+from random import randint

 from time import sleep
 from base.collection_wrapper import ApiCollectionWrapper
 from common import common_func as cf
 from common import common_type as ct
 import constants
+from utils.util_log import test_log as log


 class Op(Enum):
@ -23,6 +26,12 @@ class Checker:
        self._succ = 0
        self._fail = 0
        self._running = True
+        self.c_wrap = ApiCollectionWrapper()
+        self.c_wrap.init_collection(name=cf.gen_unique_str('Checker_'),
+                                    schema=cf.gen_default_collection_schema())
+        self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH),
+                           check_task='check_nothing')
+        self.initial_entities = self.c_wrap.num_entities    # do as a flush

    def total(self):
        return self._succ + self._fail
@ -39,47 +48,45 @@ class Checker:


 class SearchChecker(Checker):
-    def __init__(self, collection_wrap):
+    def __init__(self):
        super().__init__()
-        self.c_wrap = collection_wrap
+        self.c_wrap.load()   # do load before search

    def keep_running(self):
        while self._running is True:
            search_vec = cf.gen_vectors(5, ct.default_dim)
            _, result = self.c_wrap.search(
                                data=search_vec,
-                                params={"nprobe": 32},
-                                limit=1,
-                                check_task="nothing"
+                                anns_field=ct.default_float_vec_field_name,
+                                param={"nprobe": 32},
+                                limit=1, check_task='check_nothing'
                            )
-            if result is True:
+            if result:
                self._succ += 1
            else:
                self._fail += 1
+            sleep(constants.WAIT_PER_OP / 10)


 class InsertFlushChecker(Checker):
-    def __init__(self, connection, collection_wrap, do_flush=False):
+    def __init__(self, flush=False):
        super().__init__()
-        self.conn = connection
-        self.c_wrap = collection_wrap
-        self._do_flush = do_flush
+        self._flush = flush

    def keep_running(self):
-        while self._running is True:
-            _, insert_result = self.c_wrap.insert(
-                                    data=cf.gen_default_dataframe_data(nb=constants.DELTA_PER_INS)
-                                    )
-            if self._do_flush is False:
-                if insert_result is True:
+        while self._running:
+            init_entities = self.c_wrap.num_entities
+            _, insert_result = \
+                self.c_wrap.insert(data=cf.gen_default_list_data(nb=constants.DELTA_PER_INS),
+                                   check_task='check_nothing')
+            if not self._flush:
+                if insert_result:
                    self._succ += 1
                else:
                    self._fail += 1
+                sleep(constants.WAIT_PER_OP / 10)
            else:
-                entities_1 = self.c_wrap.num_entities
-                self.conn.flush([self.c_wrap.name])
-                entities_2 = self.c_wrap.num_entities
-                if entities_2 == (entities_1 + constants.DELTA_PER_INS):
+                if self.c_wrap.num_entities == (init_entities + constants.DELTA_PER_INS):
                    self._succ += 1
                else:
                    self._fail += 1
@ -88,35 +95,81 @@ class InsertFlushChecker(Checker):
 class CreateChecker(Checker):
    def __init__(self):
        super().__init__()
-        self.c_wrapper = ApiCollectionWrapper()

    def keep_running(self):
        while self._running is True:
-            sleep(2)
-            collection, result = self.c_wrapper.init_collection(
+            _, result = self.c_wrap.init_collection(
                                    name=cf.gen_unique_str("CreateChecker_"),
                                    schema=cf.gen_default_collection_schema(),
-                                    check_task="check_nothing"
+                                    check_task='check_nothing'
                                )
-            if result is True:
+            if result:
                self._succ += 1
-                self.c_wrapper.drop(check_task="check_nothing")
+                self.c_wrap.drop(check_task="check_nothing")
            else:
                self._fail += 1
+            sleep(constants.WAIT_PER_OP / 10)


 class IndexChecker(Checker):
    def __init__(self):
        super().__init__()
+        self.c_wrap.insert(data=cf.gen_default_list_data(nb=5*constants.ENTITIES_FOR_SEARCH),
+                           check_task='check_nothing')
+        log.debug(f"Index ready entities: {self.c_wrap.num_entities }")  # do as a flush before indexing

    def keep_running(self):
-        pass
+        while self._running:
+            _, result = self.c_wrap.create_index(ct.default_float_vec_field_name,
+                                                 constants.DEFAULT_INDEX_PARAM,
+                                                 name=cf.gen_unique_str('index_'),
+                                                 check_task='check_nothing')
+            if result:
+                self._succ += 1
+                self.c_wrap.drop_index(check_task='check_nothing')
+            else:
+                self._fail += 1


 class QueryChecker(Checker):
    def __init__(self):
        super().__init__()
+        self.c_wrap.load()      # load before query

    def keep_running(self):
-        pass
+        while self._running:
+            int_values = []
+            for _ in range(5):
+                int_values.append(randint(0, constants.ENTITIES_FOR_SEARCH))
+            # term_expr = f'{ct.default_int64_field_name} in {int_values}'
+            # _, result = self.c_wrap.query(term_expr, check_task='check_nothing')
+            result = False
+            sleep(constants.WAIT_PER_OP/10)
+            if result:
+                self._succ += 1
+            else:
+                self._fail += 1

+#
+# if __name__ == '__main__':
+#     from pymilvus_orm import connections
+#     connections.add_connection(default={"host": '10.98.0.7', "port": 19530})
+#     conn = connections.connect(alias='default')
+#     c_w = ApiCollectionWrapper()
+#     c_w.init_collection(name=cf.gen_unique_str("collection_4_search_"),
+#                         schema=cf.gen_default_collection_schema())
+#     c_w.insert(data=cf.gen_default_list_data(nb=constants.ENTITIES_FOR_SEARCH))
+#     log.debug(f"nums: {c_w.num_entities}")
+#     # c_w.load()
+#     # # int_values = []
+#     # # for _ in range(5):
+#     # #     int_values.append(randint(0, constants.ENTITIES_FOR_SEARCH))
+#     # term_expr = f'{ct.default_int64_field_name} in [1,2,3,4,5]'
+#     # log.debug(term_expr)
+#     # res, result = c_w.query(term_expr)
+#
+#     res, result = c_w.create_index(ct.default_float_vec_field_name,
+#                                    constants.DEFAULT_INDEX_PARAM,
+#                                    name=cf.gen_unique_str('index_'),
+#                                    check_task='check_nothing')
+#     log.debug(res)
--- a/tests20/python_client/chaos/constants.py
+++ b/tests20/python_client/chaos/constants.py
@ -11,25 +11,27 @@ SERVER_HOST_DEFAULT = "127.0.0.1"
 SERVER_PORT_DEFAULT = 19530
 SERVER_VERSION = "2.0"

-HELM_NAMESPACE = "milvus"
-BRANCH = "master"

 DEFAULT_CPUS = 48

 RAW_DATA_DIR = "/test/milvus/raw_data/"

 # nars log
-LOG_PATH = "/test/milvus/benchmark/logs/{}/".format(BRANCH)
+# LOG_PATH = "/test/milvus/benchmark/logs/{}/".format(BRANCH)

 DEFAULT_DEPLOY_MODE = "single"

-NAMESPACE = "default"
+NAMESPACE = "chaos-testing"
 DEFAULT_API_VERSION = 'chaos-mesh.org/v1alpha1'
 DEFAULT_GROUP = 'chaos-mesh.org'
 DEFAULT_VERSION = 'v1alpha1'
 SUCC = 'succ'
 FAIL = 'fail'
 DELTA_PER_INS = 10
+ENTITIES_FOR_SEARCH = 1000

 CHAOS_CONFIG_ENV = 'CHAOS_CONFIG_PATH'      # name of the env variable holding the chaos config path (dir or file)
 TESTS_CONFIG_LOCATION = 'chaos_objects/'
+ALL_CHAOS_YAMLS = 'chaos_*.yaml'
+WAIT_PER_OP = 10
+DEFAULT_INDEX_PARAM = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
--- a/tests20/python_client/chaos/test_chaos.py
+++ b/tests20/python_client/chaos/test_chaos.py
@ -2,7 +2,8 @@ import pytest
 from time import sleep

 from pymilvus_orm import connections
-from checker import CreateChecker, Op
+from checker import CreateChecker, InsertFlushChecker, \
+    SearchChecker, QueryChecker, IndexChecker, Op
 from chaos_opt import ChaosOpt
 from utils.util_log import test_log as log
 from base.collection_wrapper import ApiCollectionWrapper
@ -12,6 +13,18 @@ from common.common_type import CaseLabel
 import constants


+def assert_statistic(checkers, expectations={}):
+    for k in checkers.keys():
+        # expect succ if no expectations
+        succ_rate = checkers[k].succ_rate()
+        if expectations.get(k, '') == constants.FAIL:
+            log.debug(f"Expect Fail: {str(k)} succ rate {succ_rate}, total: {checkers[k].total()}")
+            delayed_assert.expect(succ_rate < 0.49)
+        else:
+            log.debug(f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {checkers[k].total()}")
+            delayed_assert.expect(succ_rate > 0.90)
+
+
 class TestChaosBase:
    expect_create = constants.SUCC
    expect_insert = constants.SUCC
@ -19,8 +32,11 @@ class TestChaosBase:
    expect_index = constants.SUCC
    expect_search = constants.SUCC
    expect_query = constants.SUCC
-    chaos_location = ''
+    host = 'localhost'
+    port = 19530
+    _chaos_config = None
    health_checkers = {}
+    checker_threads = {}

    def parser_testcase_config(self, chaos_yaml):
        tests_yaml = constants.TESTS_CONFIG_LOCATION + 'testcases.yaml'
@ -31,12 +47,16 @@ class TestChaosBase:
            if test_chaos in chaos_yaml:
                expects = t.get('testcase', {}).get('expectation', {}) \
                    .get('cluster_1_node', {})
-                self.expect_create = expects.get(Op.create, constants.SUCC)
-                self.expect_insert = expects.get(Op.insert, constants.SUCC)
-                self.expect_flush = expects.get(Op.flush, constants.SUCC)
-                self.expect_index = expects.get(Op.index, constants.SUCC)
-                self.expect_search = expects.get(Op.search, constants.SUCC)
-                self.expect_query = expects.get(Op.query, constants.SUCC)
+                log.debug(f"yaml.expects: {expects}")
+                self.expect_create = expects.get(Op.create.value, constants.SUCC)
+                self.expect_insert = expects.get(Op.insert.value, constants.SUCC)
+                self.expect_flush = expects.get(Op.flush.value, constants.SUCC)
+                self.expect_index = expects.get(Op.index.value, constants.SUCC)
+                self.expect_search = expects.get(Op.search.value, constants.SUCC)
+                self.expect_query = expects.get(Op.query.value, constants.SUCC)
+                log.debug(f"self.expects: create:{self.expect_create}, insert:{self.expect_insert}, "
+                          f"flush:{self.expect_flush}, index:{self.expect_index}, "
+                          f"search:{self.expect_search}, query:{self.expect_query}")
                return True

        return False
@ -44,117 +64,110 @@ class TestChaosBase:

 class TestChaos(TestChaosBase):

-    @pytest.mark.tags(CaseLabel.L3)
    @pytest.fixture(scope="function", autouse=True)
-    def connection(self):
-        connections.add_connection(default={"host": "192.168.1.239", "port": 19530})
+    def connection(self, host, port):
+        connections.add_connection(default={"host": host, "port": port})
        conn = connections.connect(alias='default')
        if conn is None:
            raise Exception("no connections")
+        self.host = host
+        self.port = port
        return conn

-    @pytest.fixture(scope="function")
-    def collection_wrap_4_insert(self, connection):
-        c_wrap = ApiCollectionWrapper()
-        c_wrap.init_collection(name=cf.gen_unique_str("collection_4_insert"),
-                               schema=cf.gen_default_collection_schema(),
-                               check_task="check_nothing")
-        return c_wrap
-
-    @pytest.fixture(scope="function")
-    def collection_wrap_4_flush(self, connection):
-        c_wrap = ApiCollectionWrapper()
-        c_wrap.init_collection(name=cf.gen_unique_str("collection_4_insert"),
-                               schema=cf.gen_default_collection_schema(),
-                               check_task="check_nothing")
-        return c_wrap
-
-    @pytest.fixture(scope="function")
-    def collection_wrap_4_search(self, connection):
-        c_wrap = ApiCollectionWrapper()
-        c_wrap.init_collection(name=cf.gen_unique_str("collection_4_search_"),
-                               schema=cf.gen_default_collection_schema(),
-                               check_task="check_nothing")
-        c_wrap.insert(data=cf.gen_default_dataframe_data(nb=10000))
-        return c_wrap
-
    @pytest.fixture(scope="function", autouse=True)
-    def init_health_checkers(self, connection, collection_wrap_4_insert,
-                             collection_wrap_4_flush, collection_wrap_4_search):
-        checkers = {}
-        # search_ch = SearchChecker(collection_wrap=collection_wrap_4_search)
-        # checkers[Op.search] = search_ch
-        # insert_ch = InsertFlushChecker(connection=connection,
-        #                                collection_wrap=collection_wrap_4_insert)
-        # checkers[Op.insert] = insert_ch
-        # flush_ch = InsertFlushChecker(connection=connection,
-        #                               collection_wrap=collection_wrap_4_flush,
-        #                               do_flush=True)
-        # checkers[Op.flush] = flush_ch
-        create_ch = CreateChecker()
-        checkers[Op.create] = create_ch
-
+    def init_health_checkers(self, connection):
+        checkers = {
+            Op.create: CreateChecker(),
+            Op.insert: InsertFlushChecker(),
+            Op.flush: InsertFlushChecker(flush=True),
+            Op.index: IndexChecker(),
+            Op.search: SearchChecker(),
+            Op.query: QueryChecker()
+        }
        self.health_checkers = checkers

    def teardown(self):
-        for ch in self.health_checkers.values():
+        chaos_opt = ChaosOpt(self._chaos_config['kind'])
+        meta_name = self._chaos_config.get('metadata', None).get('name', None)
+        chaos_opt.delete_chaos_object(meta_name, raise_ex=False)
+        for k, ch in self.health_checkers.items():
            ch.terminate()
-        pass
+            log.debug(f"tear down: checker {k} terminated")
+        sleep(2)
+        for k, t in self.checker_threads.items():
+            log.debug(f"Thread {k} is_alive(): {t.is_alive()}")

+    @pytest.mark.tags(CaseLabel.L3)
    @pytest.mark.parametrize('chaos_yaml', get_chaos_yamls())
    def test_chaos(self, chaos_yaml):
        # start the monitor threads to check the milvus ops
-        start_monitor_threads(self.health_checkers)
+        log.debug("*********************Chaos Test Start**********************")
+        log.debug(connections.get_connection_addr('default'))
+        self.checker_threads = start_monitor_threads(self.health_checkers)

        # parse chaos object
-        print("test.start")
        chaos_config = gen_experiment_config(chaos_yaml)
+        self._chaos_config = chaos_config   # cache the chaos config for tear down
        log.debug(chaos_config)

        # parse the test expectations in testcases.yaml
-        self.parser_testcase_config(chaos_yaml)
+        if self.parser_testcase_config(chaos_yaml) is False:
+            log.error("Fail to get the testcase info in testcases.yaml")
+            assert False

        # wait 2 * WAIT_PER_OP seconds (20s with WAIT_PER_OP = 10) before the first assert
-        sleep(1)
+        sleep(constants.WAIT_PER_OP*2)

        # assert statistic: every op is expected to succeed (succ rate > 0.90)
+        log.debug("******1st assert before chaos: ")
        assert_statistic(self.health_checkers)

        # reset counting
        reset_counting(self.health_checkers)

        # apply chaos object
-        # chaos_opt = ChaosOpt(chaos_config['kind'])
-        # chaos_opt.create_chaos_object(chaos_config)
+        chaos_opt = ChaosOpt(chaos_config['kind'])
+        chaos_opt.create_chaos_object(chaos_config)
+        log.debug("chaos injected")

        # wait 4 * WAIT_PER_OP seconds (40s with WAIT_PER_OP = 10) while the chaos is active
-        sleep(1)
+        sleep(constants.WAIT_PER_OP*4)
+
+        for k, t in self.checker_threads.items():
+            log.debug(f"10s later: Thread {k} is_alive(): {t.is_alive()}")

        # assert statistic
-        assert_statistic(self.health_checkers, expectations={Op.create: self.expect_create,
-                                                             Op.insert: self.expect_insert,
-                                                             Op.flush: self.expect_flush,
-                                                             Op.index: self.expect_index,
-                                                             Op.search: self.expect_search,
-                                                             Op.query: self.expect_query
-                                                             })
-        #
+        log.debug("******2nd assert after chaos injected: ")
+        assert_statistic(self.health_checkers,
+                         expectations={Op.create: self.expect_create,
+                                       Op.insert: self.expect_insert,
+                                       Op.flush: self.expect_flush,
+                                       Op.index: self.expect_index,
+                                       Op.search: self.expect_search,
+                                       Op.query: self.expect_query
+                                       })
+
        # delete chaos
-        # meta_name = chaos_config.get('metadata', None).get('name', None)
-        # chaos_opt.delete_chaos_object(meta_name)
+        meta_name = chaos_config.get('metadata', None).get('name', None)
+        chaos_opt.delete_chaos_object(meta_name)
+        log.debug("chaos deleted")
+        for k, t in self.checker_threads.items():
+            log.debug(f"Thread {k} is_alive(): {t.is_alive()}")
+        sleep(2)
+        # reconnect if needed
+        sleep(constants.WAIT_PER_OP)
+        reconnect(connections, self.host, self.port)

        # reset counting again
        reset_counting(self.health_checkers)

        # wait 1.5 * WAIT_PER_OP seconds (15s with WAIT_PER_OP = 10); the needed time varies by feature
-        sleep(1)
+        sleep(constants.WAIT_PER_OP*1.5)

        # assert statistic: all ops success again
+        log.debug("******3rd assert after chaos deleted: ")
        assert_statistic(self.health_checkers)

-        # terminate thread
-        for ch in self.health_checkers.values():
-            ch.terminate()
-        # log.debug("*******************Test Completed.*******************")
+        log.debug("*********************Chaos Test Completed**********************")


--- a/tests20/python_client/testcases/test_e2e.py
+++ b/tests20/python_client/testcases/test_e2e.py
@ -0,0 +1,66 @
+import pytest
+
+from base.client_base import TestcaseBase
+from common import common_func as cf
+from common import common_type as ct
+from common.common_type import CaseLabel
+from utils.util_log import test_log as log
+
+prefix = "e2e_"
+
+
+class TestE2e(TestcaseBase):
+    """ Test case of end to end"""
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("name", [cf.gen_unique_str(prefix)])
+    def test_milvus_default(self, name):
+        """
+        target: verify the basic end-to-end workflow of a collection
+        method: 1. create a collection
+                2. insert data and check the entity count
+                3. load the collection and search
+                4. insert more data and create an index
+        expected: every step succeeds
+        """
+        # create
+        collection_w = self.init_collection_wrap(name=name)
+        log.debug("assert create")
+        assert collection_w.name == name
+
+        # insert
+        data = cf.gen_default_list_data()
+        _, res = collection_w.insert(data)
+        log.debug("assert insert")
+        assert res
+
+        # flush: no explicit flush call; the check relies on num_entities
+        # reflecting the rows inserted above
+        log.debug("assert flush")
+        assert collection_w.num_entities == len(data[0])
+
+        # search
+        collection_w.load()
+        search_vectors = cf.gen_vectors(1, ct.default_dim)
+        res_1, _ = collection_w.search(data=search_vectors,
+                                       anns_field=ct.default_float_vec_field_name,
+                                       param={"nprobe": 16}, limit=1)
+        log.debug("assert search")
+        assert len(res_1) == 1
+
+        # index
+        extra_nb = 4000
+        collection_w.insert(cf.gen_default_dataframe_data(nb=extra_nb))
+        assert collection_w.num_entities == len(data[0]) + extra_nb
+        _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+        collection_w.create_index(field_name=ct.default_float_vec_field_name,
+                                  index_params=_index_params,
+                                  name=cf.gen_unique_str())
+        log.debug("assert index")
+        assert len(collection_w.indexes) == 1
+
+        # TODO: the query step below is still commented out
+        # # query
+        # term_expr = f'{ct.default_int64_field_name} in [1,2,3,4]'
+        # res, _ = collection_w.query(term_expr)
+        # assert len(res) == 4
--- a/tests20/python_client/testcases/test_partition.py
+++ b/tests20/python_client/testcases/test_partition.py
@ -308,13 +308,13 @@ class TestPartitionParams(TestcaseBase):

        # insert data
        partition_w.insert(data)
-        self._connect().flush([collection_w.name])
+        # self._connect().flush([collection_w.name])     # don't need flush for issue #5737
        assert not partition_w.is_empty
        assert partition_w.num_entities == nums

        # insert data
        partition_w.insert(data)
-        self._connect().flush([collection_w.name])
+        # self._connect().flush([collection_w.name])
        assert not partition_w.is_empty
        assert partition_w.num_entities == (nums + nums)

@ -481,9 +481,9 @@ class TestPartitionOperations(TestcaseBase):
            assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("flush", [True, False])
+    # @pytest.mark.parametrize("flush", [True, False])
    @pytest.mark.parametrize("partition_name", [cf.gen_unique_str(prefix)])
-    def test_partition_drop_non_empty_partition(self, flush, partition_name):
+    def test_partition_drop_non_empty_partition(self, partition_name):
        """
        target: verify drop a partition which has data inserted
        method: 1.create a partition with default schema
@ -502,25 +502,25 @@ class TestPartitionOperations(TestcaseBase):
        # insert data to partition
        partition_w.insert(cf.gen_default_dataframe_data())

-        # flush
-        if flush:
-            self._connect().flush([collection_w.name])
+        # # flush (removed for issue #5837)
+        # if flush:
+        #      self._connect().flush([collection_w.name])

        # drop partition
        partition_w.drop()
        assert not collection_w.has_partition(partition_name)[0]

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("flush", [True, False])
+    # @pytest.mark.parametrize("flush", [True, False])
    @pytest.mark.parametrize("partition_name, data", [(cf.gen_unique_str(prefix), cf.gen_default_list_data(nb=10))])
    @pytest.mark.parametrize("index_param", cf.gen_simple_index())
-    def test_partition_drop_indexed_partition(self, flush, partition_name, data, index_param):
+    def test_partition_drop_indexed_partition(self, partition_name, data, index_param):
        """
        target: verify drop an indexed partition
        method: 1.create a partition
                2. insert same data
                3. create an index
-                4. flush or not flush
+                4. flush or not flush (flush step removed for issue #5837)
                5. drop the partition
        expected: drop successfully
        """
@ -537,9 +537,9 @@ class TestPartitionOperations(TestcaseBase):
        # create index of collection
        collection_w.create_index(ct.default_float_vec_field_name, index_param)

-        # flush
-        if flush:
-            self._connect().flush([collection_w.name])
+        # # flush
+        # if flush:
+        #     self._connect().flush([collection_w.name])

        # drop partition
        partition_w.drop()
@ -624,7 +624,10 @@ class TestPartitionOperations(TestcaseBase):
        assert collection_w.has_partition(partition_name)[0]

        # insert data to partition
-        partition_w.insert(cf.gen_default_list_data())
+        data = cf.gen_default_list_data()
+        partition_w.insert(data)
+        assert partition_w.num_entities == len(data[0])
+        assert collection_w.num_entities == len(data[0])

        # load partition
        partition_w.load()
@ -635,6 +638,7 @@ class TestPartitionOperations(TestcaseBase):
                                      params={"nprobe": 32}, limit=1)
        assert len(res_1) == 1

+
        # release collection
        collection_w.release()

@ -643,8 +647,8 @@ class TestPartitionOperations(TestcaseBase):
                                      anns_field=ct.default_float_vec_field_name,
                                      params={"nprobe": 32}, limit=1,
                                      check_task=ct.CheckTasks.err_res,
-                                      check_items={ct.err_code: 1,
-                                                   ct.err_msg: "collection has been released"})
+                                      check_items={ct.err_code: 0,
+                                                   ct.err_msg: "not loaded into memory"})
        # release partition
        partition_w.release()

@ -666,7 +670,7 @@ class TestPartitionOperations(TestcaseBase):

        # insert data to partition
        partition_w.insert(data)
-        self._connect().flush([collection_w.name])
+        # self._connect().flush([collection_w.name])
        assert partition_w.num_entities == len(data)

    @pytest.mark.tags(CaseLabel.L1)
@ -730,7 +734,7 @@ class TestPartitionOperations(TestcaseBase):
        # insert data to partition
        max_size = 100000  # TODO: clarify the max size of data
        partition_w.insert(cf.gen_default_dataframe_data(max_size))
-        self._connect().flush([collection_w.name])
+        # self._connect().flush([collection_w.name])
        assert partition_w.num_entities == max_size

    @pytest.mark.tags(CaseLabel.L1)
--- a/tests20/python_client/utils/api_request.py
+++ b/tests20/python_client/utils/api_request.py
@ -16,7 +16,7 @@ def api_request_catch():
        def inner_wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
-                log.debug("(api_response) Response : %s " % str(res)[0:log_row_length])
+                # log.debug("(api_response) Response : %s " % str(res)[0:log_row_length])
                return res, True
            except Exception as e:
                log.error(traceback.format_exc())
@ -36,8 +36,8 @@ def api_request(_list, **kwargs):
            if len(_list) > 1:
                for a in _list[1:]:
                    arg.append(a)
-            log.debug("(api_request) Request: [%s] args: %s, kwargs: %s"
-                      % (str(func), str(arg)[0:log_row_length], str(kwargs)))
+            # log.debug("(api_request) Request: [%s] args: %s, kwargs: %s"
+            #           % (str(func), str(arg)[0:log_row_length], str(kwargs)))
            return func(*arg, **kwargs)
    return False, False