From 660659123827b13b51fd329d3d4bf7175276e1eb Mon Sep 17 00:00:00 2001
From: zhuwenxing <wenxing.zhu@zilliz.com>
Date: Wed, 24 Nov 2021 11:57:15 +0800
Subject: [PATCH] [skip ci]Update chaos test (#12206)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
---
 .github/workflows/pod-failure-chaos-test.yaml |  8 ++++--
 .github/workflows/pod-kill-chaos-test.yaml    | 19 +++++++++-----
 tests/python_client/chaos/chaos_test.sh       | 26 +++++++++++--------
 tests/python_client/chaos/run.sh              |  8 ++++++
 .../chaos/scripts/hello_milvus.py             |  2 +-
 5 files changed, 42 insertions(+), 21 deletions(-)
 create mode 100644 tests/python_client/chaos/run.sh

diff --git a/.github/workflows/pod-failure-chaos-test.yaml b/.github/workflows/pod-failure-chaos-test.yaml
index 3d5964a445..0a990d9d23 100644
--- a/.github/workflows/pod-failure-chaos-test.yaml
+++ b/.github/workflows/pod-failure-chaos-test.yaml
@@ -86,7 +86,7 @@ jobs:
           sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_failure.yaml\'/g" constants.py
           sed -i "s/CHAOS_DURATION =.*/CHAOS_DURATION = 80/g" constants.py
           cat constants.py
-          pytest -s -v test_chaos.py --host 127.0.0.1
+          pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
         timeout-minutes: 5
@@ -95,6 +95,8 @@ jobs:
         working-directory: tests/python_client/chaos
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
           python hello_milvus.py
 
       - name: Data Consist Test
@@ -103,7 +105,7 @@ jobs:
         shell: bash
         working-directory: tests/python_client/chaos
         run: |
-          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1
+          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
         timeout-minutes: 5
@@ -112,6 +114,8 @@ jobs:
         working-directory: tests/python_client/chaos
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
           python hello_milvus.py
 
       - name: Export logs
diff --git a/.github/workflows/pod-kill-chaos-test.yaml b/.github/workflows/pod-kill-chaos-test.yaml
index 992f5bb343..f00e7d65b3 100644
--- a/.github/workflows/pod-kill-chaos-test.yaml
+++ b/.github/workflows/pod-kill-chaos-test.yaml
@@ -85,13 +85,13 @@ jobs:
           sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/pod_kill\/'/g" constants.py
           sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_kill.yaml\'/g" constants.py
           cat constants.py
-          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 || echo "chaos test failed"
+          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO || echo "chaos test failed"
 
       - name: Milvus E2E Test
         timeout-minutes: 10
         if: ${{ always() }}
         shell: bash
-        working-directory: tests/python_client/chaos
+        working-directory: tests/python_client
         run: |
           kubectl get pod -n chaos-testing
           # wait all pod to be ready
@@ -104,7 +104,8 @@ jobs:
           sleep 20s
           nc -vz 127.0.0.1 19530
 
-          python scripts/hello_milvus.py
+          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO
+          python chaos/scripts/hello_milvus.py --host 127.0.0.1
       
       - name: Deploy Milvus Again If Previous E2E Test Failed
         timeout-minutes: 15
@@ -133,21 +134,25 @@ jobs:
         shell: bash
         working-directory: tests/python_client/chaos
         run: |
-          timeout 4m pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 || echo "data consist test failed"
+          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
-        timeout-minutes: 5
+        timeout-minutes: 10
         if: ${{ always() }}
         shell: bash
-        working-directory: tests/python_client/chaos
+        working-directory: tests/python_client
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl get pod -n chaos-testing
           ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs kill -9  
           kubectl port-forward service/milvus-chaos 19530 -n chaos-testing >/dev/null 2>&1 &
           sleep 20s
           nc -vz 127.0.0.1 19530
 
-          python scripts/hello_milvus.py
+          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO
+          python chaos/scripts/hello_milvus.py --host 127.0.0.1
 
       - name: Export logs
         if: ${{ always() }}
diff --git a/tests/python_client/chaos/chaos_test.sh b/tests/python_client/chaos/chaos_test.sh
index 553b37207a..13e328215f 100644
--- a/tests/python_client/chaos/chaos_test.sh
+++ b/tests/python_client/chaos/chaos_test.sh
@@ -12,17 +12,17 @@ elif [[ "$unamestr" == 'Darwin' ]]; then
 fi
 echo "platform: $platform"
 
-# define chaos testing object
-release=${1:-"milvus-chaos"}
-ns=${2:-"chaos-testing"}
+release="milvus-chaos"
+ns="chaos-testing"
 
 # switch namespace
 kubectl config set-context --current --namespace=${ns}
-pod="standalone"
-chaos_type="pod_kill"
-chaos_task="data-consist-test" # chaos-test or data-consist-test 
-release="milvus-chaos"
-ns="chaos-testing"
+
+# optional positional parameters: $1 = pod, $2 = chaos_type, $3 = chaos_task
+pod=${1:-"querynode"}
+chaos_type=${2:-"pod_kill"}
+chaos_task=${3:-"chaos-test"} # chaos-test or data-consist-test
+
 
 # install milvus cluster for chaos testing
 pushd ./scripts
@@ -66,16 +66,20 @@ python scripts/hello_milvus.py --host "$host"
 # chaos test
 if [ "$chaos_task" == "chaos-test" ];
 then
-    pytest -s -v test_chaos.py --host "$host" || echo "chaos test fail"
+    pytest -s -v test_chaos.py --host "$host" --log-cli-level=INFO || echo "chaos test fail"
 fi
 # data consist test
 if [ "$chaos_task" == "data-consist-test" ];
 then
-    pytest -s -v test_chaos_data_consist.py --host "$host" || echo "chaos test fail"
+    pytest -s -v test_chaos_data_consist.py --host "$host" --log-cli-level=INFO || echo "chaos test fail"
 fi
 sleep 30s
 echo "start running e2e test"
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
+
 python scripts/hello_milvus.py --host "$host" || echo "e2e test fail"
 
 # save logs
-bash ../../scripts/export_log_k8s.sh ${ns} ${release} k8s_log/${pod}
+timestamp=$(date +%Y-%m-%d-%H-%M-%S)  # suffix that makes each run's log directory name distinct
+bash ../../scripts/export_log_k8s.sh "${ns}" "${release}" "k8s_log/${pod}-${chaos_type}-${timestamp}"
diff --git a/tests/python_client/chaos/run.sh b/tests/python_client/chaos/run.sh
new file mode 100644
index 0000000000..294b824197
--- /dev/null
+++ b/tests/python_client/chaos/run.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Run chaos_test.sh once for each Milvus component listed in `pods`.
+cd "$(dirname "$0")" || exit 1  # chaos_test.sh is invoked by relative path from this directory
+pods=("standalone" "datacoord" "proxy" "pulsar" "querynode" "rootcoord" "etcd")
+for pod in "${pods[@]}"; do
+    echo "run pod kill chaos test for pod $pod "
+    bash chaos_test.sh "$pod"
+done
diff --git a/tests/python_client/chaos/scripts/hello_milvus.py b/tests/python_client/chaos/scripts/hello_milvus.py
index 2cd16808dc..0fd49bb4a8 100644
--- a/tests/python_client/chaos/scripts/hello_milvus.py
+++ b/tests/python_client/chaos/scripts/hello_milvus.py
@@ -98,7 +98,7 @@ def hello_milvus(host="127.0.0.1"):
     sorted_res = sorted(res, key=lambda k: k['count'])
     for r in sorted_res:
         print(r)
-
+    collection.release()
 
 import argparse