Chaos tests: wait for pods to be Ready before E2E checks; verbose pytest logs.

Summary of what this patch does:
  * Adds --log-cli-level=INFO to the pytest invocations in the pod-failure
    and pod-kill workflows and in chaos_test.sh.
  * Adds `kubectl wait --for=condition=Ready` on the milvus-chaos pods before
    the E2E checks.
  * pod-kill workflow: the E2E steps now run from tests/python_client and
    execute testcases/test_e2e.py before chaos/scripts/hello_milvus.py.
  * chaos_test.sh: pod, chaos_type and chaos_task become positional
    parameters ($1, $2, $3); exported log directories get a timestamp suffix.
  * Adds run.sh, which calls chaos_test.sh once per listed pod.
  * hello_milvus.py: calls collection.release() after printing results.

NOTE(review): this patch was recovered from a whitespace-flattened copy.
Leading indentation and the position of blank context lines were rebuilt
from the hunk line counts and are assumptions -- confirm against the target
tree, or apply with `git apply --ignore-whitespace`.

diff --git a/.github/workflows/pod-failure-chaos-test.yaml b/.github/workflows/pod-failure-chaos-test.yaml
index 3d5964a445..0a990d9d23 100644
--- a/.github/workflows/pod-failure-chaos-test.yaml
+++ b/.github/workflows/pod-failure-chaos-test.yaml
@@ -86,7 +86,7 @@ jobs:
           sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_failure.yaml\'/g" constants.py
           sed -i "s/CHAOS_DURATION =.*/CHAOS_DURATION = 80/g" constants.py
           cat constants.py
-          pytest -s -v test_chaos.py --host 127.0.0.1
+          pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
         timeout-minutes: 5
@@ -95,6 +95,8 @@ jobs:
         working-directory: tests/python_client/chaos
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
           python hello_milvus.py
 
       - name: Data Consist Test
@@ -103,7 +105,7 @@ jobs:
         shell: bash
         working-directory: tests/python_client/chaos
         run: |
-          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1
+          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
         timeout-minutes: 5
@@ -112,6 +114,8 @@ jobs:
         working-directory: tests/python_client/chaos
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
           python hello_milvus.py
 
       - name: Export logs
diff --git a/.github/workflows/pod-kill-chaos-test.yaml b/.github/workflows/pod-kill-chaos-test.yaml
index 992f5bb343..f00e7d65b3 100644
--- a/.github/workflows/pod-kill-chaos-test.yaml
+++ b/.github/workflows/pod-kill-chaos-test.yaml
@@ -85,13 +85,13 @@ jobs:
           sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/pod_kill\/'/g" constants.py
           sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_kill.yaml\'/g" constants.py
           cat constants.py
-          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 || echo "chaos test failed"
+          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO || echo "chaos test failed"
 
       - name: Milvus E2E Test
         timeout-minutes: 10
         if: ${{ always() }}
         shell: bash
-        working-directory: tests/python_client/chaos
+        working-directory: tests/python_client
         run: |
           kubectl get pod -n chaos-testing
           # wait all pod to be ready
@@ -104,7 +104,8 @@ jobs:
           sleep 20s
           nc -vz 127.0.0.1 19530
 
-          python scripts/hello_milvus.py
+          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO
+          python chaos/scripts/hello_milvus.py --host 127.0.0.1
 
       - name: Deploy Milvus Again If Previous E2E Test Failed
         timeout-minutes: 15
@@ -133,21 +134,25 @@ jobs:
         shell: bash
         working-directory: tests/python_client/chaos
         run: |
-          timeout 4m pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 || echo "data consist test failed"
+          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO
 
       - name: Milvus E2E Test
-        timeout-minutes: 5
+        timeout-minutes: 10
         if: ${{ always() }}
         shell: bash
-        working-directory: tests/python_client/chaos
+        working-directory: tests/python_client
         run: |
           kubectl get pod -n chaos-testing
+          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
+          kubectl get pod -n chaos-testing
           ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs kill -9
           kubectl port-forward service/milvus-chaos 19530 -n chaos-testing >/dev/null 2>&1 &
           sleep 20s
           nc -vz 127.0.0.1 19530
 
-          python scripts/hello_milvus.py
+          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO
+          python chaos/scripts/hello_milvus.py --host 127.0.0.1
 
       - name: Export logs
         if: ${{ always() }}
diff --git a/tests/python_client/chaos/chaos_test.sh b/tests/python_client/chaos/chaos_test.sh
index 553b37207a..13e328215f 100644
--- a/tests/python_client/chaos/chaos_test.sh
+++ b/tests/python_client/chaos/chaos_test.sh
@@ -12,17 +12,17 @@ elif [[ "$unamestr" == 'Darwin' ]]; then
 fi
 echo "platform: $platform"
 
-# define chaos testing object
-release=${1:-"milvus-chaos"}
-ns=${2:-"chaos-testing"}
+release="milvus-chaos"
+ns="chaos-testing"
 
 # switch namespace
 kubectl config set-context --current --namespace=${ns}
-pod="standalone"
-chaos_type="pod_kill"
-chaos_task="data-consist-test" # chaos-test or data-consist-test
-release="milvus-chaos"
-ns="chaos-testing"
+
+# set parameters
+pod=${1:-"querynode"}
+chaos_type=${2:-"pod_kill"}
+chaos_task=${3:-"chaos-test"} # chaos-test or data-consist-test
+
 
 # install milvus cluster for chaos testing
 pushd ./scripts
@@ -66,16 +66,20 @@ python scripts/hello_milvus.py --host "$host"
 
 # chaos test
 if [ "$chaos_task" == "chaos-test" ]; then
-    pytest -s -v test_chaos.py --host "$host" || echo "chaos test fail"
+    pytest -s -v test_chaos.py --host "$host" --log-cli-level=INFO || echo "chaos test fail"
 fi
 
 # data consist test
 if [ "$chaos_task" == "data-consist-test" ]; then
-    pytest -s -v test_chaos_data_consist.py --host "$host" || echo "chaos test fail"
+    pytest -s -v test_chaos_data_consist.py --host "$host" --log-cli-level=INFO || echo "chaos test fail"
 fi
 sleep 30s
 echo "start running e2e test"
+kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=milvus-chaos -n chaos-testing --timeout=360s
+kubectl wait --for=condition=Ready pod -l release=milvus-chaos -n chaos-testing --timeout=360s
+
 python scripts/hello_milvus.py --host "$host" || echo "e2e test fail"
 
 # save logs
-bash ../../scripts/export_log_k8s.sh ${ns} ${release} k8s_log/${pod}
+data=`date +%Y-%m-%d-%H-%M-%S`
+bash ../../scripts/export_log_k8s.sh ${ns} ${release} k8s_log/${pod}-${chaos_type}-${data}
diff --git a/tests/python_client/chaos/run.sh b/tests/python_client/chaos/run.sh
new file mode 100644
index 0000000000..294b824197
--- /dev/null
+++ b/tests/python_client/chaos/run.sh
@@ -0,0 +1,8 @@
+
+
+pods=("standalone" "datacoord" "proxy" "pulsar" "querynode" "rootcoord" "etcd")
+for pod in ${pods[*]}
+do
+echo "run pod kill chaos test for pod $pod "
+bash chaos_test.sh $pod
+done
\ No newline at end of file
diff --git a/tests/python_client/chaos/scripts/hello_milvus.py b/tests/python_client/chaos/scripts/hello_milvus.py
index 2cd16808dc..0fd49bb4a8 100644
--- a/tests/python_client/chaos/scripts/hello_milvus.py
+++ b/tests/python_client/chaos/scripts/hello_milvus.py
@@ -98,7 +98,7 @@ def hello_milvus(host="127.0.0.1"):
     sorted_res = sorted(res, key=lambda k: k['count'])
     for r in sorted_res:
         print(r)
-    
+    collection.release()
 
 
 import argparse