mirror of https://github.com/milvus-io/milvus.git
[skip e2e]Add memory stress chaos test (#15907)
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/15907/merge
parent
da077bcc6a
commit
7acfde46df
|
@ -0,0 +1,197 @@
|
|||
# Manually triggered chaos workflow. For every component in the matrix it
# creates a kind cluster, installs Chaos Mesh and Milvus, runs the chaos test
# with the component's mem_stress chaos object, and then re-checks Milvus with
# the E2E and data-consistency tests. Logs are uploaded as artifacts.
name: Memory Stress Chaos Test

on:
  workflow_dispatch:

jobs:
  test-memory-stress-chaos:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    strategy:
      # Keep running the other matrix entries when one component fails.
      fail-fast: false
      matrix:
        pod: [datanode, indexnode, proxy, pulsar, querynode, etcd, minio]

    steps:
      # RELEASE is the Helm release name used by every later step.
      - name: Set env param
        run: |
          echo "RELEASE=test-${{ matrix.pod }}-memory-stress" >> $GITHUB_ENV

      - name: Creating kind cluster
        uses: helm/kind-action@v1.2.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      - uses: actions/checkout@v2

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: '3.8'

      # --trusted-host expects a host (or host:port), not a URL with a scheme.
      - name: Install dependency
        uses: nick-invision/retry@v2
        with:
          timeout_minutes: 5
          max_attempts: 3
          retry_on: error
          shell: bash
          command: |
            pip install -r tests/python_client/requirements.txt --trusted-host test.pypi.org
            pip install --upgrade protobuf

      - name: Deploy Chaos Mesh
        shell: bash
        run: |
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          helm search repo chaos-mesh
          kubectl create ns chaos-testing
          helm install --wait --timeout 360s chaos-mesh chaos-mesh/chaos-mesh --namespace=chaos-testing --version v2.0.3 --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock
          kubectl get po -n chaos-testing

      # Installs Milvus (cluster values unless the matrix entry contains
      # "standalone"), forwards port 19530 and runs a smoke test.
      - name: Deploy Milvus
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          echo "latest tag:"
          bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus-dev -t master-latest -f master- -F -L -q
          helm repo add milvus https://milvus-io.github.io/milvus-helm
          helm repo update
          if [[ ${{ matrix.pod }} != *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
          if [[ ${{ matrix.pod }} == *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f standalone-values.yaml -n=chaos-testing; fi
          kubectl get pods -n chaos-testing
          sleep 20s
          kubectl get pods -n chaos-testing
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          # check whether port-forward success
          nc -vz 127.0.0.1 19530
          # check whether milvus server is healthy
          python scripts/hello_milvus.py

      # Points constants.py at the mem_stress chaos object of this matrix
      # entry, then runs the chaos test. A failing test is only echoed, so the
      # following steps still run.
      - name: Chaos Test
        timeout-minutes: 15
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          # replace chaos object
          sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/mem_stress\/'/g" constants.py
          sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_mem_stress.yaml\'/g" constants.py
          sed -i "s/RELEASE_NAME =.*/RELEASE_NAME = \'${{ env.RELEASE }}\'/g" constants.py
          cat constants.py
          timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "chaos test failed"

      - name: Result Analysis
        timeout-minutes: 15
        shell: bash
        working-directory: tests/python_client/chaos/reports
        run: |
          echo "result analysis"
          cat ${{ env.RELEASE }}.log || echo "no log file"

      # Lists StressChaos objects (the kind applied by this workflow), waits
      # for the pods, restarts the port-forward and runs the E2E test.
      # `xargs -r` skips `kill` when no port-forward process is left, so an
      # already-dead forward does not fail the step.
      - name: Milvus E2E Test
        timeout-minutes: 10
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client
        run: |
          kubectl get stresschaos -n chaos-testing
          kubectl get pod -n chaos-testing
          # wait all pod to be ready
          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl get pod -n chaos-testing
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &

          sleep 20s
          nc -vz 127.0.0.1 19530

          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
          python chaos/scripts/hello_milvus.py --host 127.0.0.1

      - name: Export logs
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          #in this step, verify whether pod has been killed by pod's age
          kubectl get po -n chaos-testing
          # export k8s log for chaos mesh and milvus
          bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/chaos-test

      # Runs only when an earlier step failed: reinstall Milvus so the
      # remaining tests have a fresh deployment to talk to.
      - name: Deploy Milvus Again If Previous E2E Test Failed
        timeout-minutes: 15
        if: ${{ failure() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          kubectl config set-context --current --namespace=chaos-testing
          bash scripts/uninstall_milvus.sh ${{ env.RELEASE }}
          if [ ${{ matrix.pod }} != "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
          if [ ${{ matrix.pod }} == "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus --set cluster.enabled=false --set etcd.replicaCount=1 --set minio.mode=standalone --set pulsar.enabled=false -n=chaos-testing; fi
          kubectl get pods -n chaos-testing
          sleep 20s
          kubectl get pods -n chaos-testing
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          # check whether port-forward success
          nc -vz 127.0.0.1 19530
          # check whether milvus server is healthy
          python scripts/hello_milvus.py

      # A failing test is only echoed here; it does not fail the step.
      - name: Data Consist Test
        timeout-minutes: 5
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "data consist chaos test failed"

      # Second E2E pass, after the data-consistency test.
      - name: Milvus E2E Test
        timeout-minutes: 10
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client
        run: |
          kubectl get pod -n chaos-testing
          kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
          kubectl get pod -n chaos-testing
          ps aux|grep forward|grep -v grep|awk '{print $2}'|xargs -r kill -9
          kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
          sleep 20s
          nc -vz 127.0.0.1 19530

          pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
          python chaos/scripts/hello_milvus.py --host 127.0.0.1

      - name: Export logs
        if: ${{ always() }}
        shell: bash
        working-directory: tests/python_client/chaos
        run: |
          #in this step, verify whether pod has been killed by pod's age
          kubectl get po -n chaos-testing
          # export k8s log for chaos mesh and milvus
          bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/data-consist-test
          bash ../../scripts/export_log_k8s.sh chaos-testing chaos-daemon k8s_logs/chaos-mesh-daemon

      - name: Upload logs
        if: ${{ always() }}
        uses: actions/upload-artifact@v2
        with:
          name: logs-${{ matrix.pod }}
          path: |
            tests/python_client/chaos/k8s_logs
            tests/python_client/chaos/reports
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for the datanode component: CPU and memory stressors
# applied for 5 minutes to the pods selected below.
apiVersion: chaos-mesh.org/v1alpha1
kind: StressChaos
metadata:
  name: test-datanode-memory-stress
  namespace: chaos-testing
spec:
  # Targets all pods matched by the selector.
  mode: all
  duration: 5m
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      component: datanode
  stressors:
    memory:
      workers: 4
      size: 2048Mi
    cpu:
      workers: 4
      load: 80
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for etcd: CPU and memory stressors applied for
# 5 minutes to the pods selected below.
apiVersion: chaos-mesh.org/v1alpha1
kind: StressChaos
metadata:
  name: test-etcd-memory-stress
  namespace: chaos-testing
spec:
  # Targets all pods matched by the selector.
  mode: all
  duration: 5m
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      app.kubernetes.io/name: etcd
  stressors:
    memory:
      workers: 4
      size: 2048Mi
    cpu:
      workers: 4
      load: 80
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for the indexnode component: CPU and memory stressors
# applied for 5 minutes to one pod matched by the selector.
kind: StressChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  name: test-indexnode-memory-stress
  namespace: chaos-testing
spec:
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      # Same placeholder release label as the other mem_stress chaos objects
      # (previously a leftover "pulsar-mem-stress-14-04-25" instance name).
      app.kubernetes.io/instance: milvus-chaos
      component: indexnode
  mode: one
  stressors:
    cpu:
      workers: 4
      load: 80
    memory:
      workers: 4
      size: 2048Mi
  duration: 5m
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for minio: CPU and memory stressors applied for
# 5 minutes to one pod matched by the selector.
kind: StressChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  # Named after the targeted component (was a copy of the datanode name).
  name: test-minio-memory-stress
  namespace: chaos-testing
spec:
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      release: milvus-chaos
      app: minio
  mode: one
  stressors:
    cpu:
      workers: 4
      load: 80
    memory:
      workers: 4
      size: 2048Mi
  duration: 5m
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for the proxy component: CPU and memory stressors
# applied for 5 minutes to one pod matched by the selector.
kind: StressChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  # Named after the targeted component (was a copy of the datanode name).
  name: test-proxy-memory-stress
  namespace: chaos-testing
spec:
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      component: proxy
  mode: one
  stressors:
    cpu:
      workers: 4
      load: 80
    memory:
      workers: 4
      size: 2048Mi
  duration: 5m
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for pulsar: CPU and memory stressors applied for
# 5 minutes to one pod matched by the selector.
kind: StressChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  # Named after the targeted component (was a copy of the datanode name).
  name: test-pulsar-memory-stress
  namespace: chaos-testing
spec:
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      release: milvus-chaos
      app: pulsar
  mode: one
  stressors:
    cpu:
      workers: 4
      load: 80
    memory:
      workers: 4
      size: 2048Mi
  duration: 5m
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for the querynode component: CPU and memory stressors
# applied for 5 minutes to one pod matched by the selector.
apiVersion: chaos-mesh.org/v1alpha1
kind: StressChaos
metadata:
  name: test-querynode-memory-stress
  namespace: chaos-testing
spec:
  mode: one
  duration: 5m
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      component: querynode
  stressors:
    memory:
      workers: 4
      size: 2048Mi
    cpu:
      workers: 4
      load: 80
|
|
@ -0,0 +1,21 @@
|
|||
# StressChaos object for the standalone component: CPU and memory stressors
# applied for 5 minutes to one pod matched by the selector.
kind: StressChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  # Named after the targeted component (was a copy of the datanode name).
  name: test-standalone-memory-stress
  namespace: chaos-testing
spec:
  selector:
    namespaces:
      - chaos-testing
    labelSelectors:
      app.kubernetes.io/instance: milvus-chaos
      component: standalone
  mode: one
  stressors:
    cpu:
      workers: 4
      load: 80
    memory:
      workers: 4
      size: 2048Mi
  duration: 5m
|
|
@ -0,0 +1,85 @@
|
|||
# Memory stress test cases, all in one file.
#
# Deployment coverage:
#   standalone:      todo
#   cluster-1-node:  11 pods (querynode, datanode, indexnode, pulsar, etcd, minio)
#   cluster-n-nodes: todo
#
# Each entry names a test case, the chaos object file it uses, and the
# expected result of the listed operations for each deployment type.

Collections:
  - testcase:
      name: test_querynode_mem_stress
      chaos: chaos_querynode_mem_stress.yaml
      expectation:
        cluster_1_node:
          search: fail
          query: fail
        cluster_n_nodes:
          search: degrade
          query: degrade

  - testcase:
      name: test_datanode_mem_stress
      chaos: chaos_datanode_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_proxy_mem_stress
      chaos: chaos_proxy_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_indexnode_mem_stress
      chaos: chaos_indexnode_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_pulsar_mem_stress
      chaos: chaos_pulsar_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_minio_mem_stress
      chaos: chaos_minio_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_etcd_mem_stress
      chaos: chaos_etcd_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade

  - testcase:
      name: test_standalone_mem_stress
      chaos: chaos_standalone_mem_stress.yaml
      expectation:
        cluster_1_node:
          flush: fail
        cluster_n_nodes:
          flush: degrade
|
Loading…
Reference in New Issue