[test]Update health_checkers assertion for standby test (#26986)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/26999/head
zhuwenxing 2023-09-11 17:55:23 +08:00 committed by GitHub
parent c45c32fad4
commit 567fb23126
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 14 deletions

View File

@ -19,7 +19,7 @@ from chaos import constants
from delayed_assert import assert_expectations
from utils.util_k8s import (get_milvus_instance_name,
get_milvus_deploy_tool,
reset_healthy_checker_after_standby_activated)
record_time_when_standby_activated)
class TestBase:
@ -84,19 +84,22 @@ class TestOperations(TestBase):
if request_duration[-1] == "+":
request_duration = request_duration[:-1]
request_duration = eval(request_duration)
# start a thread to reset health_checkers when standby is activated.
t = threading.Thread(target=reset_healthy_checker_after_standby_activated,
args=(self.milvus_ns, self.release_name, target_component, self.health_checkers),
# start a thread to record the time when standby is activated
t = threading.Thread(target=record_time_when_standby_activated,
args=(self.milvus_ns, self.release_name, target_component),
kwargs={"timeout": request_duration//2},
daemon=True)
t.start()
# t.join()
log.info('start a thread to reset health_checkers when standby is activated')
for i in range(10):
sleep(request_duration//10)
for k, v in self.health_checkers.items():
v.check_result()
if is_check:
assert_statistic(self.health_checkers)
assert_expectations()
assert_statistic(self.health_checkers, succ_rate_threshold=0.99)
for k, v in self.health_checkers.items():
log.info(f"{k} rto: {v.get_rto()}")
rto = v.get_rto()
pytest.assume(rto < 30, f"{k} rto expect 30s but get {rto}s") # rto should be less than 30s
log.info("*********************Chaos Test Completed**********************")

View File

@ -423,12 +423,14 @@ def find_activate_standby_coord_pod(namespace, release_name, coord_type):
return activate_pod_list, standby_pod_list
def reset_healthy_checker_after_standby_activated(namespace, release_name, coord_type, health_checkers, timeout=360):
def record_time_when_standby_activated(namespace, release_name, coord_type, timeout=360):
activate_pod_list_before, standby_pod_list_before = find_activate_standby_coord_pod(namespace, release_name,
coord_type)
log.info(f"check standby switch: activate_pod_list_before {activate_pod_list_before}, "
f"standby_pod_list_before {standby_pod_list_before}")
standby_activated = False
activate_pod_list_after, standby_pod_list_after = find_activate_standby_coord_pod(namespace, release_name,
coord_type)
start_time = time.time()
end_time = time.time()
while not standby_activated and end_time - start_time < timeout:
@ -443,14 +445,10 @@ def reset_healthy_checker_after_standby_activated(namespace, release_name, coord
break
except Exception as e:
log.error(f"Exception when check standby switch: {e}")
time.sleep(10)
time.sleep(1)
end_time = time.time()
if standby_activated:
time.sleep(30)
cc.reset_counting(health_checkers)
for k, v in health_checkers.items():
log.info("reset health checkers")
v.check_result()
log.info(f"Standby {coord_type} pod {activate_pod_list_after[0]} activated")
else:
log.info(f"Standby {coord_type} pod does not switch standby mode")