mirror of https://github.com/milvus-io/milvus.git
[test]Update health_checkers assertion for standby test (#26986)
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/26999/head
parent
c45c32fad4
commit
567fb23126
|
@ -19,7 +19,7 @@ from chaos import constants
|
|||
from delayed_assert import assert_expectations
|
||||
from utils.util_k8s import (get_milvus_instance_name,
|
||||
get_milvus_deploy_tool,
|
||||
reset_healthy_checker_after_standby_activated)
|
||||
record_time_when_standby_activated)
|
||||
|
||||
|
||||
class TestBase:
|
||||
|
@ -84,19 +84,22 @@ class TestOperations(TestBase):
|
|||
if request_duration[-1] == "+":
|
||||
request_duration = request_duration[:-1]
|
||||
request_duration = eval(request_duration)
|
||||
# start a thread to reset health_checkers when standby is activated.
|
||||
t = threading.Thread(target=reset_healthy_checker_after_standby_activated,
|
||||
args=(self.milvus_ns, self.release_name, target_component, self.health_checkers),
|
||||
# start a thread to record the time when standby is activated
|
||||
t = threading.Thread(target=record_time_when_standby_activated,
|
||||
args=(self.milvus_ns, self.release_name, target_component),
|
||||
kwargs={"timeout": request_duration//2},
|
||||
daemon=True)
|
||||
t.start()
|
||||
# t.join()
|
||||
log.info('start a thread to reset health_checkers when standby is activated')
|
||||
for i in range(10):
|
||||
sleep(request_duration//10)
|
||||
for k, v in self.health_checkers.items():
|
||||
v.check_result()
|
||||
if is_check:
|
||||
assert_statistic(self.health_checkers)
|
||||
assert_expectations()
|
||||
assert_statistic(self.health_checkers, succ_rate_threshold=0.99)
|
||||
for k, v in self.health_checkers.items():
|
||||
log.info(f"{k} rto: {v.get_rto()}")
|
||||
rto = v.get_rto()
|
||||
pytest.assume(rto < 30, f"{k} rto expect 30s but get {rto}s") # rto should be less than 30s
|
||||
|
||||
log.info("*********************Chaos Test Completed**********************")
|
|
@ -423,12 +423,14 @@ def find_activate_standby_coord_pod(namespace, release_name, coord_type):
|
|||
return activate_pod_list, standby_pod_list
|
||||
|
||||
|
||||
def reset_healthy_checker_after_standby_activated(namespace, release_name, coord_type, health_checkers, timeout=360):
|
||||
def record_time_when_standby_activated(namespace, release_name, coord_type, timeout=360):
|
||||
activate_pod_list_before, standby_pod_list_before = find_activate_standby_coord_pod(namespace, release_name,
|
||||
coord_type)
|
||||
log.info(f"check standby switch: activate_pod_list_before {activate_pod_list_before}, "
|
||||
f"standby_pod_list_before {standby_pod_list_before}")
|
||||
standby_activated = False
|
||||
activate_pod_list_after, standby_pod_list_after = find_activate_standby_coord_pod(namespace, release_name,
|
||||
coord_type)
|
||||
start_time = time.time()
|
||||
end_time = time.time()
|
||||
while not standby_activated and end_time - start_time < timeout:
|
||||
|
@ -443,14 +445,10 @@ def reset_healthy_checker_after_standby_activated(namespace, release_name, coord
|
|||
break
|
||||
except Exception as e:
|
||||
log.error(f"Exception when check standby switch: {e}")
|
||||
time.sleep(10)
|
||||
time.sleep(1)
|
||||
end_time = time.time()
|
||||
if standby_activated:
|
||||
time.sleep(30)
|
||||
cc.reset_counting(health_checkers)
|
||||
for k, v in health_checkers.items():
|
||||
log.info("reset health checkers")
|
||||
v.check_result()
|
||||
log.info(f"Standby {coord_type} pod {activate_pod_list_after[0]} activated")
|
||||
else:
|
||||
log.info(f"Standby {coord_type} pod does not switch standby mode")
|
||||
|
||||
|
|
Loading…
Reference in New Issue