Add tests for bulk load (#16780)

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
yanliang567 2022-05-06 09:51:51 +08:00 committed by GitHub
parent 7cc995d085
commit 84baa93cb1
3 changed files with 1535 additions and 19 deletions


@@ -1,5 +1,5 @@
from datetime import datetime
import time
from pymilvus import utility
import sys
@@ -16,6 +16,59 @@ class ApiUtilityWrapper:
    ut = utility

    def bulk_load(self, collection_name, partition_name="",
                  channels="", row_based=True, files="", timeout=None,
                  using="default", check_task=None, check_items=None, **kwargs):
        func_name = sys._getframe().f_code.co_name
        res, is_succ = api_request([self.ut.bulk_load, collection_name, partition_name,
                                    channels, row_based, files, timeout,
                                    using], **kwargs)
        check_result = ResponseChecker(res, func_name, check_task, check_items, is_succ,
                                       collection_name=collection_name, using=using).run()
        return res, check_result

    def get_bulk_load_state(self, task_id, timeout=None, using="default", check_task=None, check_items=None, **kwargs):
        func_name = sys._getframe().f_code.co_name
        res, is_succ = api_request([self.ut.get_bulk_load_state, task_id, timeout, using], **kwargs)
        check_result = ResponseChecker(res, func_name, check_task, check_items, is_succ,
                                       task_id=task_id, using=using).run()
        return res, check_result
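
For reference, a minimal sketch of how a test might drive these two wrappers. The collection name and file list below are illustrative, and it assumes the `res` returned by `bulk_load` is the list of new task ids:

    ut_wrapper = ApiUtilityWrapper()
    # Kick off a row-based bulk load from an illustrative JSON source file.
    task_ids, _ = ut_wrapper.bulk_load(collection_name="my_collection",
                                       row_based=True,
                                       files=["bulk_load_rows.json"])
    # Query the state of the first task once.
    state, _ = ut_wrapper.get_bulk_load_state(task_ids[0])
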
    def wait_for_bulk_load_tasks_completed(self, task_ids, timeout=None, using="default", **kwargs):
        start = time.time()
        successes = {}
        fails = {}
        # Split the overall timeout evenly across the tasks for each state query.
        if timeout is not None:
            task_timeout = timeout / len(task_ids)
        else:
            task_timeout = TIMEOUT
        while (len(successes) + len(fails)) < len(task_ids):
            in_progress = {}
            time.sleep(0.5)
            for task_id in task_ids:
                if successes.get(task_id, None) is not None or fails.get(task_id, None) is not None:
                    continue
                else:
                    state, _ = self.get_bulk_load_state(task_id, task_timeout, using, **kwargs)
                    if state.state_name == "BulkLoadPersisted":  # "BulkLoadCompleted"
                        successes[task_id] = state
                    elif state.state_name == "BulkLoadFailed":
                        fails[task_id] = state
                    else:
                        in_progress[task_id] = state
            end = time.time()
            # If the overall timeout is exceeded, return all states collected so far.
            if timeout is not None:
                if end - start > timeout:
                    in_progress.update(fails)
                    in_progress.update(successes)
                    return False, in_progress
        if len(fails) == 0:
            return True, successes
        else:
            fails.update(successes)
            return False, fails
    def get_query_segment_info(self, collection_name, timeout=None, using="default", check_task=None, check_items=None):
        timeout = TIMEOUT if timeout is None else timeout
        func_name = sys._getframe().f_code.co_name
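
Putting the pieces together, a bulk-load test could block until all tasks settle. A sketch, again assuming `bulk_load` returns the task ids (`timeout=120` is an arbitrary example value):

    task_ids, _ = ut_wrapper.bulk_load(collection_name="my_collection",
                                       row_based=True, files=["bulk_load_rows.json"])
    completed, states = ut_wrapper.wait_for_bulk_load_tasks_completed(task_ids, timeout=120)
    # states maps task_id -> last observed state, whether persisted, failed, or in progress.
    assert completed, "not all bulk load tasks persisted in time: %s" % states
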
@@ -161,3 +214,4 @@ class ApiUtilityWrapper:
    def mkts_from_hybridts(self, hybridts, milliseconds=0., delta=None):
        res, _ = api_request([self.ut.mkts_from_hybridts, hybridts, milliseconds, delta])
        return res


@@ -219,11 +219,14 @@ class ResponseChecker:
                assert len(hits) == check_items["limit"]
                assert len(hits.ids) == check_items["limit"]
            else:
                if check_items.get("ids", None) is not None:
                    ids_match = pc.list_contain_check(list(hits.ids),
                                                      list(check_items["ids"]))
                    if not ids_match:
                        log.error("search_results_check: searched ids do not match the expected ids")
                    assert ids_match
                else:
                    pass  # only check nq and topK; no specific ids need to be checked
        log.info("search_results_check: limit (topK) and "
                 "ids searched for %d queries are correct" % len(search_res))
        return True
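
`pc.list_contain_check` is defined elsewhere in the test framework; a hypothetical reimplementation of the containment semantics the checker relies on (not the project's actual code):

    def list_contain_check(sublist, superlist):
        # True iff every element of sublist also appears in superlist.
        return all(item in superlist for item in sublist)
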
@@ -348,3 +351,4 @@ class ResponseChecker:
        assert len(compaction_plans.plans) == 1
        assert len(compaction_plans.plans[0].sources) == segment_num
        assert compaction_plans.plans[0].target not in compaction_plans.plans[0].sources

(Diff of the third changed file suppressed because it is too large.)