test: [cherry-pick-2.5] Add test cases for json path index (#41074)

related pr: #41016

---------

Signed-off-by: binbin lv <binbin.lv@zilliz.com>
binbin 2025-04-03 13:02:27 +08:00 committed by GitHub
parent 8065c801f2
commit 0f247c469e
20 changed files with 6294 additions and 117 deletions

View File

@ -144,6 +144,7 @@ class TestMilvusClientV2Base(Base):
timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.search, collection_name, data, filter, limit,
output_fields, search_params], **kwargs)
@ -154,10 +155,25 @@ class TestMilvusClientV2Base(Base):
return res, check_result
@trace()
def search_iterator(self, client, collection_name, data, batch_size=1000, limit=-1, filter=None, output_fields=None,
def hybrid_search(self, client, collection_name, reqs, ranker, limit=10, output_fields=None, timeout=None,
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.hybrid_search, collection_name, reqs, ranker, limit,
output_fields], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
collection_name=collection_name, reqs=reqs, ranker=ranker, limit=limit,
output_fields=output_fields, **kwargs).run()
return res, check_result
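As context for the new hybrid_search wrapper above, a minimal usage sketch; the collection, the field names "vector"/"vector_new", and the check_items values are illustrative assumptions, not taken from this PR:
# hypothetical usage inside a TestMilvusClientV2Base test; names are illustrative
from pymilvus import AnnSearchRequest, WeightedRanker
req1 = AnnSearchRequest(data=[[0.1] * 128], anns_field="vector",
                        param={"metric_type": "COSINE"}, limit=10)
req2 = AnnSearchRequest(data=[[0.2] * 128], anns_field="vector_new",
                        param={"metric_type": "L2"}, limit=10)
# rerank the two result sets and run the standard search-result checks
self.hybrid_search(client, collection_name, [req1, req2], WeightedRanker(0.6, 0.4),
                   limit=10, check_task=CheckTasks.check_search_results,
                   check_items={"nq": 1, "limit": 10})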
@trace()
def search_iterator(self, client, collection_name, data, batch_size, limit=-1, filter=None, output_fields=None,
search_params=None, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.search_iterator, collection_name, data, batch_size, filter, limit,
output_fields, search_params], **kwargs)
@ -591,6 +607,16 @@ class TestMilvusClientV2Base(Base):
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
@trace()
def use_database(self, client, db_name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.use_database, db_name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
def create_user(self, client, user_name, password, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
@ -718,6 +744,19 @@ class TestMilvusClientV2Base(Base):
object_name=object_name, db_name=db_name, **kwargs).run()
return res, check_result
@trace()
def grant_privilege_v2(self, client, role_name, privilege, collection_name, db_name=None,
timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.grant_privilege_v2, role_name, privilege, collection_name,
db_name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
role_name=role_name, privilege=privilege,
collection_name=collection_name, db_name=db_name, **kwargs).run()
return res, check_result
@trace()
def revoke_privilege(self, client, role_name, object_type, privilege, object_name, db_name="",
timeout=None, check_task=None, check_items=None, **kwargs):
@ -731,37 +770,89 @@ class TestMilvusClientV2Base(Base):
object_name=object_name, db_name=db_name, **kwargs).run()
return res, check_result
def create_privilege_group(self, client, privilege_group: str, check_task=None, check_items=None, **kwargs):
@trace()
def create_privilege_group(self, client, privilege_group: str, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.create_privilege_group, privilege_group], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
privilege_group=privilege_group, **kwargs).run()
return res, check_result
def drop_privilege_group(self, client, privilege_group: str, check_task=None, check_items=None, **kwargs):
@trace()
def drop_privilege_group(self, client, privilege_group: str, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.drop_privilege_group, privilege_group], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
privilege_group=privilege_group, **kwargs).run()
return res, check_result
def list_privilege_groups(self, client, check_task=None, check_items=None, **kwargs):
@trace()
def list_privilege_groups(self, client, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.list_privilege_groups], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
def add_privileges_to_group(self, client, privilege_group: str, privileges: list, check_task=None, check_items=None,
**kwargs):
@trace()
def add_privileges_to_group(self, client, privilege_group: str, privileges: list, timeout=None,
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.add_privileges_to_group, privilege_group, privileges], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
privilege_group=privilege_group, privileges=privileges, **kwargs).run()
return res, check_result
def remove_privileges_from_group(self, client, privilege_group: str, privileges: list, check_task=None, check_items=None,
**kwargs):
@trace()
def remove_privileges_from_group(self, client, privilege_group: str, privileges: list, timeout=None,
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.remove_privileges_from_group, privilege_group, privileges],
**kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
privilege_group=privilege_group, privileges=privileges, **kwargs).run()
return res, check_result
@trace()
def grant_privilege_v2(self, client, role_name: str, privilege: str, collection_name: str, db_name=None, timeout=None,
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.grant_privilege_v2, role_name, privilege, collection_name, db_name],
**kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
role_name=role_name, privilege=privilege,
collection_name=collection_name, db_name=db_name, **kwargs).run()
return res, check_result
@trace()
def revoke_privilege_v2(self, client, role_name: str, privilege: str, collection_name: str, db_name=None,
timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.revoke_privilege_v2, role_name, privilege, collection_name, db_name],
**kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
role_name=role_name, privilege=privilege,
collection_name=collection_name, db_name=db_name, **kwargs).run()
return res, check_result
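A small sketch of how the grant/revoke v2 wrappers might be paired in an RBAC test; the role name and privilege are illustrative assumptions:
# hypothetical: grant a collection-level privilege to a role, then revoke it
self.grant_privilege_v2(client, role_name="role_test", privilege="Search",
                        collection_name=collection_name, db_name="default")
self.revoke_privilege_v2(client, role_name="role_test", privilege="Search",
                         collection_name=collection_name, db_name="default")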
@trace()
@ -813,7 +904,7 @@ class TestMilvusClientV2Base(Base):
@trace()
def alter_collection_field(self, client, collection_name, field_name, field_params, timeout=None,
check_task=None, check_items=None, **kwargs):
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
func_name = sys._getframe().f_code.co_name
@ -824,7 +915,7 @@ class TestMilvusClientV2Base(Base):
@trace()
def alter_database_properties(self, client, db_name, properties, timeout=None,
check_task=None, check_items=None, **kwargs):
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
@ -835,7 +926,7 @@ class TestMilvusClientV2Base(Base):
@trace()
def drop_database_properties(self, client, db_name, property_keys, timeout=None,
check_task=None, check_items=None, **kwargs):
check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
@ -844,16 +935,6 @@ class TestMilvusClientV2Base(Base):
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
@trace()
def create_database(self, client, db_name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.create_database, db_name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
@trace()
def describe_database(self, client, db_name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
@ -884,4 +965,101 @@ class TestMilvusClientV2Base(Base):
check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
return res, check_result
@trace()
def run_analyzer(self, client, text, analyzer_params, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.run_analyzer, text, analyzer_params], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, text=text,
analyzer_params=analyzer_params, **kwargs).run()
return res, check_result
def compact(self, client, collection_name, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.compact, collection_name, is_clustering], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
collection_name=collection_name, is_clustering=is_clustering, **kwargs).run()
return res, check_result
@trace()
def get_compaction_state(self, client, job_id, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.get_compaction_state, job_id], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check, job_id=job_id, **kwargs).run()
return res, check_result
@trace()
def create_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.create_resource_group, name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
name=name, **kwargs).run()
return res, check_result
@trace()
def update_resource_groups(self, client, configs, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.update_resource_groups, configs], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
configs=configs, **kwargs).run()
return res, check_result
@trace()
def drop_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.drop_resource_group, name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
name=name, **kwargs).run()
return res, check_result
@trace()
def describe_resource_group(self, client, name, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.describe_resource_group, name], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
name=name, **kwargs).run()
return res, check_result
@trace()
def list_resource_groups(self, client, timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.list_resource_groups], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
**kwargs).run()
return res, check_result
@trace()
def transfer_replica(self, client, source_group, target_group, collection_name, num_replicas,
timeout=None, check_task=None, check_items=None, **kwargs):
timeout = TIMEOUT if timeout is None else timeout
kwargs.update({"timeout": timeout})
func_name = sys._getframe().f_code.co_name
res, check = api_request([client.transfer_replica, source_group, target_group, collection_name, num_replicas], **kwargs)
check_result = ResponseChecker(res, func_name, check_task, check_items, check,
source_group=source_group, target_group=target_group,
collection_name=collection_name, num_replicas=num_replicas,
**kwargs).run()
return res, check_result
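A loose sketch of how the new resource-group wrappers might be chained in a test; the group name, the replica count, and the default group name are assumptions for illustration:
# hypothetical resource-group lifecycle built on the wrappers added above
self.create_resource_group(client, "rg_test")
self.describe_resource_group(client, "rg_test")
# move one replica of an already loaded collection into the new group
self.transfer_replica(client, "__default_resource_group", "rg_test",
                      collection_name, num_replicas=1)
self.drop_resource_group(client, "rg_test")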

View File

@ -117,10 +117,15 @@ class ResponseChecker:
elif self.check_task == CheckTasks.check_collection_fields_properties:
# check field properties in describe collection response
result = self.check_collection_fields_properties(self.response, self.func_name, self.check_items)
elif self.check_task == CheckTasks.check_describe_database_property:
# describe database interface(high level api) response check
result = self.check_describe_database_property(self.response, self.func_name, self.check_items)
elif self.check_task == CheckTasks.check_insert_result:
# check `insert` interface response
result = self.check_insert_response(check_items=self.check_items)
elif self.check_task == CheckTasks.check_describe_index_property:
# describe index interface (high level api) response check
result = self.check_describe_index_property(self.response, self.func_name, self.check_items)
# Add check_items here if something new need verify
@ -297,6 +302,46 @@ class ResponseChecker:
assert field['params'].items() >= check_items[key].items()
return True
@staticmethod
def check_describe_database_property(res, func_name, check_items):
"""
According to the check_items, check the database properties in res, which is returned from func_name
:param res: actual response of describe_database
:type res: dict
:param func_name: describe_database API name
:type func_name: str
:param check_items: which items are expected to be checked
:type check_items: dict, {check_key: expected_value}
"""
exp_func_name = "describe_database"
if func_name != exp_func_name:
log.warning("The function name is {} rather than {}".format(func_name, exp_func_name))
if len(check_items) == 0:
raise Exception("No expect values found in the check task")
if check_items.get("db_name", None) is not None:
assert res["name"] == check_items.get("db_name")
if check_items.get("database.force.deny.writing", None) is not None:
if check_items.get("database.force.deny.writing") == "Missing":
assert "database.force.deny.writing" not in res
else:
assert res["database.force.deny.writing"] == check_items.get("database.force.deny.writing")
if check_items.get("database.force.deny.reading", None) is not None:
if check_items.get("database.force.deny.reading") == "Missing":
assert "database.force.deny.reading" not in res
else:
assert res["database.force.deny.reading"] == check_items.get("database.force.deny.reading")
if check_items.get("database.replica.number", None) is not None:
if check_items.get("database.replica.number") == "Missing":
assert "database.replica.number" not in res
else:
assert res["database.replica.number"] == check_items.get("database.replica.number")
if check_items.get("properties_length", None) is not None:
assert len(res) == check_items.get("properties_length")
return True
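For illustration, a hedged sketch of how a test might drive this checker; the property values below are made up, and the string "Missing" asserts that a key is absent from the describe_database response:
# hypothetical check: replica number set, deny-writing set, deny-reading key absent
self.describe_database(client, db_name,
                       check_task=CheckTasks.check_describe_database_property,
                       check_items={"db_name": db_name,
                                    "database.replica.number": "3",
                                    "database.force.deny.writing": "true",
                                    "database.force.deny.reading": "Missing"})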
@staticmethod
def check_partition_property(partition, func_name, check_items):
exp_func_name = "init_partition"
@ -428,16 +473,20 @@ class ResponseChecker:
if func_name != 'search_iterator':
log.warning("The function name is {} rather than {}".format(func_name, "search_iterator"))
search_iterator = search_res
expected_batch_size = check_items.get("batch_size", None)
expected_iterate_times = check_items.get("iterate_times", None)
pk_list = []
iterate_times = 0
while True:
try:
res = search_iterator.next()
if res is None or len(res) == 0:
iterate_times += 1
if not res:
log.info("search iteration finished, close")
search_iterator.close()
break
if check_items.get("batch_size", None):
assert len(res) <= check_items["batch_size"]
if expected_batch_size is not None:
assert len(res) <= expected_batch_size
if check_items.get("radius", None):
for distance in res.distances():
if check_items["metric_type"] == "L2":
@ -454,13 +503,14 @@ class ResponseChecker:
except Exception as e:
assert check_items["err_msg"] in str(e)
return False
if check_items.get("limit"):
if "range_filter" not in check_items and "radius" not in check_items:
assert len(pk_list) / check_items["limit"] >= 0.9
log.debug(f"check: total {len(pk_list)} results, set len: {len(set(pk_list))}")
if expected_iterate_times is not None:
assert iterate_times <= expected_iterate_times
if expected_iterate_times == 1:
assert len(pk_list) == 0  # expect no results if the external filter removed everything
assert iterate_times == 1
return True
log.debug(f"check: total {len(pk_list)} results, set len: {len(set(pk_list))}, iterate_times: {iterate_times}")
assert len(pk_list) == len(set(pk_list)) != 0
return True
@staticmethod
@ -686,3 +736,34 @@ class ResponseChecker:
assert self.response.insert_count == real, error_message.format(self.response.insert_count, real)
return True
@staticmethod
def check_describe_index_property(res, func_name, check_items):
"""
According to the check_items, check the index properties in res, which is returned from func_name
:param res: actual response of describe_index
:type res: dict
:param func_name: describe_index API name
:type func_name: str
:param check_items: which items are expected to be checked, e.g. index_type, json_cast_type, json_path, field_name, index_name
:type check_items: dict, {check_key: expected_value}
"""
exp_func_name = "describe_index"
if func_name != exp_func_name:
log.warning("The function name is {} rather than {}".format(func_name, exp_func_name))
if len(check_items) == 0:
raise Exception("No expect values found in the check task")
if check_items.get("json_cast_type", None) is not None:
assert res["json_cast_type"] == check_items.get("json_cast_type")
if check_items.get("index_type", None) is not None:
assert res["index_type"] == check_items.get("index_type")
if check_items.get("json_path", None) is not None:
assert res["json_path"] == check_items.get("json_path")
if check_items.get("field_name", None) is not None:
assert res["field_name"] == check_items.get("field_name")
if check_items.get("index_name", None) is not None:
assert res["index_name"] == check_items.get("index_name")
return True
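A rough usage sketch for the json-path-index checks this PR targets; the field, path, and index names are illustrative, and the describe_index wrapper is assumed to mirror MilvusClient.describe_index:
# hypothetical check of an INVERTED index built on my_json['a']
self.describe_index(client, collection_name, index_name="my_json_a_idx",
                    check_task=CheckTasks.check_describe_index_property,
                    check_items={"field_name": "my_json",
                                 "index_type": "INVERTED",
                                 "json_cast_type": "DOUBLE",
                                 "json_path": "my_json['a']",
                                 "index_name": "my_json_a_idx"})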

View File

@ -432,7 +432,7 @@ def gen_dynamic_field_in_numpy_file(dir, rows, start=0, force=False):
# non vector columns
data = []
if rows > 0:
data = [json.dumps({str(i): i, "name": fake.name(), "address": fake.address()}) for i in range(start, rows+start)]
data = [json.dumps({str(i): i, "name": fake.name(), "address": fake.address(), "number": i}) for i in range(start, rows+start)]
arr = np.array(data)
log.info(f"file_name: {file_name} data type: {arr.dtype} data shape: {arr.shape}")
np.save(file, arr)
@ -460,7 +460,7 @@ def gen_json_in_numpy_file(dir, data_field, rows, start=0, force=False):
if not os.path.exists(file) or force:
data = []
if rows > 0:
data = [json.dumps({"name": fake.name(), "address": fake.address()}) for i in range(start, rows+start)]
data = [json.dumps({"name": fake.name(), "address": fake.address(), "number": i}) for i in range(start, rows+start)]
arr = np.array(data)
log.info(f"file_name: {file_name} data type: {arr.dtype} data shape: {arr.shape}")
np.save(file, arr)
@ -596,7 +596,10 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
elif data_field == DataField.json_field:
if not nullable:
data = pd.Series([json.dumps({
gen_unique_str(): random.randint(-999999, 9999999)
gen_unique_str(): random.randint(-999999, 9999999),
"name": fake.name(),
"address": fake.address(),
"number": i
}) for i in range(start, rows + start)], dtype=np.dtype("str"))
else:
data = pd.Series([json.dumps({
@ -764,7 +767,8 @@ def gen_dict_data_by_data_field(data_fields, rows, start=0, float_vector=True, d
d[data_field] = random.choice([True, False])
elif data_field == DataField.json_field:
if not nullable:
d[data_field] = {str(r+start): r+start}
d[data_field] = {str(r+start): r+start, "name": fake.name(),
"address": fake.address(), "number": r+start}
else:
d[data_field] = {str(r + start): None}
elif data_field == DataField.array_bool_field:
@ -951,7 +955,7 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
def gen_dynamic_field_data_in_parquet_file(rows, start=0):
data = []
if rows > 0:
data = pd.Series([json.dumps({str(i): i, "name": fake.name(), "address": fake.address()}) for i in range(start, rows+start)], dtype=np.dtype("str"))
data = pd.Series([json.dumps({str(i): i, "name": fake.name(), "address": fake.address(), "number": i}) for i in range(start, rows+start)], dtype=np.dtype("str"))
return data

View File

@ -2409,6 +2409,43 @@ def gen_json_field_expressions_and_templates():
{"expr": "json_field['float'] <= {value_0} && json_field['float'] > {value_1} && json_field['float'] != {value_2}",
"expr_params": {"value_0": -4**5/2, "value_1": 500-1, "value_2": 500/2+260}}],
]
return expressions
def gen_json_field_expressions_all_single_operator():
"""
Gen a list of filter in expression-format(as a string)
"""
expressions = ["json_field['a'] <= 1", "json_field['a'] <= 1.0", "json_field['a'] >= 1", "json_field['a'] >= 1.0",
"json_field['a'] < 2", "json_field['a'] < 2.0", "json_field['a'] > 0", "json_field['a'] > 0.0",
"json_field['a'] <= '1'", "json_field['a'] >= '1'", "json_field['a'] < '2'", "json_field['a'] > '0'",
"json_field['a'] == 1", "json_field['a'] == 1.0", "json_field['a'] == True",
"json_field['a'] == 9707199254740993.0", "json_field['a'] == 9707199254740992", "json_field['a'] == '1'",
"json_field['a'] != '1'", "json_field['a'] like '1%'", "json_field['a'] like '%1'",
"json_field['a'] like '%1%'", "json_field['a'] LIKE '1%'", "json_field['a'] LIKE '%1'",
"json_field['a'] LIKE '%1%'", "EXISTS json_field['a']", "exists json_field['a']",
"EXISTS json_field['a']['b']", "exists json_field['a']['b']", "json_field['a'] + 1 >= 2",
"json_field['a'] - 1 <= 0", "json_field['a'] + 1.0 >= 2", "json_field['a'] - 1.0 <= 0",
"json_field['a'] * 2 == 2", "json_field['a'] * 1.0 == 1.0", "json_field / 1 == 1",
"json_field['a'] / 1.0 == 1", "json_field['a'] % 10 == 1", "json_field['a'] == 1**2",
"json_field['a'][0] == 1 && json_field['a'][1] == 2", "json_field['a'][0] == 1 and json_field['a'][1] == 2",
"json_field['a'][0]['b'] >=1 && json_field['a'][2] == 3",
"json_field['a'][0]['b'] >=1 and json_field['a'][2] == 3",
"json_field['a'] == 1 || json_field['a'] == '1'", "json_field['a'] == 1 or json_field['a'] == '1'",
"json_field['a'][0]['b'] >=1 || json_field['a']['b'] >=1",
"json_field['a'][0]['b'] >=1 or json_field['a']['b'] >=1",
"json_field['a'] in [1]", "json_contains(json_field['a'], 1)", "JSON_CONTAINS(json_field['a'], 1)",
"json_contains_all(json_field['a'], [2.0, '4'])", "JSON_CONTAINS_ALL(json_field['a'], [2.0, '4'])",
"json_contains_any(json_field['a'], [2.0, '4'])", "JSON_CONTAINS_ANY(json_field['a'], [2.0, '4'])",
"array_contains(json_field['a'], 2)", "ARRAY_CONTAINS(json_field['a'], 2)",
"array_contains_all(json_field['a'], [1.0, 2])", "ARRAY_CONTAINS_ALL(json_field['a'], [1.0, 2])",
"array_contains_any(json_field['a'], [1.0, 2])", "ARRAY_CONTAINS_ANY(json_field['a'], [1.0, 2])",
"array_length(json_field['a']) < 10", "ARRAY_LENGTH(json_field['a']) < 10",
"json_field is null", "json_field IS NULL", "json_field is not null", "json_field IS NOT NULL",
"json_field['a'] is null", "json_field['a'] IS NULL", "json_field['a'] is not null", "json_field['a'] IS NOT NULL"
]
return expressions
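A brief sketch of how these single-operator expressions might be consumed, assuming a loaded collection whose JSON field is literally named json_field and a query wrapper on the test base mirroring MilvusClient.query:
# hypothetical loop: run every single-operator filter and only require that it executes
for expr in cf.gen_json_field_expressions_all_single_operator():
    self.query(client, collection_name, filter=expr, output_fields=["count(*)"])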

View File

@ -1,3 +1,5 @@
import numpy as np
""" Initialized parameters """
port = 19530
epsilon = 0.000001
@ -13,6 +15,7 @@ default_top_k = 10
default_nq = 2
default_limit = 10
default_batch_size = 1000
min_limit = 1
max_limit = 16384
max_top_k = 16384
max_partition_num = 1024
@ -212,6 +215,14 @@ get_wrong_format_dict = [
{"host": 0, "port": 19520}
]
get_all_kind_data_distribution = [
1, np.float64(1.0), np.double(1.0), 9707199254740993.0, 9707199254740992,
'1', '123', '321', '213', True, False, [1, 2], [1.0, 2], None, {}, {"a": 1},
{'a': 1.0}, {'a': 9707199254740993.0}, {'a': 9707199254740992}, {'a': '1'}, {'a': '123'},
{'a': '321'}, {'a': '213'}, {'a': True}, {'a': [1, 2, 3]}, {'a': [1.0, 2, '1']}, {'a': [1.0, 2]},
{'a': None}, {'a': {'b': 1}}, {'a': {'b': 1.0}}, {'a': [{'b': 1}, 2.0, np.double(3.0), '4', True, [1, 3.0], None]}
]
""" Specially defined list """
L0_index_types = ["IVF_SQ8", "HNSW", "DISKANN"]
all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ",
@ -285,6 +296,8 @@ class CheckTasks:
check_describe_collection_property = "check_describe_collection_property"
check_insert_result = "check_insert_result"
check_collection_fields_properties = "check_collection_fields_properties"
check_describe_index_property = "check_describe_index_property"
check_describe_database_property = "check_describe_database_property"
class BulkLoadStates:

View File

@ -416,7 +416,7 @@ class TestMilvusClientAliasValid(TestMilvusClientV2Base):
self.drop_alias(client, alias)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1891, 1892")
@pytest.mark.skip(reason="pymilvus issue 1891, 1892")
def test_milvus_client_alias_default(self):
"""
target: test alias (high level api) normal case

View File

@ -113,7 +113,7 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="pymilvus issue 1554")
@pytest.mark.skip(reason="pymilvus issue 1554")
def test_milvus_client_collection_invalid_primary_field(self):
"""
target: test fast create collection name with invalid primary field
@ -163,7 +163,7 @@ class TestMilvusClientCollectionInvalid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="pymilvus issue 1872")
@pytest.mark.skip(reason="pymilvus issue 1872")
@pytest.mark.parametrize("metric_type", [1, " ", "invalid"])
def test_milvus_client_collection_invalid_metric_type(self, metric_type):
"""
@ -1114,7 +1114,7 @@ class TestMilvusClientUsingDatabaseInvalid(TestMilvusClientV2Base):
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="pymilvus issue 1900")
@pytest.mark.skip(reason="pymilvus issue 1900")
@pytest.mark.parametrize("db_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_using_database_not_exist_db_name(self, db_name):
"""
@ -1137,3 +1137,158 @@ class TestMilvusClientUsingDatabaseInvalid(TestMilvusClientV2Base):
expected: drop successfully
"""
pass
class TestMilvusClientCollectionPropertiesInvalid(TestMilvusClientV2Base):
""" Test case of alter/drop collection properties """
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("alter_name", ["%$#", "test", " "])
def test_milvus_client_alter_collection_properties_invalid_collection_name(self, alter_name):
"""
target: test alter collection properties with invalid collection name
method: alter collection properties with non-existent collection name
expected: raise exception
"""
client = self._client()
# alter collection properties
properties = {'mmap.enabled': True}
error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={alter_name}]"}
self.alter_collection_properties(client, alter_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("properties", [""])
def test_milvus_client_alter_collection_properties_invalid_properties(self, properties):
"""
target: test alter collection properties with invalid properties
method: alter collection properties with invalid properties
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
error = {ct.err_code: 1, ct.err_msg: f"`properties` value {properties} is illegal"}
self.alter_collection_properties(client, collection_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
self.drop_collection(client, collection_name)
#TODO properties with non-existent params
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("drop_name", ["%$#", "test", " "])
def test_milvus_client_drop_collection_properties_invalid_collection_name(self, drop_name):
"""
target: test drop collection properties with invalid collection name
method: drop collection properties with non-existent collection name
expected: raise exception
"""
client = self._client()
# drop collection properties
properties = {'mmap.enabled': True}
error = {ct.err_code: 100, ct.err_msg: f"collection not found[database=default][collection={drop_name}]"}
self.drop_collection_properties(client, drop_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("property_keys", ["", {}, []])
def test_milvus_client_drop_collection_properties_invalid_properties(self, property_keys):
"""
target: test drop collection properties with invalid properties
method: drop collection properties with invalid properties
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
error = {ct.err_code: 65535, ct.err_msg: f"The collection properties to alter and keys to delete must not be empty at the same time"}
self.drop_collection_properties(client, collection_name, property_keys,
check_task=CheckTasks.err_res,
check_items=error)
self.drop_collection(client, collection_name)
#TODO properties with non-existent params
class TestMilvusClientCollectionPropertiesValid(TestMilvusClientV2Base):
""" Test case of alter/drop collection properties """
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_collection_alter_collection_properties(self):
"""
target: test alter collection properties
method: alter the mmap.enabled property and verify it via describe_collection
expected: alter successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
self.release_collection(client, collection_name)
properties = {"mmap.enabled": True}
self.alter_collection_properties(client, collection_name, properties)
describe = self.describe_collection(client, collection_name)[0].get("properties")
assert describe["mmap.enabled"] == 'True'
self.release_collection(client, collection_name)
properties = {"mmap.enabled": False}
self.alter_collection_properties(client, collection_name, properties)
describe = self.describe_collection(client, collection_name)[0].get("properties")
assert describe["mmap.enabled"] == 'False'
#TODO add case that confirm the parameter is actually valid
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_collection_drop_collection_properties(self):
"""
target: test drop collection properties
method: set the mmap.enabled property, then drop it
expected: property dropped successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
self.release_collection(client, collection_name)
properties = {"mmap.enabled": True}
self.alter_collection_properties(client, collection_name, properties)
describe = self.describe_collection(client, collection_name)[0].get("properties")
assert describe["mmap.enabled"] == 'True'
property_keys = ["mmap.enabled"]
self.drop_collection_properties(client, collection_name, property_keys)
describe = self.describe_collection(client, collection_name)[0].get("properties")
assert "mmap.enabled" not in describe
#TODO add case that confirm the parameter is actually invalid
self.drop_collection(client, collection_name)

View File

@ -0,0 +1,268 @@
import pytest
import time
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from common.constants import *
from pymilvus import DataType
from pymilvus import AnnSearchRequest
from pymilvus import WeightedRanker
prefix = "client_compact"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestMilvusClientCompactInvalid(TestMilvusClientV2Base):
""" Test case of compact interface """
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2588")
@pytest.mark.parametrize("name", [1, "12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_compact_invalid_collection_name_string(self, name):
"""
target: test compact with invalid collection name
method: create connection, then compact with an invalid collection name
expected: Raise exception
"""
client = self._client()
error = {ct.err_code: 1100,
ct.err_msg: f"Invalid collection name: {name}. the first character of a collection name "
f"must be an underscore or letter: invalid parameter"}
self.compact(client, name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2587")
@pytest.mark.parametrize("name", [1])
def test_milvus_client_compact_invalid_collection_name_non_string(self, name):
"""
target: test compact with invalid collection name
method: create connection, then compact with a non-string collection name
expected: Raise exception
"""
client = self._client()
error = {ct.err_code: 1100,
ct.err_msg: f"Invalid collection name: {name}. the first character of a collection name "
f"must be an underscore or letter: invalid parameter"}
self.compact(client, name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_clustering", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_compact_invalid_is_clustering(self, invalid_clustering):
"""
target: test compact with invalid is_clustering value
method: create connection and collection, then compact with an invalid is_clustering value
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"is_clustering value {invalid_clustering} is illegal"}
self.compact(client, collection_name, is_clustering=invalid_clustering,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_job_id", ["12-s"])
def test_milvus_client_get_compact_state_invalid_job_id(self, invalid_job_id):
"""
target: test get_compaction_state with invalid job id
method: create connection and collection, then get compaction state with an invalid job id
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"compaction_id value {invalid_job_id} is illegal"}
self.get_compaction_state(client, invalid_job_id,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientCompactValid(TestMilvusClientV2Base):
""" Test case of hybrid search interface """
@pytest.fixture(scope="function", params=[False, True])
def is_clustering(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_compact_normal(self, is_clustering):
"""
target: test compact with default normal case (2 vector fields)
method: create connection and collection, insert, flush and compact
expected: successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64,
is_partition_key=True, is_clustering_key=is_clustering)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name+"new", metric_type="L2")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_vector_field_name+"new": list(rng.random((1, default_dim))[0]),
default_string_field_name: str(i)} for i in range(10*default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. compact
compact_id = self.compact(client, collection_name, is_clustering=is_clustering)[0]
cost = 180
start = time.time()
while True:
time.sleep(1)
res = self.get_compaction_state(client, compact_id, is_clustering=is_clustering)[0]
if res == "Completed":
break
if time.time() - start > cost:
raise Exception(1, f"Compact after index cost more than {cost}s")
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_compact_empty_collection(self, is_clustering):
"""
target: test compact to empty collection
method: create connection, collection, compact
expected: successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64,
is_partition_key=True, is_clustering_key=is_clustering)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. compact
self.compact(client, collection_name, is_clustering=is_clustering)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_compact_json_path_index(self, is_clustering, supported_varchar_scalar_index,
supported_json_cast_type):
"""
target: test compact on a collection with json path indexes
method: create collection with json path indexes, insert, flush and compact
expected: successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64,
is_partition_key=True, is_clustering_key=is_clustering)
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name+"new", metric_type="L2")
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_vector_field_name+"new": list(rng.random((1, default_dim))[0]),
default_string_field_name: str(i),
json_field_name: {'a': {"b": i}}} for i in range(10*default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. compact
compact_id = self.compact(client, collection_name, is_clustering=is_clustering)[0]
cost = 180
start = time.time()
while True:
time.sleep(1)
res = self.get_compaction_state(client, compact_id, is_clustering=is_clustering)[0]
if res == "Completed":
break
if time.time() - start > cost:
raise Exception(1, f"Compact after index cost more than {cost}s")
self.drop_collection(client, collection_name)

View File

@ -0,0 +1,580 @@
import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from common.constants import *
from pymilvus import DataType
prefix = "client_search"
partition_prefix = "client_partition"
db_prefix = "client_database"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestMilvusClientDatabaseInvalid(TestMilvusClientV2Base):
""" Test case of database """
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("db_name", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_create_database_invalid_db_name(self, db_name):
"""
target: test fast create database with invalid db name
method: create database with invalid db name
expected: raise exception
"""
client = self._client()
# 1. create database
error = {ct.err_code: 802, ct.err_msg: f"the first character of a database name must be an underscore or letter: "
f"invalid database name[database={db_name}]"}
self.create_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_database_name_over_max_length(self):
"""
target: test fast create database with over max db name length
method: create database with over max db name length
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = "a".join("a" for i in range(256))
error = {ct.err_code: 802, ct.err_msg: f"the length of a database name must be less than 255 characters"}
self.create_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_database_name_with_default(self):
"""
target: test create database with the reserved name "default"
method: create a database named "default"
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = "default"
error = {ct.err_code: 65535, ct.err_msg: f"database already exist: {db_name}"}
self.create_database(client, db_name, default_dim,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_create_database_with_existed_name(self):
"""
target: test create database with an existing name
method: create a database, then create another database with the same name
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
error = {ct.err_code: 65535, ct.err_msg: f"database already exist: {db_name}"}
self.create_database(client, db_name, default_dim,
check_task=CheckTasks.err_res, check_items=error)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2683")
@pytest.mark.parametrize("properties", ["hhh", []])
def test_milvus_client_create_database_with_invalid_properties(self, properties):
"""
target: test create database with invalid properties
method: create database with invalid properties
expected: raise exception
actual: Currently such errors are not very readable,
and entries of numeric types such as 1.11, 111 are not blocked
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
error = {ct.err_code: 1, ct.err_msg: f"Unexpected error, message=<unsupported operand type(s) for +: 'float' and '{type(properties).__name__}'>"}
self.create_database(client, db_name, properties,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("properties", [{"database.rep.number": 3}])
@pytest.mark.skip("A param that does not currently exist will simply have no effect, "
"but it would be better if an error were reported.")
def test_milvus_client_create_database_with_nonexistent_property_params(self, properties):
"""
target: test create database with nonexistent property params
method: create database with nonexistent property params
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
error = {ct.err_code: 1, ct.err_msg: f""}
self.create_database(client, db_name, properties=properties,
check_task=CheckTasks.err_res, check_items=error)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("db_name", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_drop_database_invalid_db_name(self, db_name):
"""
target: test drop database with invalid db name
method: drop database with invalid db name
expected: raise exception
"""
client = self._client()
error = {ct.err_code: 802, ct.err_msg: f"the first character of a database name must be an underscore or letter: "
f"invalid database name[database={db_name}]"}
self.drop_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("db_name", ["nonexistent"])
@pytest.mark.skip("Deleting a db that does not exist does not report an error, "
"but it would be better if an error were reported.")
def test_milvus_client_drop_database_nonexistent_db_name(self, db_name):
"""
target: test drop database with nonexistent db name
method: drop database with nonexistent db name
expected: raise exception
"""
client = self._client()
error = {ct.err_code: 802, ct.err_msg: f""}
self.drop_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_drop_database_has_collections(self):
"""
target: test drop database which has collections
method: drop database which has collections
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
# 2. create collection
self.use_database(client, db_name)
collection_name = cf.gen_unique_str(prefix)
self.create_collection(client, collection_name, default_dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
# 3. drop database
error = {ct.err_code: 65535, ct.err_msg: f"{db_name} not empty, must drop all collections before drop database"}
self.drop_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("db_name", ["default"])
def test_milvus_client_list_databases_with_params(self, db_name):
"""
target: test list_databases with an unexpected positional parameter
method: call list_databases with an extra positional argument
expected: raise exception
"""
client = self._client()
error = {ct.err_code: 1, ct.err_msg: f"Unexpected error, message=<unsupported operand type(s) for +: 'float' and 'str'>"}
self.list_databases(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("db_name", ["12-s", "12 s", "(mn)", "中文", "%$#", " ", "nonexistent"])
def test_milvus_client_describe_database_invalid_db_name(self, db_name):
"""
target: test describe database with invalid db name
method: describe database with invalid db name
expected: raise exception
"""
client = self._client()
# 1. create database
error = {ct.err_code: 800, ct.err_msg: f"database not found[database={db_name}]"}
self.describe_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("db_name", ["%$#", "test", " "])
def test_milvus_client_alter_database_properties_nonexistent_db_name(self, db_name):
"""
target: test alter database properties with nonexistent db name
method: alter database properties with nonexistent db name
expected: raise exception
"""
client = self._client()
# alter database properties
properties = {"database.replica.number": 2}
error = {ct.err_code: 800, ct.err_msg: f"database not found[database={db_name}]"}
self.alter_database_properties(client, db_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("properties", ["tt"])
def test_milvus_client_alter_database_properties_invalid_format(self, properties):
"""
target: test alter database properties with invalid properties format
method: alter database properties with invalid properties format
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
error = {ct.err_code: 1, ct.err_msg: f"'str' object has no attribute 'items'"}
self.alter_database_properties(client, db_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_alter_database_properties_invalid_params(self):
"""
target: test alter database properties with a nonexistent property key
method: alter database properties with the nonexistent key data.replica.number
expected: raise exception
actual: runs successfully, the unknown key is silently accepted
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
properties = {"database.force.deny.writing": "true",
"database.replica.number": "3"}
self.create_database(client, db_name, properties=properties)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"db_name": db_name,
"database.force.deny.writing": "true",
"database.replica.number": "3"})
alter_properties = {"data.replica.number": 2}
self.alter_database_properties(client, db_name, properties=alter_properties)
describe = self.describe_database(client, db_name)[0]
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("db_name", ["%$#", "test", " "])
def test_milvus_client_drop_database_properties_nonexistent_db_name(self, db_name):
"""
target: test drop database properties with nonexistent db name
method: drop database properties with nonexistent db name
expected: raise exception
"""
client = self._client()
# alter database properties
properties = {"data.replica.number": 2}
error = {ct.err_code: 800, ct.err_msg: f"database not found[database={db_name}]"}
self.drop_database_properties(client, db_name, properties,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("properties", ["", {}, []])
def test_milvus_client_drop_database_properties_invalid_format(self, properties):
"""
target: test drop database properties with invalid properties format
method: drop database properties with invalid properties format
expected: raise exception
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
error = {ct.err_code: 65535, ct.err_msg: f"alter database requires either properties or deletekeys to modify or delete keys, both cannot be empty"}
self.drop_database_properties(client, db_name, property_keys=properties,
check_task=CheckTasks.err_res,
check_items=error)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_drop_database_properties_invalid_params(self):
"""
target: test drop database properties with invalid property keys
method: drop database properties with an unrecognized property key
expected: raise exception
actual: runs successfully, nothing changed
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
properties = {"database.force.deny.writing": "true",
"database.replica.number": "3"}
self.create_database(client, db_name, properties=properties)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"db_name": db_name,
"database.force.deny.writing": "true",
"database.replica.number": "3"})
drop_properties = {"data.replica.number": 2}
self.drop_database_properties(client, db_name, property_keys=drop_properties)
describe = self.describe_database(client, db_name)[0]
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("db_name", ["nonexistent"])
def test_milvus_client_use_database_nonexistent_db_name(self, db_name):
"""
target: test use database with nonexistent db name
method: use database with nonexistent db name
expected: raise exception
"""
client = self._client()
error = {ct.err_code: 800, ct.err_msg: f"database not found[database={db_name}]"}
self.use_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
self.using_database(client, db_name,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientDatabaseValid(TestMilvusClientV2Base):
""" Test case of database interface """
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_create_drop_database_default(self):
"""
target: test create and drop database normal case
method: 1. create database 2. create collection 3. insert data 4. search & query 5. drop collection & database
expected: run successfully
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.using_database(client, db_name)
# 2. create collection
collection_name = cf.gen_unique_str(prefix)
dim = 128
self.create_collection(client, collection_name, dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": dim,
"consistency_level": 0})
# 3. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 4. search
vectors_to_search = rng.random((1, default_dim))
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
# 5. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
# 6. drop action
self.drop_collection(client, collection_name)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_create_database_with_properties(self):
"""
target: test create database with properties
method: 1. create database 2. create collection 3. insert data 4. search & query 5. drop collection & database
expected: run successfully
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
properties = {"database.force.deny.writing": "false",
"database.replica.number": "3"}
self.create_database(client, db_name, properties=properties)
describe = self.describe_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"db_name": db_name,
"database.force.deny.writing": "false",
"database.replica.number": "3"})
self.using_database(client, db_name)
# 2. create collection
collection_name = cf.gen_unique_str(prefix)
dim = 128
self.create_collection(client, collection_name, dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": dim,
"consistency_level": 0})
# 3. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 4. search
vectors_to_search = rng.random((1, default_dim))
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
# 5. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
# 6. drop action
self.drop_collection(client, collection_name)
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_alter_database_properties_default(self):
"""
target: test alter database with properties
method: 1. create database 2. alter database properties
expected: run successfully
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
properties = {"database.force.deny.writing": "true",
"database.replica.number": "3"}
self.create_database(client, db_name, properties=properties)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"db_name": db_name,
"database.force.deny.writing": "true",
"database.replica.number": "3"})
self.using_database(client, db_name)
alter_properties = {"database.replica.number": "2",
"database.force.deny.reading": "true"}
self.alter_database_properties(client, db_name, properties=alter_properties)
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"db_name": db_name,
"database.force.deny.writing": "true",
"database.force.deny.reading": "true",
"database.replica.number": "2"})
# 6. drop action
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_drop_database_properties_default(self):
"""
target: test drop database with properties
method: 1. create database 2. drop database properties
expected: run successfully
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
properties = {"database.force.deny.writing": "true",
"database.force.deny.reading": "true",
"database.replica.number": "3",
"database.max.collections": 100,
"database.diskQuota.mb": 10240}
self.create_database(client, db_name, properties=properties)
dbs = self.list_databases(client)[0]
assert db_name in dbs
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items=properties)
self.using_database(client, db_name)
drop1 = {"database.replica.number"}
self.drop_database_properties(client, db_name, property_keys=drop1)
describe = self.describe_database(client, db_name)[0]
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"database.replica.number": "Missing"})
drop2 = ["database.force.deny.writing", "database.force.deny.reading"]
self.drop_database_properties(client, db_name, property_keys=drop2)
describe = self.describe_database(client, db_name)[0]
self.describe_database(client, db_name,
check_task=CheckTasks.check_describe_database_property,
check_items={"database.force.deny.writing": "Missing",
"database.force.deny.reading": "Missing",
"properties_length": 3})
# drop3 = "database.max.collections"
# self.drop_database_properties(client, db_name, property_keys=drop3)
# it doesn't work, but no error reported
# 6. drop action
self.drop_database(client, db_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_use_database_default(self):
"""
target: test use_database
method: 1. create another database 2. create collection in default db & another db 3. list collections
expected: run successfully
"""
client = self._client()
# 1. create database
db_name = cf.gen_unique_str(db_prefix)
self.create_database(client, db_name)
dbs = self.list_databases(client)[0]
assert db_name in dbs
collection_name_default_db = cf.gen_unique_str(prefix)
self.create_collection(client, collection_name_default_db, default_dim)
collections_default_db = self.list_collections(client)[0]
assert collection_name_default_db in collections_default_db
self.use_database(client, db_name)
collection_name = cf.gen_unique_str(prefix)
self.create_collection(client, collection_name, default_dim)
collections = self.list_collections(client)[0]
assert collection_name in collections
assert collection_name_default_db not in collections
# 6. drop action
self.drop_collection(client, collection_name)
self.drop_database(client, db_name)
self.use_database(client, "default")
self.drop_collection(client, collection_name_default_db)

View File

@ -76,7 +76,7 @@ class TestMilvusClientDeleteInvalid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1869")
@pytest.mark.skip(reason="pymilvus issue 1869")
def test_milvus_client_delete_with_invalid_id_type(self):
"""
target: test delete (high level api)
@ -122,6 +122,14 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
def metric_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
@ -252,3 +260,92 @@ class TestMilvusClientDeleteValid(TestMilvusClientV2Base):
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
def test_milvus_client_delete_with_filters_json_path_index(self, enable_dynamic_field, supported_varchar_scalar_index,
supported_json_cast_type, is_flush, is_release):
"""
target: test delete after json path index created
method: create connection, collection, index, insert, delete, and search
Step: 1. create schema
2. prepare index_params with vector and all the json path index params
3. create collection with the above schema and index params
4. insert
5. flush if specified
6. release collection if specified
7. load collection if specified
8. delete with expression on json path
9. search and query to check that the deleted entities are not returned
expected: Delete and search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_float_field_name, DataType.FLOAT)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="L2")
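# each add_index below targets the same JSON field with a distinct "json_path";
# every path gets its own index, with the values at that path cast to supported_json_cast_type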
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, schema=schema,
index_params=index_params, metric_type="L2")
# 2. insert
default_nb = 1000
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i),
json_field_name: {'a': {'b': i}}} for i in range(default_nb)]
pks = self.insert(client, collection_name, rows)[0]
if is_flush:
self.flush(client, collection_name)
if is_release:
self.release_collection(client, collection_name)
self.load_collection(client, collection_name)
# 3. delete
delete_num = 3
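# every inserted row stores {'a': {'b': i}}, so this filter deletes exactly ids 0, 1 and 2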
self.delete(client, collection_name, filter=f"{json_field_name}['a']['b'] < {delete_num}")
# 4. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
for insert_id in range(delete_num):
if insert_id in insert_ids:
insert_ids.remove(insert_id)
limit = default_nb - delete_num
self.search(client, collection_name, vectors_to_search, limit=default_nb,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": limit})
# 5. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows[delete_num:],
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)

View File

@ -0,0 +1,488 @@
import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from common.constants import *
from pymilvus import DataType
from pymilvus import AnnSearchRequest
from pymilvus import WeightedRanker
prefix = "client_hybrid_search"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
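# Hybrid search call shape used throughout this module (a minimal sketch with the defaults above;
# AnnSearchRequest positional args are data, anns_field, search params and limit):
#   req1 = AnnSearchRequest(vectors, default_vector_field_name, {"level": 1}, 20, expr="id >= 0")
#   req2 = AnnSearchRequest(vectors, default_vector_field_name + "new", {"level": 1}, 20, expr="id >= 0")
#   client.hybrid_search(collection_name, [req1, req2], WeightedRanker(0.2, 0.8), limit=default_limit)
# WeightedRanker expects one weight per AnnSearchRequest, in the same order.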
class TestMilvusClientHybridSearchInvalid(TestMilvusClientV2Base):
""" Test case of hybrid search interface """
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_hybrid_search_invalid_collection_name_string(self, name):
"""
target: test hybrid search with invalid collection name
method: create connection, collection, insert and hybrid search with invalid collection name
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default][collection={name}]"}
self.hybrid_search(client, name, [sub_search1], ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2587")
@pytest.mark.parametrize("name", [1])
def test_milvus_client_hybrid_search_invalid_collection_name_non_string(self, name):
"""
target: test hybrid search with invalid collection name
method: create connection, collection, insert and hybrid search with invalid collection name
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default][collection={name}]"}
self.hybrid_search(client, name, [sub_search1], ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2588")
@pytest.mark.parametrize("reqs", ["12-s", 1])
def test_milvus_client_hybrid_search_invalid_reqs(self, reqs):
"""
target: test hybrid search with invalid reqs
method: create connection, collection, insert and hybrid search with invalid reqs
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default][collection=1]"}
self.hybrid_search(client, collection_name, reqs, ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2588")
@pytest.mark.parametrize("invalid_ranker", [1])
def test_milvus_client_hybrid_search_invalid_ranker(self, invalid_ranker):
"""
target: test hybrid search with invalid ranker
method: create connection, collection, insert and hybrid search with invalid ranker
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default][collection=1]"}
self.hybrid_search(client, collection_name, [sub_search1], invalid_ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_limit", [-1, ct.min_limit-1, "1", "12-s", "中文", "%$#"])
def test_milvus_client_hybrid_search_invalid_limit(self, invalid_limit):
"""
target: test hybrid search with invalid limit
method: create connection, collection, insert and hybrid search with invalid limit
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 1,
ct.err_msg: f"`limit` value {invalid_limit} is illegal"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=invalid_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_limit", [ct.max_limit+1])
def test_milvus_client_hybrid_search_limit_out_of_range(self, invalid_limit):
"""
target: test hybrid search with invalid limit (out of range)
method: create connection, collection, insert and hybrid search with invalid limit
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 65535,
ct.err_msg: "invalid max query result window, (offset+limit) should be in range [1, 16384], but got 16385"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=invalid_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_output_fields", [1, "1"])
def test_milvus_client_hybrid_search_invalid_output_fields(self, invalid_output_fields):
"""
target: test hybrid search with invalid output_fields
method: create connection, collection, insert and hybrid search with invalid output_fields
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 1,
ct.err_msg: f"`output_fields` value {invalid_output_fields} is illegal"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
output_fields=invalid_output_fields, check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="pymilvus issue 2589")
@pytest.mark.parametrize("invalid_partition_names", [1, "1"])
def test_milvus_client_hybrid_search_invalid_partition_names(self, invalid_partition_names):
"""
target: test hybrid search with invalid partition names
method: create connection, collection, insert and hybrid search with invalid partition names
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 1,
ct.err_msg: f"`partition_name_array` value {invalid_partition_names} is illegal"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
partition_names=invalid_partition_names, check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("invalid_partition_names", ["not_exist"])
def test_milvus_client_hybrid_search_not_exist_partition_names(self, invalid_partition_names):
"""
target: test hybrid search with not exist partition names
method: create connection, collection, insert and hybrid search with not exist partition names
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
sub_search1 = AnnSearchRequest(vectors_to_search, "embeddings", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 65535,
ct.err_msg: f"partition name {invalid_partition_names} not found"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
partition_names=[invalid_partition_names], check_task=CheckTasks.err_res,
check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_hybrid_search_not_exist_vector_name(self):
"""
target: test hybrid search on a vector field that does not exist
method: create connection, collection, and hybrid search with a non-existent vector field name
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
not_exist_vector_field = "not_exist_vector_field"
sub_search1 = AnnSearchRequest(vectors_to_search, not_exist_vector_field, {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 1100,
ct.err_msg: f"failed to create query plan: failed to get field schema by name: "
f"fieldName({not_exist_vector_field}) not found: invalid parameter"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_hybrid_search_requests_mismatch(self):
"""
target: test hybrid search when the length of the weights param mismatches the ann search requests
method: create connection, collection, and hybrid search with two weights but one request
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. hybrid search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, default_dim))
sub_search1 = AnnSearchRequest(vectors_to_search, "vector", {"level": 1}, 20, expr="id<100")
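# two weights are supplied for a single search request, which should trigger the mismatch error below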
ranker = WeightedRanker(0.2, 0.8)
error = {ct.err_code: 1100,
ct.err_msg: "the length of weights param mismatch with ann search requests: "
"invalid parameter[expected=1][actual=2]"}
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
class TestMilvusClientHybridSearchValid(TestMilvusClientV2Base):
""" Test case of hybrid search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_hybrid_search_default(self):
"""
target: test hybrid search with default normal case (2 vector fields)
method: create connection, collection, insert and hybrid search
expected: successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name+"new", metric_type="L2")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_vector_field_name+"new": list(rng.random((1, default_dim))[0]),
default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. hybrid search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
sub_search1 = AnnSearchRequest(vectors_to_search, default_vector_field_name, {"level": 1}, 20, expr="id>=0")
sub_search2 = AnnSearchRequest(vectors_to_search, default_vector_field_name+"new", {"level": 1}, 20, expr="id>=0")
ranker = WeightedRanker(0.2, 0.8)
self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_hybrid_search_single_vector(self):
"""
target: test hybrid search with just one vector field
method: create connection, collection, insert and hybrid search
expected: successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0])}
for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. hybrid search
rng = np.random.default_rng(seed=19530)
insert_ids = [i for i in range(default_nb)]
vectors_to_search = rng.random((1, default_dim))
sub_search1 = AnnSearchRequest(vectors_to_search, "vector", {"level": 1}, 20, expr="id<100")
ranker = WeightedRanker(1)
self.hybrid_search(client, collection_name, [sub_search1], ranker, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
def test_milvus_client_hybrid_search_after_json_path_index(self, supported_varchar_scalar_index,
supported_json_cast_type, is_flush, is_release):
"""
target: test hybrid search after json path index created
method: create connection, collection, insert and hybrid search
Step: 1. create schema
2. prepare index_params with the required vector index params and json path index
3. create collection with the above schema and index params
4. insert
5. hybrid search
expected: Search successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 128
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_vector_field_name+"new", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64, is_partition_key=True)
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(default_vector_field_name+"new", metric_type="L2")
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_vector_field_name+"new": list(rng.random((1, default_dim))[0]),
default_string_field_name: str(i),
json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
if is_flush:
self.flush(client, collection_name)
if is_release:
self.release_collection(client, collection_name)
self.load_collection(client, collection_name)
# 3. hybrid search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
sub_search1 = AnnSearchRequest(vectors_to_search, default_vector_field_name, {"level": 1}, 20, expr="id>=0")
sub_search2 = AnnSearchRequest(vectors_to_search, default_vector_field_name+"new", {"level": 1}, 20, expr="id>=0")
ranker = WeightedRanker(0.2, 0.8)
self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
sub_search1 = AnnSearchRequest(vectors_to_search, default_vector_field_name, {"level": 1}, 20,
expr=f"{json_field_name}['a']['b']>=10")
sub_search2 = AnnSearchRequest(vectors_to_search, default_vector_field_name + "new", {"level": 1}, 20,
expr=f"{json_field_name}['a']['b']>=10")
ranker = WeightedRanker(0.2, 0.8)
insert_ids = [i for i in range(10, default_nb)]
self.hybrid_search(client, collection_name, [sub_search1, sub_search2], ranker, limit=default_limit,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
self.drop_collection(client, collection_name)

File diff suppressed because it is too large

View File

@ -50,7 +50,7 @@ class TestMilvusClientInsertInvalid(TestMilvusClientV2Base):
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1883")
@pytest.mark.skip(reason="pymilvus issue 1883")
def test_milvus_client_insert_column_data(self):
"""
target: test insert column data
@ -509,7 +509,7 @@ class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base):
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1883")
@pytest.mark.skip(reason="pymilvus issue 1883")
def test_milvus_client_upsert_column_data(self):
"""
target: test insert column data
@ -793,6 +793,39 @@ class TestMilvusClientUpsertInvalid(TestMilvusClientV2Base):
self.upsert(client, collection_name, data=rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("nullable", [True, False])
def test_milvus_client_insert_array_element_null(self, nullable):
"""
target: test insert with a null element inside an array field
method: create connection, collection, then insert rows whose array value contains a None element
expected: raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
dim = 5
# 1. create collection
nullable_field_name = "nullable_field"
schema = self.create_schema(client, enable_dynamic_field=False)[0]
schema.add_field(default_primary_key_field_name, DataType.VARCHAR, max_length=64, is_primary=True,
auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(nullable_field_name, DataType.ARRAY, element_type=DataType.INT64, max_capacity=12,
max_length=64, nullable=nullable)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
vectors = cf.gen_vectors(default_nb, dim)
rows = [{default_primary_key_field_name: str(i), default_vector_field_name: vectors[i],
nullable_field_name: [None, 2, 3]} for i in range(default_nb)]
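# the None element inside the array value is expected to be rejected regardless of the nullable flag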
error = {ct.err_code: 1,
ct.err_msg: "The Input data type is inconsistent with defined schema, {nullable_field} field "
"should be a array, but got a {<class 'list'>} instead."}
self.insert(client, collection_name, rows,
check_task=CheckTasks.err_res,
check_items=error)
class TestMilvusClientUpsertValid(TestMilvusClientV2Base):
""" Test case of search interface """
@ -969,3 +1002,257 @@ class TestMilvusClientUpsertValid(TestMilvusClientV2Base):
self.drop_partition(client, collection_name, partition_name)
if self.has_collection(client, collection_name)[0]:
self.drop_collection(client, collection_name)
class TestMilvusClientInsertJsonPathIndexValid(TestMilvusClientV2Base):
""" Test case of insert interface """
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "Double", "Varchar", "Bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_insert_before_json_path_index(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test insert and then create json path index
method: create json path index after insert
steps: 1. create schema
2. create collection
3. insert
4. prepare json path index params with parameter "json_cast_type" and "json_path"
5. create index
expected: insert and create json path index successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
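# the rows below cover several json shapes: nested object {'a': {'b': i}}, plain scalar, empty object,
# plain array, array of objects, and an array whose first object holds a null value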
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
# 2. prepare index params
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
# 3. create index
self.create_index(client, collection_name, index_params)
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
self.describe_index(client, collection_name, index_name +'2',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
self.describe_index(client, collection_name, index_name + '4',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '4'})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_insert_after_json_path_index(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test insert after creating json path index
method: insert data after the json path index has been created
steps: 1. create schema
2. create all the index parameters including json path index
3. create collection with schema and index params
4. insert
5. check the index
expected: insert successfully after create json path index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection with schema and all the index parameters
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
index_params.add_index(field_name=json_field_name, index_name=index_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '1',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '2',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '3',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name, index_name=index_name + '4',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+50, default_dim)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': {"b": i}}} for i in
range(default_nb)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: i} for i in
range(default_nb, default_nb+10)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {}} for i in
range(default_nb+10, default_nb+20)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [1, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': 1}, 2, 3]}} for i in
range(default_nb + 20, default_nb + 30)]
self.insert(client, collection_name, rows)
rows = [{default_primary_key_field_name: i, default_vector_field_name: vectors[i],
default_string_field_name: str(i), json_field_name: {'a': [{'b': None}, 2, 3]}} for i in
range(default_nb + 30, default_nb + 40)]
self.insert(client, collection_name, rows)
# 3. check the json path index
self.describe_index(client, collection_name, index_name,
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name})
self.describe_index(client, collection_name, index_name + '1',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '1'})
self.describe_index(client, collection_name, index_name +'2',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '2'})
self.describe_index(client, collection_name, index_name + '3',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '3'})
self.describe_index(client, collection_name, index_name + '4',
check_task=CheckTasks.check_describe_index_property,
check_items={
"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]",
"index_type": supported_varchar_scalar_index,
"field_name": json_field_name,
"index_name": index_name + '4'})

View File

@ -470,7 +470,7 @@ class TestMilvusClientReleasePartitionInvalid(TestMilvusClientV2Base):
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1896")
@pytest.mark.skip(reason="pymilvus issue 1896")
@pytest.mark.parametrize("partition_name", ["12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_release_partition_invalid_partition_name(self, partition_name):
"""
@ -488,7 +488,7 @@ class TestMilvusClientReleasePartitionInvalid(TestMilvusClientV2Base):
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="pymilvus issue 1896")
@pytest.mark.skip(reason="pymilvus issue 1896")
def test_milvus_client_release_partition_invalid_partition_name_list(self):
"""
target: test release partition -- invalid partition name value

View File

@ -439,3 +439,150 @@ class TestMilvusClientGetValid(TestMilvusClientV2Base):
assert first_pk_data == first_pk_data_1
assert len(first_pk_data_1[0]) == len(output_fields_array)
self.drop_collection(client, collection_name)
class TestMilvusClientQueryJsonPathIndex(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("is_flush", [True, False])
@pytest.mark.parametrize("is_release", [True, False])
@pytest.mark.parametrize("single_data_num", [50])
def test_milvus_client_search_json_path_index_all_expressions(self, enable_dynamic_field, supported_json_cast_type,
supported_varchar_scalar_index, is_flush, is_release,
single_data_num):
"""
target: test query after json path index with all supported basic expressions
method: Query after json path index with all supported basic expressions
step: 1. create collection
2. insert with different data distribution
3. flush if specified
4. query when there is no json path index under all expressions
5. release if specified
6. prepare index params with json path index
7. create json path index
8. create same json index twice
9. reload the collection if it was released before, to make sure the new index is loaded successfully
10. otherwise sleep for 60s to make sure the new index is loaded without release and reload operations
11. query with all expressions after the json path index is created, which should get the same results
as without the json path index
expected: queries succeed after the json path index is created and return the same results as without the index
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "json_field"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON, nullable=True)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(default_vector_field_name, metric_type="COSINE")
self.create_collection(client, collection_name, schema=schema, index_params=index_params)
# 2. insert with different data distribution
vectors = cf.gen_vectors(default_nb+60, default_dim)
inserted_data_distribution = ct.get_all_kind_data_distribution
nb_single = single_data_num
for i in range(len(inserted_data_distribution)):
rows = [{default_primary_key_field_name: j, default_vector_field_name: vectors[j],
default_string_field_name: f"{j}", json_field_name: inserted_data_distribution[i]} for j in
range(i * nb_single, (i + 1) * nb_single)]
assert len(rows) == nb_single
self.insert(client, collection_name=collection_name, data=rows)
log.info(f"inserted {nb_single} {inserted_data_distribution[i]}")
# 3. flush if specified
if is_flush:
self.flush(client, collection_name)
# 4. query when there is no json path index under all expressions
# skip negative expression for issue 40685
# "my_json['a'] != 1", "my_json['a'] != 1.0", "my_json['a'] != '1'", "my_json['a'] != 1.1", "my_json['a'] not in [1]"
express_list = cf.gen_json_field_expressions_all_single_operator()
compare_dict = {}
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter {express_list[i]} before json path index is:")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=["count(*)"])[0]
count = res[0]['count(*)']
log.info(f"The count(*) after query with filter {express_list[i]} before json path index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i], output_fields=[f"{json_field_name}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{json_field_name}"])
assert count == len(id_list)
assert count == len(json_list)
compare_dict.setdefault(f'{i}', {})
compare_dict[f'{i}']["id_list"] = id_list
compare_dict[f'{i}']["json_list"] = json_list
# 5. release if specified
if is_release:
self.release_collection(client, collection_name)
self.drop_index(client, collection_name, default_vector_field_name)
# 6. prepare index params with json path index
index_name = "json_index"
index_params = self.prepare_index_params(client)[0]
json_path_list = [f"{json_field_name}", f"{json_field_name}[0]", f"{json_field_name}[1]",
f"{json_field_name}[6]", f"{json_field_name}['a']", f"{json_field_name}['a']['b']",
f"{json_field_name}['a'][0]", f"{json_field_name}['a'][6]", f"{json_field_name}['a'][0]['b']",
f"{json_field_name}['a']['b']['c']", f"{json_field_name}['a']['b'][0]['d']",
f"{json_field_name}[10000]", f"{json_field_name}['a']['c'][0]['d']"]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type="COSINE")
for i in range(len(json_path_list)):
index_params.add_index(field_name=json_field_name, index_name=index_name + f'{i}',
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": json_path_list[i]})
# 7. create json path index
self.create_index(client, collection_name, index_params)
# 8. create same json index twice
self.create_index(client, collection_name, index_params)
# 9. reload collection if released before to make sure the new index load successfully
if is_release:
self.load_collection(client, collection_name)
else:
# 10. sleep for 60s to make sure the new index load successfully without release and reload operations
time.sleep(60)
# 11. query after there is json path index under all expressions which should get the same result
# with that without json path index
for i in range(len(express_list)):
json_list = []
id_list = []
log.info(f"query with filter {express_list[i]} after json path index is:")
count = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=["count(*)"])[0]
log.info(f"The count(*) after query with filter {express_list[i]} after json path index is: {count}")
res = self.query(client, collection_name=collection_name, filter=express_list[i],
output_fields=[f"{json_field_name}"])[0]
for single in res:
id_list.append(single[f"{default_primary_key_field_name}"])
json_list.append(single[f"{json_field_name}"])
if len(json_list) != len(compare_dict[f'{i}']["json_list"]):
log.debug(f"json field after json path index under expression {express_list[i]} is:")
log.debug(json_list)
log.debug(f"json field before json path index to be compared under expression {express_list[i]} is:")
log.debug(compare_dict[f'{i}']["json_list"])
assert json_list == compare_dict[f'{i}']["json_list"]
if len(id_list) != len(compare_dict[f'{i}']["id_list"]):
log.debug(f"primary key field after json path index under expression {express_list[i]} is:")
log.debug(id_list)
log.debug(f"primary key field before json path index to be compared under expression {express_list[i]} is:")
log.debug(compare_dict[f'{i}']["id_list"])
assert id_list == compare_dict[f'{i}']["id_list"]
log.info(f"PASS with expression {express_list[i]}")

File diff suppressed because it is too large


@ -7,12 +7,16 @@ from pymilvus import DataType
from base.client_v2_base import TestMilvusClientV2Base
prefix = "milvus_client_api_search_iterator"
default_metric_type = "COSINE"
epsilon = ct.epsilon
user_pre = "user"
role_pre = "role"
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_batch_size = ct.default_batch_size
default_metric_type = "COSINE"
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
@ -31,6 +35,31 @@ default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
def external_filter_half(hits):
return hits[0: len(hits) // 2]
def external_filter_all(hits):
return []
def external_filter_nothing(hits):
return hits
def external_filter_invalid_arguments(hits, iaminvalid):
pass
def external_filter_with_outputs(hits):
results = []
for hit in hits:
# equals filter nothing if there are output_fields
if hit.distance < 1.0 and len(hit.fields) > 0:
results.append(hit)
return results
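# The helpers above are external filter callbacks for search_iterator: the iterator hands each batch of
# hits to the callback and keeps only what it returns, so a callback can drop part of a batch
# (external_filter_half), drop everything (external_filter_all), pass everything through
# (external_filter_nothing), or filter on returned fields (external_filter_with_outputs).
# external_filter_invalid_arguments deliberately takes an extra positional argument and is used below to
# check that the iterator surfaces a TypeError when the callback signature is wrong.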
class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
""" Test case of search iterator interface """
@ -39,7 +68,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_search_iterator_using_mul_db(self, search_params):
def test_milvus_client_search_iterator_using_mul_db(self):
"""
target: test search iterator(high level api) case about mul db
method: create connection, collection, insert and search iterator
@ -70,7 +99,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.flush(client, collection_name)
# 5. search_iterator
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {"params": search_params}
search_params = {"params": {}}
error_msg = "alias or database may have been changed"
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
use_mul_db=True, another_db=my_db,
@ -80,7 +109,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_search_iterator_alias_different_col(self, search_params):
def test_milvus_client_search_iterator_alias_different_col(self):
"""
target: test search iterator(high level api) case about alias
method: create connection, collection, insert and search iterator
@ -109,7 +138,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.flush(client, collection_name_new)
# 3. search_iterator
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {"params": search_params}
search_params = {"params": {}}
error_msg = "alias or database may have been changed"
self.search_iterator(client, alias, vectors_to_search, batch_size, search_params=search_params,
use_alias=True, another_collection=collection_name_new,
@ -121,6 +150,499 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.drop_alias(client, alias)
self.drop_collection(client, collection_name_new)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("ambiguous error info")
def test_milvus_client_search_iterator_collection_not_existed(self):
"""
target: test search iterator
method: search iterator with nonexistent collection name
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str("nonexistent")
error = {ct.err_code: 100,
ct.err_msg: f"collection not found[database=default]"
f"[collection={collection_name}]"}
vectors_to_search = cf.gen_vectors(1, default_dim)
insert_ids = [i for i in range(default_nb)]
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("data", ["str", [[1, 2], [3, 4]]])
def test_milvus_client_search_iterator_with_multiple_vectors(self, data):
"""
target: test search iterator with multiple vectors
method: run search iterator with multiple vectors
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
error = {ct.err_code: 1,
ct.err_msg: f"search_iterator_v2 does not support processing multiple vectors simultaneously"}
self.search_iterator(client, collection_name, data,
batch_size=5,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("data", [[]])
def test_milvus_client_search_iterator_with_empty_data(self, data):
"""
target: test search iterator with empty vector
method: run search iterator with empty vector
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
error = {ct.err_code: 1,
ct.err_msg: f"The vector data for search cannot be empty"}
self.search_iterator(client, collection_name, data,
batch_size=5,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("batch_size", [-1])
def test_milvus_client_search_iterator_with_invalid_batch_size(self, batch_size):
"""
target: test search iterator with invalid batch size
method: run search iterator with invalid batch size
expected: Raise exception
"""
# These are two inappropriate error messages:
# 1.5: `limit` value 1.5 is illegal
# "1": '<' not supported between instances of 'str' and 'int'
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"batch size cannot be less than zero"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=batch_size,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr", ["invalidexpr"])
def test_milvus_client_search_iterator_with_invalid_expr(self, expr):
"""
target: test search iterator with invalid expr
method: run search iterator with invalid expr
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1100,
ct.err_msg: f"failed to create query plan: predicate is not a boolean expression: invalidexpr, "
f"data type: JSON: invalid parameter"}
self.search_iterator(client, collection_name, vectors_to_search,
filter=expr,
batch_size=20,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("limit", [-10])
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/39066")
def test_milvus_client_search_iterator_with_invalid_limit(self, limit):
"""
target: test search iterator with invalid limit
method: run search iterator with invalid limit
expected: Raise exception
note: limit param of search_iterator will be deprecated in the future
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"`limit` value {limit} is illegal"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
limit=limit,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("output_fields", ["id"])
@pytest.mark.skip("A field that does not currently exist will simply have no effect, "
"but it would be better if an error were reported.")
def test_milvus_client_search_iterator_with_invalid_output(self, output_fields):
"""
target: test search iterator with nonexistent output field
method: run search iterator with nonexistent output field
expected: Raise exception
actual: have no error, just have no effect
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"`output_fields` value {output_fields} is illegal"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
limit=10,
output_fields=output_fields,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("search_params", ["tt"])
@pytest.mark.skip("A param that does not currently exist will simply have no effect, "
"but it would be better if an error were reported.")
def test_milvus_client_search_iterator_with_invalid_search_params(self, search_params):
"""
target: test search iterator with nonexistent search_params key
method: run search iterator with nonexistent search_params key
expected: Raise exception
actual: have no error, just have no effect
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"'str' object has no attribute 'get'"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
limit=10,
output_fields=["id", "float", "varchar"],
search_params=search_params,
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("partition_name", ["client_partition_85Jv3Pf3"])
def test_milvus_client_search_iterator_with_invalid_partition_name(self, partition_name):
"""
target: test search iterator with invalid partition name
method: run search iterator with invalid partition name
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
self.create_partition(client, collection_name, partition_name)
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2,
"num_partitions": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"`partition_name_array` value {partition_name} is illegal"}
self.search_iterator(client, collection_name, vectors_to_search,
partition_names=partition_name,
batch_size=5,
limit=10,
output_fields=["id", "float", "varchar"],
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("partition_name", ["nonexistent"])
def test_milvus_client_search_iterator_with_nonexistent_partition_name(self, partition_name):
"""
target: test search iterator with invalid partition name
method: run search iterator with invalid partition name
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 65535,
ct.err_msg: f"partition name {partition_name} not found"}
self.search_iterator(client, collection_name, vectors_to_search,
partition_names=[partition_name],
batch_size=5,
limit=10,
output_fields=["id", "float", "varchar"],
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("anns_field", ["nonexistent", ])
def test_milvus_client_search_iterator_with_nonexistent_anns_field(self, anns_field):
"""
target: test search iterator with nonexistent anns field
method: run search iterator with nonexistent anns field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1100,
ct.err_msg: f"failed to create query plan: failed to get field schema by name: "
f"fieldName({anns_field}) not found: invalid parameter"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
limit=10,
anns_field=anns_field,
output_fields=["id", "float", "varchar"],
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("round_decimal", ["tt"])
def test_milvus_client_search_iterator_with_invalid_round_decimal(self, round_decimal):
"""
target: test search iterator with invalid round_decimal
method: run search iterator with invalid round_decimal
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 2})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search
vectors_to_search = cf.gen_vectors(1, default_dim)
error = {ct.err_code: 1,
ct.err_msg: f"`round_decimal` value {round_decimal} is illegal"}
self.search_iterator(client, collection_name, vectors_to_search,
batch_size=5,
limit=10,
round_decimal=round_decimal,
output_fields=["id", "float", "varchar"],
check_task=CheckTasks.err_res,
check_items=error)
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_search_iterator_with_invalid_external_func(self):
"""
target: test search iterator with an invalid external filter function
method: create connection, collection, insert, then run search iterator with a filter callback whose signature is wrong
expected: Raise exception
"""
batch_size = 20
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {}
with pytest.raises(TypeError, match="got an unexpected keyword argument 'metric_type'"):
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=100,
external_filter_func=external_filter_invalid_arguments(metric_type="L2"),
check_task=CheckTasks.check_nothing)
it = self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=100,
external_filter_func=external_filter_invalid_arguments,
check_task=CheckTasks.check_nothing)[0]
with pytest.raises(TypeError, match="missing 1 required positional argument: 'iaminvalid'"):
it.next()
class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
""" Test case of search iterator interface """
@ -137,6 +659,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
def search_params(self, request):
yield request.param
@pytest.fixture(scope="function", params=["INVERTED"])
def supported_varchar_scalar_index(self, request):
yield request.param
@pytest.fixture(scope="function", params=["DOUBLE", "VARCHAR", "BOOL", "double", "varchar", "bool"])
def supported_json_cast_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
@ -144,7 +674,8 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_search_iterator_default(self, search_params):
@pytest.mark.parametrize("metric_type", ct.float_metrics)
def test_milvus_client_search_iterator_default(self, metric_type):
"""
target: test search iterator (high level api) normal case
method: create connection, collection, insert and search iterator
@ -152,10 +683,12 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
"""
batch_size = 20
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
self.create_collection(client, collection_name, default_dim, metric_type=metric_type,
consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
@ -164,24 +697,69 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
rows = [{default_primary_key_field_name: i,
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0,
default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
check_task=CheckTasks.check_search_iterator, check_items=check_items)
search_params = {"params": {}}
self.search_iterator(client, collection_name=collection_name, data=vectors_to_search,
anns_field=default_vector_field_name,
search_params=search_params, batch_size=batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": metric_type, "batch_size": batch_size})
limit = 200
res = self.search(client, collection_name, vectors_to_search,
search_params=search_params, limit=200,
check_task=CheckTasks.check_search_results,
check_items={"nq": 1, "limit": limit, "enable_milvus_client_api": True})[0]
for limit in [batch_size - 3, batch_size, batch_size * 2, -1]:
if metric_type != "L2":
radius = res[0][limit // 2].get('distance', 0) - 0.1 # pick a radius to make sure there exists results
range_filter = res[0][0].get('distance', 0) + 0.1
else:
radius = res[0][limit // 2].get('distance', 0) + 0.1
range_filter = res[0][0].get('distance', 0) - 0.1
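# range-search bounds depend on the metric: for COSINE/IP (larger distance is better) results fall in
# (radius, range_filter], while for L2 (smaller is better) they fall in [range_filter, radius), which is
# why the +/- 0.1 adjustments above are flipped per metric.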
search_params = {"params": {"radius": radius, "range_filter": range_filter}}
log.debug(f"search iterator with limit={limit} radius={radius}, range_filter={range_filter}")
expected_batch_size = batch_size if limit == -1 else min(batch_size, limit)
# external filter not set
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter half
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_half,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter nothing
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_nothing,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter with outputs
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit, output_fields=["*"],
external_filter_func=external_filter_with_outputs,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter all
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_all,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": 0, "iterate_times": 1})
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)
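A minimal sketch of the raw pymilvus search_iterator call that these wrapper methods exercise; the URI and collection name are illustrative assumptions, and the collection is assumed to be created and loaded already:

from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # assumed local deployment
iterator = client.search_iterator(
    collection_name="iterator_demo",   # assumed existing, loaded collection with an 8-dim float vector field
    data=[[0.1] * 8],                  # exactly one query vector; multiple vectors raise an error
    batch_size=20,                     # max rows returned per next() call
    limit=100,                         # overall cap across all batches (-1 means unlimited)
    search_params={"params": {}},
)
while True:
    page = iterator.next()
    if len(page) == 0:
        break
    print(len(page), page.ids()[:3])   # each page carries at most batch_size hits
iterator.close()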
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("TODO: need update the case steps and assertion")
@pytest.mark.parametrize("nullable", [True, False])
def test_milvus_client_search_iterator_about_nullable_default(self, nullable, search_params):
"""
@ -226,6 +804,7 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("TODO: need update the case steps and assertion")
def test_milvus_client_rename_search_iterator_default(self, search_params):
"""
target: test search iterator(high level api) normal case
@ -235,8 +814,10 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
batch_size = 20
client = self._client()
collection_name = cf.gen_unique_str(prefix)
self.using_database(client, "default")
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Bounded")
self.create_collection(client, collection_name, default_dim, metric_type=metric_type,
consistency_level="Bounded")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
@ -442,7 +1023,8 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_search_iterator_delete_with_filters(self, search_params):
@pytest.mark.parametrize('id_type', ["int", "string"])
def test_milvus_client_search_iterator_delete_with_filters(self, search_params, id_type):
"""
target: test delete (high level api)
method: create connection, collection, insert delete, and search iterator
@ -451,37 +1033,165 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
self.create_collection(client, collection_name, default_dim, id_type=id_type, max_length=128,
consistency_level="Strong")
# 2. insert
default_nb = 1000
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_nb = 2000
if id_type == 'int':
rows = [{default_primary_key_field_name: i,
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
else:
rows = [
{default_primary_key_field_name: cf.gen_unique_str()+str(i),
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
pks = self.insert(client, collection_name, rows)[0]
# 3. delete
delete_num = 3
self.delete(client, collection_name, filter=f"id < {delete_num}")
# 4. search_iterator
self.insert(client, collection_name, rows)[0]
# 3. search_iterator and delete
vectors_to_search = cf.gen_vectors(1, default_dim)
insert_ids = [i for i in range(default_nb)]
for insert_id in range(delete_num):
if insert_id in insert_ids:
insert_ids.remove(insert_id)
limit = default_nb - delete_num
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
if "range_filter" in search_params:
check_items["range_filter"] = search_params["range_filter"]
search_params = {"params": search_params}
self.search_iterator(client, collection_name, vectors_to_search, batch_size=default_batch_size,
search_params=search_params, limit=default_nb,
batch_size = 200
search_params = {"params": {}}
it = self.search_iterator(client, collection_name, vectors_to_search, batch_size=batch_size,
search_params=search_params, limit=500,
check_task=CheckTasks.check_nothing)[0]
res = it.next()
it.close()
delete_ids = res.ids()
self.delete(client, collection_name, ids=delete_ids)
# search iterator again
it2 = self.search_iterator(client, collection_name, vectors_to_search, batch_size=batch_size,
search_params=search_params, limit=500,
check_task=CheckTasks.check_nothing)[0]
res2 = it2.next()
it2.close()
for del_id in delete_ids:
assert del_id not in res2.ids()
# search iterator again
self.search_iterator(client, collection_name, vectors_to_search, batch_size=batch_size,
search_params=search_params, limit=500,
check_task=CheckTasks.check_search_iterator,
check_items=check_items)
# 5. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows[delete_num:],
"with_vec": True,
"primary_field": default_primary_key_field_name})
check_items={"batch_size": batch_size})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_search_iterator_external_filter_func_default(self):
pass
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("metric_type", ct.float_metrics)
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_client_search_iterator_after_json_path_index(self, metric_type, enable_dynamic_field,
supported_json_cast_type,
supported_varchar_scalar_index):
"""
target: test search iterator after creating json path index
method: Search iterator after creating json path index
Step: 1. create schema
2. prepare index_params with vector and all the json path index params
3. create collection with the above schema and index params
4. insert
5. flush
6. release collection
7. load collection
8. search iterator
expected: Search successfully
"""
batch_size = 20
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
json_field_name = "my_json"
schema = self.create_schema(client, enable_dynamic_field=enable_dynamic_field)[0]
schema.add_field(default_primary_key_field_name, DataType.INT64, is_primary=True, auto_id=False)
schema.add_field(default_vector_field_name, DataType.FLOAT_VECTOR, dim=default_dim)
schema.add_field(default_float_field_name, DataType.FLOAT)
schema.add_field(default_string_field_name, DataType.VARCHAR, max_length=64)
if not enable_dynamic_field:
schema.add_field(json_field_name, DataType.JSON)
index_params = self.prepare_index_params(client)[0]
index_params.add_index(field_name=default_vector_field_name, index_type="AUTOINDEX", metric_type=metric_type)
index_params.add_index(field_name=json_field_name, index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type, "json_path": f"{json_field_name}['a']['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]['b']"})
index_params.add_index(field_name=json_field_name,
index_type=supported_varchar_scalar_index,
params={"json_cast_type": supported_json_cast_type,
"json_path": f"{json_field_name}['a'][0]"})
self.create_collection(client, collection_name, schema=schema,
index_params=index_params, metric_type=metric_type)
# 2. insert
rows = [{default_primary_key_field_name: i,
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0,
default_string_field_name: str(i),
json_field_name: {'a': {"b": i}}} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. release and load collection to make sure the new index is loaded
self.release_collection(client, collection_name)
self.load_collection(client, collection_name)
# 4. search iterator
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {"params": {}}
self.search_iterator(client, collection_name=collection_name, data=vectors_to_search,
anns_field=default_vector_field_name,
search_params=search_params, batch_size=batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": metric_type, "batch_size": batch_size})
limit = 200
res = self.search(client, collection_name, vectors_to_search,
search_params=search_params, limit=limit,
check_task=CheckTasks.check_search_results,
check_items={"nq": 1, "limit": limit, "enable_milvus_client_api": True})[0]
for limit in [batch_size - 3, batch_size, batch_size * 2, -1]:
if metric_type != "L2":
radius = res[0][limit // 2].get('distance', 0) - 0.1 # pick a radius to make sure there exists results
range_filter = res[0][0].get('distance', 0) + 0.1
else:
radius = res[0][limit // 2].get('distance', 0) + 0.1
range_filter = res[0][0].get('distance', 0) - 0.1
search_params = {"params": {"radius": radius, "range_filter": range_filter}}
log.debug(f"search iterator with limit={limit} radius={radius}, range_filter={range_filter}")
expected_batch_size = batch_size if limit == -1 else min(batch_size, limit)
# external filter not set
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter half
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_half,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter nothing
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_nothing,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter with outputs
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit, output_fields=["*"],
external_filter_func=external_filter_with_outputs,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": expected_batch_size})
# external filter all
self.search_iterator(client, collection_name, vectors_to_search, batch_size,
search_params=search_params, limit=limit,
external_filter_func=external_filter_all,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": 0, "iterate_times": 1})
self.release_collection(client, collection_name)
self.drop_collection(client, collection_name)


@ -28,8 +28,8 @@ pytest-parallel
pytest-random-order
# pymilvus
pymilvus==2.5.6rc6
pymilvus[bulk_writer]==2.5.6rc6
pymilvus==2.5.7rc3
pymilvus[bulk_writer]==2.5.7rc3
# for customize config test
python-benedict==0.24.3


@ -664,6 +664,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_binary_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
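# json_cast_type is passed in mixed case here (double / VARCHAR / Bool / Double) and one index targets a
# json path ('not_exist') that is absent from the imported data; both are expected to be accepted when
# building the index, and the queries after import verify the indexed field is still filterable.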
self.collection_wrap.load()
t0 = time.time()
@ -737,6 +750,10 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field:
assert "name" in fields_from_search
assert "address" in fields_from_search
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] >= 0", output_fields=[df.json_field])
assert len(res) == entities
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
assert len(res) == 1
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True])
@ -821,6 +838,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_binary_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
self.collection_wrap.load()
log.info(f"wait for load finished and be ready for search")
time.sleep(2)
@ -901,6 +931,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
assert 0 < len(res) < entities
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] >= 0", output_fields=[df.json_field])
if not nullable:
assert len(res) == entities
else:
assert len(res) == 0
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
if not nullable:
assert len(res) == 1
else:
assert len(res) == 0
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@ -980,6 +1020,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=index_params
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
for f in binary_vec_fields:
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_binary_index
@ -1054,6 +1107,10 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
assert 0 < len(res) < entities
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] >= 0", output_fields=[df.json_field])
assert len(res) == entities
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
assert len(res) == 1
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@ -1141,6 +1198,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_binary_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
self.collection_wrap.load()
log.info(f"wait for load finished and be ready for search")
time.sleep(2)
@ -1221,6 +1291,16 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] >= 0", output_fields=[df.json_field])
if not nullable:
assert len(res) == entities
else:
assert len(res) == 0
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
if not nullable:
assert len(res) == 1
else:
assert len(res) == 0
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@ -1520,6 +1600,9 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# ["1", "2", "3"],
# [1, 2, "3"],
{"key": "value"},
{"number": 1},
{"name": fake.name()},
{"address": fake.address()}
]
for i in range(entities):
row = {
@ -1573,6 +1656,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_sparse_inverted_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
self.collection_wrap.load()
log.info(f"wait for load finished and be ready for search")
time.sleep(2)
@ -1596,7 +1692,11 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field:
assert "name" in fields_from_search
assert "address" in fields_from_search
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
if not nullable:
assert len(res) == int(entities/len(json_value))
else:
assert 0 < len(res) < int(entities/len(json_value))
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True])
@ -1663,7 +1763,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# [1, 2, 3],
# ["1", "2", "3"],
# [1, 2, "3"],
{"key": "value"},
{"key": "value"}
]
for i in range(entities):
row = {
@ -1810,6 +1910,9 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# ["1", "2", "3"],
# [1, 2, "3"],
{"key": "value"},
{"number": 1},
{"name": fake.name()},
{"address": fake.address()}
]
for i in range(entities):
row = {
@ -1858,6 +1961,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_sparse_inverted_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
self.collection_wrap.load()
log.info(f"wait for load finished and be ready for search")
time.sleep(2)
@ -1881,6 +1997,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field:
assert "name" in fields_from_search
assert "address" in fields_from_search
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
assert len(res) == int(entities / len(json_value))
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@ -1930,6 +2048,9 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
# ["1", "2", "3"],
# [1, 2, "3"],
{"key": "value"},
{"number": 1},
{"name": fake.name()},
{"address": fake.address()}
]
for i in range(entities):
row = {
@ -1983,6 +2104,19 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
self.collection_wrap.create_index(
field_name=f, index_params=ct.default_sparse_inverted_index
)
# add json path index for json field
json_path_index_params_double = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
"json_path": f"{df.json_field}['number']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_double)
json_path_index_params_varchar = {"index_type": "INVERTED", "params": {"json_cast_type": "VARCHAR",
"json_path": f"{df.json_field}['address']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_varchar)
json_path_index_params_bool = {"index_type": "INVERTED", "params": {"json_cast_type": "Bool",
"json_path": f"{df.json_field}['name']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_bool)
json_path_index_params_not_exist = {"index_type": "INVERTED", "params": {"json_cast_type": "Double",
"json_path": f"{df.json_field}['not_exist']"}}
self.collection_wrap.create_index(field_name=df.json_field, index_params=json_path_index_params_not_exist)
self.collection_wrap.load()
log.info(f"wait for load finished and be ready for search")
time.sleep(2)
@ -2006,7 +2140,11 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field:
assert "name" in fields_from_search
assert "address" in fields_from_search
res, _ = self.collection_wrap.query(expr=f"{df.json_field}['number'] == 1", output_fields=[df.json_field])
if not nullable:
assert len(res) == int(entities/len(json_value))
else:
assert 0 < len(res) < int(entities/len(json_value))
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True])


@ -3509,9 +3509,6 @@ class TestCollectionSearch(TestcaseBase):
filter_ids_set = set(filter_ids)
for hits in search_res:
ids = hits.ids
log.info("binbin2")
log.info(ids)
log.info(filter_ids_set)
assert set(ids).issubset(filter_ids_set)
# 7. create json index
default_json_path_index = {"index_type": "INVERTED", "params": {"json_cast_type": "double",
@ -3555,10 +3552,6 @@ class TestCollectionSearch(TestcaseBase):
# 11. search again with expression template and hint after json path index
search_params = default_search_params.copy()
search_params.update({"hints": "iterative_filter"})
log.info("binbin")
log.info(expr)
log.info(expr_params)
log.info(search_params)
search_res, _ = collection_w.search(search_vectors[:default_nq], default_search_field,
search_params,
limit=nb, expr=expr, expr_params=expr_params,
@ -3569,11 +3562,8 @@ class TestCollectionSearch(TestcaseBase):
filter_ids_set = set(filter_ids)
for hits in search_res:
ids = hits.ids
log.info(ids)
log.info(filter_ids_set)
assert set(ids).issubset(filter_ids_set)
@pytest.mark.tags(CaseLabel.L2)
def test_search_expression_all_data_type(self, nq, _async, null_data_percent):
"""
@ -11360,7 +11350,6 @@ class TestCollectionHybridSearchValid(TestcaseBase):
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36273")
def test_hybrid_search_overall_limit_larger_sum_each_limit(self, nq, primary_field, metric_type):
"""