From a556671119492f945f0f83c1df20dc8376e29f78 Mon Sep 17 00:00:00 2001 From: binbin <83755740+binbinlv@users.noreply.github.com> Date: Tue, 20 Feb 2024 11:58:51 +0800 Subject: [PATCH] test: add hybrid search cases (#29830) issue: #29799 Signed-off-by: binbin lv --- tests/python_client/base/client_base.py | 25 +- .../python_client/base/collection_wrapper.py | 16 + tests/python_client/check/func_check.py | 4 +- tests/python_client/common/common_func.py | 258 ++- tests/python_client/common/common_type.py | 3 + tests/python_client/requirements.txt | 3 + tests/python_client/testcases/test_index.py | 206 ++ tests/python_client/testcases/test_query.py | 5 +- tests/python_client/testcases/test_search.py | 1767 ++++++++++++++++- 9 files changed, 2193 insertions(+), 94 deletions(-) diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index 9cdc23b31a..1c93327c97 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -229,7 +229,9 @@ class TestcaseBase(Base): partition_num=0, is_binary=False, is_all_data_type=False, auto_id=False, dim=ct.default_dim, is_index=True, primary_field=ct.default_int64_field_name, is_flush=True, name=None, - enable_dynamic_field=False, with_json=True, random_primary_key=False, **kwargs): + enable_dynamic_field=False, with_json=True, random_primary_key=False, + multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR", + **kwargs): """ target: create specified collections method: 1. create collections (binary/non-binary, default/all data type, auto_id or not) @@ -251,7 +253,9 @@ class TestcaseBase(Base): # 1 create collection default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field, enable_dynamic_field=enable_dynamic_field, - with_json=with_json) + with_json=with_json, multiple_dim_array=multiple_dim_array, + is_partition_key=is_partition_key, + vector_data_type=vector_data_type) if is_binary: default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field) @@ -262,6 +266,7 @@ class TestcaseBase(Base): with_json=with_json) log.info("init_collection_general: collection creation") collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs) + vector_name_list = cf.extract_vector_field_name_list(collection_w) # 2 add extra partitions if specified (default is 1 partition named "_default") if partition_num > 0: cf.gen_partitions(collection_w, partition_num) @@ -270,22 +275,22 @@ class TestcaseBase(Base): collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \ cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json, - random_primary_key=random_primary_key) + random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array, + primary_field=primary_field, vector_data_type=vector_data_type) if is_flush: assert collection_w.is_empty is False assert collection_w.num_entities == nb + # 4 create default index if specified + if is_index: # This condition will be removed after auto index feature - if is_index: - if is_binary: - collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index) - else: - collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index) - collection_w.load() - elif is_index: if is_binary: collection_w.create_index(ct.default_binary_vec_field_name, 
ct.default_bin_flat_index) else: collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index) + if len(multiple_dim_array) != 0 or is_all_data_type: + for vector_name in vector_name_list: + collection_w.create_index(vector_name, ct.default_flat_index) + collection_w.load() return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp diff --git a/tests/python_client/base/collection_wrapper.py b/tests/python_client/base/collection_wrapper.py index 58e5974b73..2e2c230d5e 100644 --- a/tests/python_client/base/collection_wrapper.py +++ b/tests/python_client/base/collection_wrapper.py @@ -176,6 +176,22 @@ class ApiCollectionWrapper: timeout=timeout, **kwargs).run() return res, check_result + @trace() + def hybrid_search(self, reqs, rerank, limit, partition_names=None, + output_fields=None, timeout=None, round_decimal=-1, + check_task=None, check_items=None, **kwargs): + timeout = TIMEOUT if timeout is None else timeout + + func_name = sys._getframe().f_code.co_name + res, check = api_request([self.collection.hybrid_search, reqs, rerank, limit, partition_names, + output_fields, timeout, round_decimal], **kwargs) + check_result = ResponseChecker(res, func_name, check_task, check_items, check, + reqs=reqs, rerank=rerank, limit=limit, + partition_names=partition_names, + output_fields=output_fields, + timeout=timeout, **kwargs).run() + return res, check_result + @trace() def search_iterator(self, data, anns_field, param, batch_size, limit=-1, expr=None, partition_names=None, output_fields=None, timeout=None, round_decimal=-1, diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py index 0c892b58db..2783e390e7 100644 --- a/tests/python_client/check/func_check.py +++ b/tests/python_client/check/func_check.py @@ -293,8 +293,8 @@ class ResponseChecker: expected: check the search is ok """ log.info("search_results_check: checking the searching results") - if func_name != 'search': - log.warning("The function name is {} rather than {}".format(func_name, "search")) + if func_name != 'search' and func_name != 'hybrid_search': + log.warning("The function name is {} rather than {} or {}".format(func_name, "search", "hybrid_search")) if len(check_items) == 0: raise Exception("No expect values found in the check task") if check_items.get("_async", None): diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 804897c0c1..adbe4e342a 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -18,6 +18,7 @@ from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrappe from common import common_type as ct from utils.util_log import test_log as log from customize.milvus_operator import MilvusOperator +import tensorflow as tf fake = Faker() """" Methods of processing data """ @@ -142,8 +143,14 @@ def gen_double_field(name=ct.default_double_field_name, is_primary=False, descri def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim, - description=ct.default_desc, **kwargs): - float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.FLOAT_VECTOR, + description=ct.default_desc, vector_data_type="FLOAT_VECTOR", **kwargs): + if vector_data_type == "FLOAT_VECTOR": + dtype = DataType.FLOAT_VECTOR + elif vector_data_type == "FLOAT16_VECTOR": + dtype = DataType.FLOAT16_VECTOR + elif vector_data_type == "BFLOAT16_VECTOR": + dtype = DataType.BFLOAT16_VECTOR +
float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype, description=description, dim=dim, is_primary=is_primary, **kwargs) return float_vec_field @@ -157,28 +164,60 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False return binary_vec_field +def gen_float16_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim, + description=ct.default_desc, **kwargs): + float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.FLOAT16_VECTOR, + description=description, dim=dim, + is_primary=is_primary, **kwargs) + return float_vec_field + + +def gen_bfloat16_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim, + description=ct.default_desc, **kwargs): + float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.BFLOAT16_VECTOR, + description=description, dim=dim, + is_primary=is_primary, **kwargs) + return float_vec_field + + + def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True, - multiple_dim_array=[], **kwargs): + multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR", + **kwargs): if enable_dynamic_field: if primary_field is ct.default_int64_field_name: - fields = [gen_int64_field(), gen_float_vec_field(dim=dim)] + if is_partition_key is None: + fields = [gen_int64_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)] + else: + fields = [gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name)), + gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)] elif primary_field is ct.default_string_field_name: - fields = [gen_string_field(), gen_float_vec_field(dim=dim)] + if is_partition_key is None: + fields = [gen_string_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)] + else: + fields = [gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name)), + gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)] else: log.error("Primary key only support int or varchar") assert False - if len(multiple_dim_array) != 0: - for other_dim in multiple_dim_array: - fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim)) else: - fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(), - gen_float_vec_field(dim=dim)] + if is_partition_key is None: + int64_field = gen_int64_field() + vchar_field = gen_string_field() + else: + int64_field = gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name)) + vchar_field = gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name)) + fields = [int64_field, gen_float_field(), vchar_field, gen_json_field(), + gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)] if with_json is False: fields.remove(gen_json_field()) - if len(multiple_dim_array) != 0: - for other_dim in multiple_dim_array: - fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim)) + + if len(multiple_dim_array) != 0: + for other_dim in multiple_dim_array: + fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim, + vector_data_type=vector_data_type)) + schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description, primary_field=primary_field, 
auto_id=auto_id, @@ -278,11 +317,15 @@ def gen_collection_schema_all_datatype(description=ct.default_desc, auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True, **kwargs): if enable_dynamic_field: - fields = [gen_int64_field(), gen_float_vec_field(dim=dim)] + fields = [gen_int64_field(), gen_float_vec_field(dim=dim), + gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"), + gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")] else: fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(), gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), - gen_json_field(), gen_float_vec_field(dim=dim)] + gen_json_field(), gen_float_vec_field(dim=dim), + gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"), + gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")] if with_json is False: fields.remove(gen_json_field()) schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description, @@ -324,11 +367,18 @@ def gen_schema_multi_string_fields(string_fields): return schema -def gen_vectors(nb, dim): - vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] +def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"): + if vector_data_type == "FLOAT_VECTOR": + vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] + elif vector_data_type == "FLOAT16_VECTOR": + vectors = gen_fp16_vectors(nb, dim)[1] + elif vector_data_type == "BFLOAT16_VECTOR": + vectors = gen_bf16_vectors(nb, dim)[1] + if dim > 1: - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - vectors = vectors.tolist() + if vector_data_type=="FLOAT_VECTOR": + vectors = preprocessing.normalize(vectors, axis=1, norm='l2') + vectors = vectors.tolist() return vectors @@ -349,7 +399,8 @@ def gen_binary_vectors(num, dim): def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, - random_primary_key=False): + random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[], + vector_data_type="FLOAT_VECTOR"): if not random_primary_key: int_values = pd.Series(data=[i for i in range(start, start + nb)]) else: @@ -357,7 +408,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32") string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string") json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)] - float_vec_values = gen_vectors(nb, dim) + float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type) df = pd.DataFrame({ ct.default_int64_field_name: int_values, ct.default_float_field_name: float_values, @@ -365,24 +416,38 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi ct.default_json_field_name: json_values, ct.default_float_vec_field_name: float_vec_values }) + if with_json is False: df.drop(ct.default_json_field_name, axis=1, inplace=True) + if len(multiple_dim_array) != 0: + if len(multiple_vector_field_name) != len(multiple_dim_array): + log.error("multiple vector feature is enabled, please input the vector field name list " + "not including the default vector field") + assert len(multiple_vector_field_name) == 
len(multiple_dim_array) + for i in range(len(multiple_dim_array)): + new_float_vec_values = gen_vectors(nb, multiple_dim_array[i], vector_data_type=vector_data_type) + df[multiple_vector_field_name[i]] = new_float_vec_values return df -def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True): +def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[], + multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR"): array = [] for i in range(start, start + nb): dict = {ct.default_int64_field_name: i, ct.default_float_field_name: i*1.0, ct.default_string_field_name: str(i), ct.default_json_field_name: {"number": i, "float": i*1.0}, - ct.default_float_vec_field_name: gen_vectors(1, dim)[0] + ct.default_float_vec_field_name: gen_vectors(1, dim, vector_data_type=vector_data_type)[0] } if with_json is False: dict.pop(ct.default_json_field_name, None) array.append(dict) + if len(multiple_dim_array) != 0: + for i in range(len(multiple_dim_array)): + dict[multiple_vector_field_name[i]] = gen_vectors(1, multiple_dim_array[i], + vector_data_type=vector_data_type)[0] return array @@ -497,6 +562,8 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)] float_vec_values = gen_vectors(nb, dim) + float16_vec_values = gen_vectors(nb, dim, "FLOAT16_VECTOR") + bfloat16_vec_values = gen_vectors(nb, dim, "BFLOAT16_VECTOR") df = pd.DataFrame({ ct.default_int64_field_name: int64_values, ct.default_int32_field_name: int32_values, @@ -507,8 +574,9 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w ct.default_double_field_name: double_values, ct.default_string_field_name: string_values, ct.default_json_field_name: json_values, - ct.default_float_vec_field_name: float_vec_values - + ct.default_float_vec_field_name: float_vec_values, + ct.default_float16_vec_field_name: float16_vec_values, + ct.default_bfloat16_vec_field_name: bfloat16_vec_values }) if with_json is False: df.drop(ct.default_json_field_name, axis=1, inplace=True) @@ -531,7 +599,9 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st ct.default_string_field_name: str(i), ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(i, i + ct.default_json_list_length)]}, - ct.default_float_vec_field_name: gen_vectors(1, dim)[0] + ct.default_float_vec_field_name: gen_vectors(1, dim)[0], + ct.default_float16_vec_field_name: gen_vectors(1, dim, "FLOAT16_VECTOR")[0], + ct.default_bfloat16_vec_field_name: gen_vectors(1, dim, "BFLOAT16_VECTOR")[0] } if with_json is False: dict.pop(ct.default_json_field_name, None) @@ -1384,7 +1454,8 @@ def gen_partitions(collection_w, partition_num=1): def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False, auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True, - random_primary_key=False): + random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name, + vector_data_type="FLOAT_VECTOR"): """ target: insert non-binary/binary data method: insert non-binary/binary data into partitions if any @@ -1396,13 +1467,23 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ binary_raw_vectors = [] insert_ids = [] start = 
insert_offset - log.info(f"inserted {nb} data into collection {collection_w.name}") + log.info(f"inserting {nb} data into collection {collection_w.name}") + # extract the vector field name list + vector_name_list = extract_vector_field_name_list(collection_w) + # prepare data for i in range(num): log.debug("Dynamic field is enabled: %s" % enable_dynamic_field) - default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json, - random_primary_key=random_primary_key) - if enable_dynamic_field: - default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json) + if not enable_dynamic_field: + default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json, + random_primary_key=random_primary_key, + multiple_dim_array=multiple_dim_array, + multiple_vector_field_name=vector_name_list, + vector_data_type=vector_data_type) + else: + default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json, + multiple_dim_array=multiple_dim_array, + multiple_vector_field_name=vector_name_list, + vector_data_type=vector_data_type) if is_binary: default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start) binary_raw_vectors.extend(binary_raw_data) @@ -1414,10 +1495,18 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ if auto_id: if enable_dynamic_field: for data in default_data: - data.pop(ct.default_int64_field_name, None) + if primary_field == ct.default_int64_field_name: + data.pop(ct.default_int64_field_name, None) + elif primary_field == ct.default_string_field_name: + data.pop(ct.default_string_field_name, None) else: - default_data.drop(ct.default_int64_field_name, axis=1, inplace=True) + if primary_field == ct.default_int64_field_name: + default_data.drop(ct.default_int64_field_name, axis=1, inplace=True) + elif primary_field == ct.default_string_field_name: + default_data.drop(ct.default_string_field_name, axis=1, inplace=True) + # insert insert_res = collection_w.insert(default_data, par[i].name)[0] + log.info(f"inserted {nb // num} data into collection {collection_w.name}") time_stamp = insert_res.timestamp insert_ids.extend(insert_res.primary_keys) vectors.append(default_data) @@ -1559,3 +1648,104 @@ def get_wildcard_output_field_names(collection_w, output_fields): output_fields.remove("*") output_fields.extend(all_fields) return output_fields + + +def extract_vector_field_name_list(collection_w): + """ + extract the vector field name list + collection_w : the collection object from which to extract the names of all the vector fields + return: the vector field name list without the default float vector field name + """ + schema_dict = collection_w.schema.to_dict() + fields = schema_dict.get('fields') + vector_name_list = [] + for field in fields: + if str(field['type']) == 'DataType.FLOAT_VECTOR' \ + or str(field['type']) == 'DataType.FLOAT16_VECTOR' \ + or str(field['type']) == 'DataType.BFLOAT16_VECTOR': + if field['name'] != ct.default_float_vec_field_name: + vector_name_list.append(field['name']) + + return vector_name_list + + +def get_hybrid_search_base_results(search_res_dict_array): + """ + merge the score dicts in the array by accumulating the score of each id across all requests + search_res_dict_array : the dict array whose elements are to be merged + return: the sorted id and score answer + """ + # calculate hybrid search base line + search_res_dict_merge = {} + ids_answer = [] + score_answer = [] + for search_res_dict in search_res_dict_array: + for key in search_res_dict: + search_res_dict_merge[key] = search_res_dict_merge.get(key, 0) + search_res_dict[key] + sorted_list = sorted(search_res_dict_merge.items(), key=lambda x: x[1], reverse=True) + + for sort in sorted_list: + ids_answer.append(int(sort[0])) + score_answer.append(float(sort[1])) + + return ids_answer, score_answer
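For reference, a minimal usage sketch of the baseline helper above, with hypothetical score dicts keyed by primary key (summing scores corresponds to a WeightedRanker with all weights 1.0; the values below are illustrative, not from this patch):

    res_a = {"1": 0.9, "2": 0.5}   # scores from the first ANN request
    res_b = {"2": 0.4, "3": 0.8}   # scores from the second ANN request
    ids, scores = get_hybrid_search_base_results([res_a, res_b])
    # id "2" accumulates 0.5 + 0.4; ties keep insertion order because sorted() is stable
    assert ids == [1, 2, 3]
    assert scores == [0.9, 0.9, 0.8]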
+ + +def gen_bf16_vectors(num, dim): + """ + generate brain float16 vector data + raw_vectors : the vectors + bf16_vectors: the bytes used for insert + return: raw_vectors and bf16_vectors + """ + raw_vectors = [] + bf16_vectors = [] + for _ in range(num): + raw_vector = [random.random() for _ in range(dim)] + raw_vectors.append(raw_vector) + # bf16_vector = np.array(raw_vector, dtype=tf.bfloat16).view(np.uint8).tolist() + bf16_vector = tf.cast(raw_vector, dtype=tf.bfloat16).numpy().view(np.uint8).tolist() + bf16_vectors.append(bytes(bf16_vector)) + + return raw_vectors, bf16_vectors + + +def gen_fp16_vectors(num, dim): + """ + generate float16 vector data + raw_vectors : the vectors + fp16_vectors: the bytes used for insert + return: raw_vectors and fp16_vectors + """ + raw_vectors = [] + fp16_vectors = [] + for _ in range(num): + raw_vector = [random.random() for _ in range(dim)] + raw_vectors.append(raw_vector) + fp16_vector = np.array(raw_vector, dtype=np.float16).view(np.uint8).tolist() + fp16_vectors.append(bytes(fp16_vector)) + + return raw_vectors, fp16_vectors + + +def gen_vectors_based_on_vector_type(num, dim, vector_data_type): + """ + generate vectors of the given vector data type + num: the number of vectors to generate + dim: the dimension of each vector + return: the generated vectors (fp16/bf16 vectors are returned as bytes) + """ + if vector_data_type == "FLOAT_VECTOR": + vectors = [[random.random() for _ in range(dim)] for _ in range(num)] + elif vector_data_type == "FLOAT16_VECTOR": + vectors = gen_fp16_vectors(num, dim)[1] + elif vector_data_type == "BFLOAT16_VECTOR": + vectors = gen_bf16_vectors(num, dim)[1] + + return vectors \ No newline at end of file diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index d836c2cd5a..3374046c09 100644 --- a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -44,6 +44,8 @@ default_int32_array_field_name = "int32_array" default_float_array_field_name = "float_array" default_string_array_field_name = "string_array" default_float_vec_field_name = "float_vector" +default_float16_vec_field_name = "float16_vector" +default_bfloat16_vec_field_name = "bfloat16_vector" another_float_vec_field_name = "float_vector1" default_binary_vec_field_name = "binary_vector" default_partition_name = "_default" @@ -81,6 +83,7 @@ default_db = "default" max_database_num = 64 max_collections_per_db = 65536 max_collection_num = 65536 +max_hybrid_search_req_num = 1024 IMAGE_REPOSITORY_MILVUS = "harbor.milvus.io/dockerhub/milvusdb/milvus" diff --git a/tests/python_client/requirements.txt b/tests/python_client/requirements.txt index d01827b97a..754b4466de 100644 --- a/tests/python_client/requirements.txt +++ b/tests/python_client/requirements.txt @@ -53,3 +53,6 @@ deepdiff==6.7.1 prettytable==3.8.0 pyarrow==14.0.1 fastparquet==2023.7.0 + +# for generating bfloat16 data +tensorflow==2.13.1
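Because FLOAT16/BFLOAT16 vectors are inserted as raw little-endian bytes, a round-trip decoding sketch may help when debugging inserted data (numpy-only; both helpers are hypothetical, not part of this patch, and assume a little-endian host, relying on bfloat16 being the upper 16 bits of a float32):

    import numpy as np

    def decode_fp16_bytes(buf):
        # invert gen_fp16_vectors: reinterpret the bytes as float16 values
        return np.frombuffer(buf, dtype=np.float16).astype(np.float32).tolist()

    def decode_bf16_bytes(buf):
        # invert gen_bf16_vectors: pad each 2-byte bfloat16 with a zero low half
        raw = np.frombuffer(buf, dtype=np.uint8).reshape(-1, 2)
        padded = np.zeros((raw.shape[0], 4), dtype=np.uint8)
        padded[:, 2:] = raw  # bf16 bytes become the high half of a float32
        return padded.view(np.float32).ravel().tolist()

diff --git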
a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index c540abf54d..3aa42d9266 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -1252,6 +1252,14 @@ class TestIndexInvalid(TestcaseBase): Test create / describe / drop index interfaces with invalid collection names """ + @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"]) + def scalar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + def vector_data_type(self, request): + yield request.param + @pytest.fixture( scope="function", params=gen_invalid_strs() @@ -1346,6 +1354,107 @@ class TestIndexInvalid(TestcaseBase): check_items={ct.err_code: 1100, ct.err_msg: "create index on JSON field is not supported"}) + @pytest.mark.tags(CaseLabel.L1) + def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_type): + """ + target: test create scalar index on vector field + method: 1.create collection, and create index + expected: Raise exception + """ + collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, + dim=ct.default_dim, is_index=False, + vector_data_type=vector_data_type)[0:4] + scalar_index_params = {"index_type": scalar_index} + collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, + ct.err_msg: f"invalid index type: {scalar_index}"}) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_scalar_index_on_binary_vector_field(self, scalar_index): + """ + target: test create scalar index on binary vector field + method: 1.create collection, and create index + expected: Raise exception + """ + collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0] + scalar_index_params = {"index_type": scalar_index} + collection_w.create_index(ct.default_binary_vec_field_name, index_params=scalar_index_params, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, + ct.err_msg: f"invalid index type: {scalar_index}"}) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_inverted_index_on_json_field(self, vector_data_type): + """ + target: test create scalar index on json field + method: 1.create collection, and create index + expected: Raise exception + """ + collection_w = self.init_collection_general(prefix, is_index=False, vector_data_type=vector_data_type)[0] + scalar_index_params = {"index_type": "INVERTED"} + collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 1100, + ct.err_msg: "create index on JSON field is not supported"}) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_inverted_index_on_array_field(self): + """ + target: test create scalar index on array field + method: 1.create collection, and create index + expected: Raise exception + """ + # 1. create a collection + schema = cf.gen_array_collection_schema() + collection_w = self.init_collection_wrap(schema=schema) + # 2. 
create index + scalar_index_params = {"index_type": "INVERTED"} + collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params, + check_task=CheckTasks.err_res, + check_items={ct.err_code: 1100, + ct.err_msg: "create index on Array field is not supported"}) + + @pytest.mark.tags(CaseLabel.L1) + def test_create_inverted_index_no_vector_index(self): + """ + target: test load collection with a scalar index but no vector index + method: 1.create collection, create a scalar index only, then load + expected: Raise exception + """ + # 1. create a collection + collection_w = self.init_collection_general(prefix, is_index=False)[0] + # 2. create index + scalar_index_params = {"index_type": "INVERTED"} + collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params) + collection_w.load(check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, + ct.err_msg: "there is no vector index on field: [float_vector], " + "please create index firstly"}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("scalar_index", ["STL_SORT", "INVERTED"]) + def test_create_inverted_index_no_all_vector_index(self, scalar_index): + """ + target: test load collection when not all vector fields are indexed + method: 1.create collection with multiple vector fields, index only part of them, then load + expected: Raise exception + """ + # 1. create a collection + multiple_dim_array = [ct.default_dim, ct.default_dim] + collection_w = self.init_collection_general(prefix, is_index=False, multiple_dim_array=multiple_dim_array)[0] + # 2. create index + scalar_index_params = {"index_type": scalar_index} + collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"} + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load(check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, + ct.err_msg: f"there is no vector index on field: " + f"[{vector_name_list[0]} {vector_name_list[1]}], " + f"please create index firstly"}) + @pytest.mark.tags(CaseLabel.GPU) class TestNewIndexAsync(TestcaseBase): @@ -2024,3 +2133,100 @@ class TestScaNNIndex(TestcaseBase): ct.err_msg: f"dimension must be able to be divided by 2, dimension: {dim}"} collection_w.create_index(default_field_name, index_params, check_task=CheckTasks.err_res, check_items=error) + + +@pytest.mark.tags(CaseLabel.GPU) +class TestInvertedIndexValid(TestcaseBase): + """ + Test create / describe / drop index interfaces with inverted index + """ + + @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"]) + def scalar_index(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + def vector_data_type(self, request): + yield request.param + + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("scalar_field_name", [ct.default_int8_field_name, ct.default_int16_field_name, + ct.default_int32_field_name, ct.default_int64_field_name, + ct.default_float_field_name, ct.default_double_field_name, + ct.default_string_field_name, ct.default_bool_field_name]) + def test_create_inverted_index_on_all_supported_scalar_field(self, scalar_field_name): + """ + target: test create scalar index on all supported scalar fields + method: 1.create collection, and create index + expected: create index successfully + """ + collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False,
is_all_data_type=True)[0] + scalar_index_params = {"index_type": "INVERTED"} + index_name = "scalar_index_name" + collection_w.create_index(scalar_field_name, index_params=scalar_index_params, index_name=index_name) + assert collection_w.has_index(index_name=index_name)[0] is True + index_list = self.utility_wrap.list_indexes(collection_w.name)[0] + assert index_name in index_list + collection_w.flush() + result = self.utility_wrap.index_building_progress(collection_w.name, index_name)[0] + start = time.time() + while True: + time.sleep(1) + res, _ = self.utility_wrap.index_building_progress(collection_w.name, index_name) + if 0 < res['indexed_rows'] <= default_nb: + break + if time.time() - start > 5: + raise MilvusException(1, "Index build did not complete within 5s") + + @pytest.mark.tags(CaseLabel.L2) + def test_create_multiple_inverted_index(self): + """ + target: test create multiple inverted indexes + method: 1.create collection, and create index + expected: create index successfully + """ + collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0] + scalar_index_params = {"index_type": "INVERTED"} + index_name = "scalar_index_name_0" + collection_w.create_index(ct.default_int8_field_name, index_params=scalar_index_params, index_name=index_name) + assert collection_w.has_index(index_name=index_name)[0] is True + index_name = "scalar_index_name_1" + collection_w.create_index(ct.default_int32_field_name, index_params=scalar_index_params, index_name=index_name) + assert collection_w.has_index(index_name=index_name)[0] is True + + @pytest.mark.tags(CaseLabel.L2) + def test_create_all_inverted_index(self): + """ + target: test create inverted index on all supported scalar fields + method: 1.create collection, and create index + expected: create index successfully + """ + collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0] + scalar_index_params = {"index_type": "INVERTED"} + scalar_fields = [ct.default_int8_field_name, ct.default_int16_field_name, + ct.default_int32_field_name, ct.default_int64_field_name, + ct.default_float_field_name, ct.default_double_field_name, + ct.default_string_field_name, ct.default_bool_field_name] + for i in range(len(scalar_fields)): + index_name = f"scalar_index_name_{i}" + collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name) + assert collection_w.has_index(index_name=index_name)[0] is True + + @pytest.mark.tags(CaseLabel.L2) + def test_create_all_scalar_index(self): + """ + target: test create different scalar index types on multiple scalar fields + method: 1.create collection, and create index + expected: create index successfully + """ + collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0] + scalar_index = ["Trie", "STL_SORT", "INVERTED"] + scalar_fields = [ct.default_string_field_name, ct.default_int16_field_name, + ct.default_int32_field_name] + for i in range(len(scalar_fields)): + index_name = f"scalar_index_name_{i}" + scalar_index_params = {"index_type": scalar_index[i]} + collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name) + assert collection_w.has_index(index_name=index_name)[0] is True diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index d19c1e6ea7..0bc7dfde36 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py
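The INVERTED scalar index exercised above can also be created and monitored directly with pymilvus; a hedged standalone sketch ("books" and the field name are placeholders, not names from this patch):

    from pymilvus import Collection, utility

    collection = Collection("books")  # assumes the collection exists and holds data
    collection.create_index("int64", {"index_type": "INVERTED"}, index_name="scalar_index_name")
    # index_building_progress returns a dict with 'total_rows' and 'indexed_rows'
    print(utility.index_building_progress("books", index_name="scalar_index_name"))

@@ -1333,12 +1333,14 @@ class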
TestQueryParams(TestcaseBase): assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_float_field_name} @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 30437") def test_query_output_all_fields(self, enable_dynamic_field, random_primary_key): """ target: test query with none output field method: query with output field=None expected: return all fields """ + enable_dynamic_field = False # 1. initialize with data collection_w, df, _, insert_ids = \ self.init_collection_general(prefix, True, nb=10, is_all_data_type=True, @@ -1347,7 +1349,8 @@ class TestQueryParams(TestcaseBase): all_fields = [ct.default_int64_field_name, ct.default_int32_field_name, ct.default_int16_field_name, ct.default_int8_field_name, ct.default_bool_field_name, ct.default_float_field_name, ct.default_double_field_name, ct.default_string_field_name, ct.default_json_field_name, - ct.default_float_vec_field_name] + ct.default_float_vec_field_name, ct.default_float16_vec_field_name, + ct.default_bfloat16_vec_field_name] if enable_dynamic_field: res = df[0][:2] else: diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index d1ec0d359c..39ac56dece 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -1,5 +1,6 @@ import numpy as np from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY +from pymilvus import AnnSearchRequest, RRFRanker, WeightedRanker from common.constants import * from utils.util_pymilvus import * from common.common_type import CaseLabel, CheckTasks @@ -26,6 +27,7 @@ search_num = 10 max_dim = ct.max_dim min_dim = ct.min_dim epsilon = ct.epsilon +hybrid_search_epsilon = 0.01 gracefulTime = ct.gracefulTime default_nb = ct.default_nb default_nb_medium = ct.default_nb_medium @@ -63,6 +65,7 @@ default_query, _ = gen_search_vectors_params(field_name, entities, default_top_k index_name1 = cf.gen_unique_str("float") index_name2 = cf.gen_unique_str("varhar") half_nb = ct.default_nb // 2 +max_hybrid_search_req_num = ct.max_hybrid_search_req_num class TestCollectionSearchInvalid(TestcaseBase): @@ -297,6 +300,7 @@ class TestCollectionSearchInvalid(TestcaseBase): "by name: %s not found" % invalid_search_field}) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 30356") def test_search_param_invalid_metric_type(self, get_invalid_metric_type): """ target: test search with invalid parameter values @@ -313,7 +317,27 @@ class TestCollectionSearchInvalid(TestcaseBase): default_limit, default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "collection not loaded"}) + "err_msg": "metric type not match"}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 30356") + def test_search_param_metric_type_not_match(self): + """ + target: test search with invalid parameter values + method: search with invalid metric type + expected: raise exception and report the error + """ + # 1. initialize with data + collection_w = self.init_collection_general(prefix)[0] + # 2. 
search with invalid metric_type + log.info("test_search_param_metric_type_not_match: searching with not matched metric_type") + search_params = {"metric_type": "L2", "params": {"nprobe": 10}} + collection_w.search(vectors[:default_nq], default_search_field, search_params, + default_limit, default_search_exp, + check_task=CheckTasks.err_res, + check_items={"err_code": 65535, + "err_msg": "metric type not match: invalid parameter" + "[expected=COSINE][actual=L2]"}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("issue #29020") @@ -503,8 +527,7 @@ class TestCollectionSearchInvalid(TestcaseBase): expected: raise exception and report the error """ # 1. initialize with data - collection_w = self.init_collection_general( - prefix, is_all_data_type=True)[0] + collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0] # 2 search with invalid bool expr invalid_search_expr_bool = f"{default_bool_field_name} == {get_invalid_expr_bool_value}" log.info("test_search_param_invalid_expr_bool: searching with " @@ -1103,6 +1126,7 @@ class TestCollectionSearchInvalid(TestcaseBase): "err_msg": f"`round_decimal` value {round_decimal} is illegal"}) @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.xfail(reason="issue 30365") def test_range_search_invalid_radius(self, get_invalid_range_search_paras): """ target: test range search with invalid radius @@ -1121,10 +1145,11 @@ class TestCollectionSearchInvalid(TestcaseBase): range_search_params, default_limit, default_search_exp, check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": "collection not loaded"}) + check_items={"err_code": 1, + "err_msg": "type must be number"}) @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.xfail(reason="issue 30365") def test_range_search_invalid_range_filter(self, get_invalid_range_search_paras): """ target: test range search with invalid range_filter @@ -1132,7 +1157,12 @@ class TestCollectionSearchInvalid(TestcaseBase): expected: raise exception and report the error """ # 1. initialize with data - collection_w = self.init_collection_general(prefix)[0] + collection_w = self.init_collection_general(prefix, is_index=False)[0] + # 2. create index + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"} + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + # 3. load + collection_w.load() # 2. range search log.info("test_range_search_invalid_range_filter: Range searching collection %s" % collection_w.name) @@ -1143,10 +1173,11 @@ class TestCollectionSearchInvalid(TestcaseBase): range_search_params, default_limit, default_search_exp, check_task=CheckTasks.err_res, - check_items={"err_code": 65535, - "err_msg": "collection not loaded"}) + check_items={"err_code": 1, + "err_msg": "type must be number"}) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 30365") def test_range_search_invalid_radius_range_filter_L2(self): """ target: test range search with invalid radius and range_filter for L2 @@ -1154,8 +1185,13 @@ class TestCollectionSearchInvalid(TestcaseBase): expected: raise exception and report the error """ # 1. initialize with data - collection_w = self.init_collection_general(prefix)[0] - # 2. range search + collection_w = self.init_collection_general(prefix, is_index=False)[0] + # 2. create index + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"} + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + # 3. load + collection_w.load() + # 4. 
range search log.info("test_range_search_invalid_radius_range_filter_L2: Range searching collection %s" % collection_w.name) range_search_params = {"metric_type": "L2", "params": {"nprobe": 10, "radius": 1, "range_filter": 10}} @@ -1164,9 +1200,10 @@ class TestCollectionSearchInvalid(TestcaseBase): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "collection not loaded"}) + "err_msg": "range_filter must less than radius except IP"}) @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.xfail(reason="issue 30365") def test_range_search_invalid_radius_range_filter_IP(self): """ target: test range search with invalid radius and range_filter for IP @@ -1174,8 +1211,13 @@ class TestCollectionSearchInvalid(TestcaseBase): expected: raise exception and report the error """ # 1. initialize with data - collection_w = self.init_collection_general(prefix)[0] - # 2. range search + collection_w = self.init_collection_general(prefix, is_index=False)[0] + # 2. create index + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "IP"} + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + # 3. load + collection_w.load() + # 4. range search log.info("test_range_search_invalid_radius_range_filter_IP: Range searching collection %s" % collection_w.name) range_search_params = {"metric_type": "IP", @@ -1185,7 +1227,7 @@ class TestCollectionSearchInvalid(TestcaseBase): default_search_exp, check_task=CheckTasks.err_res, check_items={"err_code": 65535, - "err_msg": "collection not loaded"}) + "err_msg": "range_filter must more than radius when IP"}) @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="annoy not supported any more") @@ -1360,6 +1402,14 @@ class TestCollectionSearch(TestcaseBase): def random_primary_key(self, request): yield request.param + @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + def vector_data_type(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["STL_SORT", "INVERTED"]) + def scalar_index(self, request): + yield request.param + """ ****************************************************************** # The following are valid base cases @@ -1367,7 +1417,7 @@ class TestCollectionSearch(TestcaseBase): """ @pytest.mark.tags(CaseLabel.L0) - def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field): + def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type): """ target: test search normal case method: create connection, collection, insert and search @@ -1376,9 +1426,11 @@ class TestCollectionSearch(TestcaseBase): # 1. initialize with data collection_w, _, _, insert_ids, time_stamp = \ self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, - enable_dynamic_field=enable_dynamic_field)[0:5] - vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. search after insert + enable_dynamic_field=enable_dynamic_field, + vector_data_type=vector_data_type)[0:5] + # 2. generate search data + vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + # 3. 
search after insert collection_w.search(vectors[:nq], default_search_field, default_search_params, default_limit, default_search_exp, @@ -1472,6 +1524,34 @@ class TestCollectionSearch(TestcaseBase): # verify that top 1 hit is itself,so min distance is 0 assert 1.0 - hits.distances[0] <= epsilon + @pytest.mark.tags(CaseLabel.L2) + def test_search_multi_vector_fields(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type): + """ + target: test search on a collection with multiple vector fields + method: create connection, collection with multiple vector fields, insert and search + expected: search successfully with limit(topK) + """ + # 1. initialize with data + multiple_dim_array = [dim, dim] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + enable_dynamic_field=enable_dynamic_field, + multiple_dim_array=multiple_dim_array, + vector_data_type=vector_data_type)[0:5] + # 2. generate search data + vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(default_search_field) + # 3. search after insert + for search_field in vector_name_list: + collection_w.search(vectors[:nq], search_field, + default_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": nq, + "ids": insert_ids, + "limit": default_limit}) + @pytest.mark.tags(CaseLabel.L1) def test_search_random_primary_key(self, random_primary_key): """ @@ -2140,7 +2220,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_index_params[:7])) - def test_search_after_different_index_with_params(self, dim, index, params, auto_id, _async, enable_dynamic_field): + def test_search_after_different_index_with_params(self, dim, index, params, auto_id, _async, enable_dynamic_field, + scalar_index): """ target: test search after different index method: test search after different index and corresponding search params expected: search successfully with limit(topK) """ # 1. initialize with data collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, partition_num=1, + is_all_data_type=True, auto_id=auto_id, dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. create index and load + # 2. create index on vector field and load if params.get("m"): if (dim % params["m"]) != 0: params["m"] = dim // 4 if params.get("PQM"): if (dim % params["PQM"]) != 0: params["PQM"] = dim // 4 default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} - collection_w.create_index("float_vector", default_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, default_index) + # 3. create index on scalar field + scalar_index_params = {"index_type": scalar_index, "params": {}} + collection_w.create_index(ct.default_int64_field_name, scalar_index_params) collection_w.load() - # 3. search + # 4. search search_params = cf.gen_search_param(index, "COSINE") vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] for search_param in search_params: @@ -2751,7 +2839,9 @@ class TestCollectionSearch(TestcaseBase): dim=dim, is_index=False, is_flush=is_flush)[0:5] - # 2.
create index + # 2. create index on scalar and vector field + default_index = {"index_type": "INVERTED", "params": {}} + collection_w.create_index(ct.default_float_field_name, default_index) default_index = {"index_type": index, "params": { "nlist": 128}, "metric_type": "JACCARD"} collection_w.create_index("binary_vector", default_index) @@ -3029,9 +3119,12 @@ class TestCollectionSearch(TestcaseBase): dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:4] - # 2. create index + # 2. create index and load + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} - collection_w.create_index("float_vector", index_param) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, index_param) collection_w.load() # 3. filter result with expression in collection @@ -3281,7 +3374,11 @@ class TestCollectionSearch(TestcaseBase): collection_w = cf.insert_data(collection_w, is_all_data_type=True, insert_offset=offset-1000)[0] # 2. create index and load - collection_w.create_index(field_name, default_index_params) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, index_param) collection_w.load() # 3. search using expression which field value is out of bound @@ -3663,7 +3760,7 @@ class TestCollectionSearch(TestcaseBase): collection_w, _vectors = self.init_collection_general(prefix, True, enable_dynamic_field=enable_dynamic_field)[:2] - # 2. search with output field vector + # search with output field vector output_fields = [default_float_field_name, default_string_field_name, default_search_field] original_entities = [] if enable_dynamic_field: @@ -5083,6 +5180,40 @@ class TestSearchString(TestcaseBase): "limit": default_limit, "_async": _async}) + @pytest.mark.tags(CaseLabel.L2) + def test_search_string_field_is_primary_true_multi_vector_fields(self, dim, _async, enable_dynamic_field): + """ + target: test search with string expr and string field is primary + method: create collection and insert data + create index and collection load + collection search uses string expr in string field, string field is primary + expected: Search successfully + """ + # 1. initialize with data + multiple_dim_array = [dim, dim] + collection_w, _, _, insert_ids = \ + self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name, + enable_dynamic_field=enable_dynamic_field, + multiple_dim_array=multiple_dim_array)[0:4] + # 2.
search + log.info("test_search_string_field_is_primary_true: searching collection %s" % + collection_w.name) + vectors = [[random.random() for _ in range(dim)] + for _ in range(default_nq)] + output_fields = [default_string_field_name, default_float_field_name] + vector_list = cf.extract_vector_field_name_list(collection_w) + for search_field in vector_list: + collection_w.search(vectors[:default_nq], search_field, + default_search_params, default_limit, + default_search_string_exp, + output_fields=output_fields, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": default_limit, + "_async": _async}) + @pytest.mark.tags(CaseLabel.L2) def test_range_search_string_field_is_primary_true(self, dim, _async, enable_dynamic_field): """ @@ -5093,10 +5224,15 @@ class TestSearchString(TestcaseBase): expected: Search successfully """ # 1. initialize with data + multiple_dim_array = [dim, dim] collection_w, _, _, insert_ids = \ self.init_collection_general(prefix, True, dim=dim, primary_field=ct.default_string_field_name, - enable_dynamic_field=enable_dynamic_field, is_index=False)[0:4] + enable_dynamic_field=enable_dynamic_field, is_index=False, + multiple_dim_array=multiple_dim_array)[0:4] + vector_list = cf.extract_vector_field_name_list(collection_w) collection_w.create_index(field_name, {"metric_type": "L2"}) + for vector_field_name in vector_list: + collection_w.create_index(vector_field_name, {"metric_type": "L2"}) collection_w.load() # 2. search log.info("test_search_string_field_is_primary_true: searching collection %s" % @@ -5106,16 +5242,17 @@ class TestSearchString(TestcaseBase): vectors = [[random.random() for _ in range(dim)] for _ in range(default_nq)] output_fields = [default_string_field_name, default_float_field_name] - collection_w.search(vectors[:default_nq], default_search_field, - range_search_params, default_limit, - default_search_string_exp, - output_fields=output_fields, - _async=_async, - check_task=CheckTasks.check_search_results, - check_items={"nq": default_nq, - "ids": insert_ids, - "limit": default_limit, - "_async": _async}) + for search_field in vector_list: + collection_w.search(vectors[:default_nq], search_field, + range_search_params, default_limit, + default_search_string_exp, + output_fields=output_fields, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": default_limit, + "_async": _async}) @pytest.mark.tags(CaseLabel.L2) def test_search_string_mix_expr(self, dim, auto_id, _async, enable_dynamic_field): @@ -5366,6 +5503,45 @@ class TestSearchString(TestcaseBase): "_async": _async} ) + @pytest.mark.tags(CaseLabel.L2) + def test_search_string_field_index(self, auto_id, _async): + """ + target: test search with string expr and string field is not primary + method: create collection and insert data + create index and collection load + collection search uses string expr in string field, string field is not primary + expected: Search successfully + """ + # 1. 
initialize with data + collection_w, _, _, insert_ids = \ + self.init_collection_general( + prefix, True, auto_id=auto_id, dim=default_dim, is_index=False)[0:4] + index_param = {"index_type": "IVF_FLAT", + "metric_type": "L2", "params": {"nlist": 100}} + collection_w.create_index("float_vector", index_param, index_name="a") + index_param = {"index_type": "Trie", "params": {}} + collection_w.create_index("varchar", index_param, index_name="b") + collection_w.load() + # 2. search + log.info("test_search_string_field_not_primary: searching collection %s" % + collection_w.name) + vectors = [[random.random() for _ in range(default_dim)] + for _ in range(default_nq)] + output_fields = [default_float_field_name, default_string_field_name] + collection_w.search(vectors[:default_nq], default_search_field, + # search all buckets + {"metric_type": "L2", "params": { + "nprobe": 100}}, default_limit, + perfix_expr, + output_fields=output_fields, + _async=_async, + check_task=CheckTasks.check_search_results, + check_items={"nq": default_nq, + "ids": insert_ids, + "limit": 1, + "_async": _async} + ) + @pytest.mark.tags(CaseLabel.L1) def test_search_all_index_with_compare_expr(self, _async): """ @@ -6537,6 +6713,50 @@ class TestCollectionRangeSearch(TestcaseBase): # distances_tmp = list(hits.distances) # assert distances_tmp.count(1.0) == 1 + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("range_filter", [1000, 1000.0]) + @pytest.mark.parametrize("radius", [0, 0.0]) + def test_range_search_multi_vector_fields(self, nq, dim, auto_id, is_flush, radius, range_filter, enable_dynamic_field): + """ + target: test range search normal case + method: create connection, collection, insert and search + expected: search successfully with limit(topK) + """ + # 1. initialize with data + multiple_dim_array = [dim, dim] + collection_w, _vectors, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + enable_dynamic_field=enable_dynamic_field, + multiple_dim_array=multiple_dim_array)[0:5] + # 2. get vectors that inserted into collection + vectors = [] + if enable_dynamic_field: + for vector in _vectors[0]: + vector = vector[ct.default_float_vec_field_name] + vectors.append(vector) + else: + vectors = np.array(_vectors[0]).tolist() + vectors = [vectors[i][-1] for i in range(nq)] + # 3. range search + range_search_params = {"metric_type": "COSINE", "params": {"radius": radius, + "range_filter": range_filter}} + vector_list = cf. 
extract_vector_field_name_list(collection_w) + vector_list.append(default_search_field) + for search_field in vector_list: + search_res = collection_w.search(vectors[:nq], search_field, + range_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": nq, + "ids": insert_ids, + "limit": default_limit})[0] + log.info("test_range_search_normal: checking the distance of top 1") + for hits in search_res: + # verify that top 1 hit is itself,so min distance is 1.0 + assert abs(hits.distances[0] - 1.0) <= epsilon + # distances_tmp = list(hits.distances) + # assert distances_tmp.count(1.0) == 1 + @pytest.mark.tags(CaseLabel.L1) def test_range_search_cosine(self): """ @@ -9592,8 +9812,15 @@ class TestSearchGroupBy(TestcaseBase): """ collection_w = self.init_collection_general(prefix, auto_id=True, insert_data=False, is_index=False, is_all_data_type=True, with_json=False)[0] + + # create index and load + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) + collection_w.load() + # insert with the same values for scalar fields for _ in range(30): data = cf.gen_dataframe_all_data_type(nb=100, auto_id=True, with_json=False) @@ -9731,7 +9958,10 @@ class TestSearchGroupBy(TestcaseBase): collection_w = self.init_collection_general(prefix, auto_id=True, insert_data=False, is_index=False, is_all_data_type=True, with_json=False)[0] _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) # insert with the same values(by insert rounds) for scalar fields for _ in range(100): data = cf.gen_dataframe_all_data_type(nb=100, auto_id=True, with_json=False) @@ -9790,7 +10020,10 @@ class TestSearchGroupBy(TestcaseBase): collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, is_all_data_type=True, with_json=True,)[0] _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -9826,7 +10059,10 @@ class TestSearchGroupBy(TestcaseBase): collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, is_all_data_type=True, with_json=False)[0] index_params = {"index_type": index, "params": params, "metric_type": metric} - collection_w.create_index(ct.default_float_vec_field_name, index_params) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, index_params) 
collection_w.load() search_params = {"params": {}} @@ -9858,9 +10094,12 @@ class TestSearchGroupBy(TestcaseBase): """ metric = "IP" collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, - is_all_data_type=True, with_json=True, )[0] + is_all_data_type=True, with_json=True)[0] _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) collection_w.load() search_params = {"metric_type": metric, "params": {"ef": 128}} @@ -9897,7 +10136,10 @@ class TestSearchGroupBy(TestcaseBase): data = cf.gen_dataframe_all_data_type(nb=100, auto_id=True, with_json=False) collection_w.insert(data) _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) collection_w.load() # 2. search pagination with offset limit = 10 @@ -9948,7 +10190,10 @@ class TestSearchGroupBy(TestcaseBase): data = cf.gen_dataframe_all_data_type(nb=100, auto_id=True, with_json=False) collection_w.insert(data) _index = {"index_type": "HNSW", "metric_type": metric, "params": {"M": 16, "efConstruction": 128}} - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) collection_w.load() grpby_field = ct.default_int64_field_name @@ -9991,7 +10236,10 @@ class TestSearchGroupBy(TestcaseBase): collection_w.insert(data) collection_w.flush() - collection_w.create_index(ct.default_float_vec_field_name, index_params=_index) + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + for vector_name in vector_name_list: + collection_w.create_index(vector_name, _index) time.sleep(10) collection_w.load() @@ -10035,3 +10283,1428 @@ class TestSearchGroupBy(TestcaseBase): verify: search successfully """ pass + + +class TestCollectionHybridSearchValid(TestcaseBase): + """ Test case of search interface """ + + @pytest.fixture(scope="function", params=[default_nb_medium]) + def nb(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[32, 128]) + def dim(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def auto_id(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def _async(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["JACCARD", "HAMMING"]) + def metrics(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[False, True]) + def is_flush(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[True, False]) + def enable_dynamic_field(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["IP", "COSINE", "L2"]) 
+ def metric_type(self, request): + yield request.param + + @pytest.fixture(scope="function", params=[True, False]) + def random_primary_key(self, request): + yield request.param + + @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) + def vector_data_type(self, request): + yield request.param + + """ + ****************************************************************** + # The following are valid base cases for hybrid_search + ****************************************************************** + """ + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("offset", [1, 5]) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_normal(self, dim, auto_id, is_flush, enable_dynamic_field, offset, + primary_field, vector_data_type): + """ + target: test hybrid search normal case + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim, dim] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + primary_field=primary_field, + enable_dynamic_field=enable_dynamic_field, + multiple_dim_array=multiple_dim_array, + vector_data_type=vector_data_type)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params + req_list = [] + weights = [0.2, 0.3, 0.5] + search_res_dict_array = [] + for i in range(len(vector_name_list)): + # 4. generate search data + vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type) + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE"}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 5. search to get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, default_limit, + default_search_exp, + offset = offset, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + ids = search_res[0].ids + distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances] + for j in range(len(ids)): + search_res_dict[ids[j]] = distance_array[j] + search_res_dict_array.append(search_res_dict) + # 6. calculate hybrid search base line + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 7. hybrid search + hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, + offset = offset, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + # 8. 
compare results through the re-calculated distances
+        for i in range(len(score_answer[:default_limit])):
+            assert abs(score_answer[i] - hybrid_res[0].distances[i]) < hybrid_search_epsilon
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_WeightedRanker_empty_reqs(self, primary_field):
+        """
+        target: test hybrid search with an empty reqs list
+        method: create connection, collection, insert and hybrid search with empty reqs
+        expected: hybrid search returns empty results
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, primary_field=primary_field,
+                                         multiple_dim_array=[default_dim, default_dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        # 3. hybrid search with empty reqs
+        collection_w.hybrid_search([], WeightedRanker(), default_limit,
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": 0})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue 29839")
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_as_search(self, primary_field, dim, auto_id, is_flush, enable_dynamic_field):
+        """
+        target: test hybrid search to search as the original search interface
+        method: create connection, collection, insert and search
+        expected: hybrid search successfully with limit(topK), and the result should be equal to search
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
+                                         primary_field=primary_field,
+                                         enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5]
+
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        vectors = [[random.random() for _ in range(dim)] for _ in range(1)]
+        for search_field in vector_name_list:
+            # 2. prepare search params
+            req_list = []
+            search_param = {
+                "data": vectors,
+                "anns_field": search_field,
+                "param": {"metric_type": "COSINE"},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+            # 3. hybrid search
+            hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(1), default_limit,
+                                                    check_task=CheckTasks.check_search_results,
+                                                    check_items={"nq": 1,
+                                                                 "ids": insert_ids,
+                                                                 "limit": default_limit})[0]
+            search_res = collection_w.search(vectors[:1], search_field,
+                                             default_search_params, default_limit,
+                                             default_search_exp,
+                                             check_task=CheckTasks.check_search_results,
+                                             check_items={"nq": 1,
+                                                          "ids": insert_ids,
+                                                          "limit": default_limit})[0]
+            # 4. 
the effect of hybrid search to one field should equal to search + log.info("The distance list is:\n") + log.info(hybrid_res[0].distances) + log.info(search_res[0].distances) + assert hybrid_res[0].ids == search_res[0].ids + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_different_metric_type(self, primary_field, dim, auto_id, is_flush, + enable_dynamic_field, metric_type): + """ + target: test hybrid search for fields with different metric type + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, is_index=False, + primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for vector_name in vector_name_list: + search_param = { + "data": [[random.random() for _ in range(dim)] for _ in range(1)], + "anns_field": vector_name, + "param": {"metric_type": metric_type, "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 1), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + @pytest.mark.xfail(reason="issue 29923") + def test_hybrid_search_different_dim(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search for fields with different dim + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + default_limit = 100 + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type, "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. 
hybrid search + hybrid_search_0 = collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + hybrid_search_1 = collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + log.debug(hybrid_search_0[0].ids ) + log.debug(hybrid_search_1[0].ids) + log.debug(hybrid_search_0[0].distances) + log.debug(hybrid_search_1[0].distances) + assert hybrid_search_0[0].ids == hybrid_search_1[0].ids + assert hybrid_search_0[0].distances == hybrid_search_1[0].distances + + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_overall_limit_larger_sum_each_limit(self, primary_field, dim, + enable_dynamic_field, metric_type): + """ + target: test hybrid search: overall limit which is larger than sum of each limit + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type, "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit*len(req_list)+1, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit*len(req_list)}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_overall_different_limit(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search with different limit params + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. 
extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type, "offset": 0}, + "limit": default_limit - i, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_min_limit(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search with minimum limit params + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type, "offset": 0}, + "limit": min_dim, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": min_dim*len(vector_name_list)}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_max_limit(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search with maximum limit params + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. 
extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type}, + "limit": max_dim, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_max_min_limit(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search with maximum and minimum limit params + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim + dim, dim - 10] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + limit = max_dim + if i == 1: + limit = min_dim + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type}, + "limit": limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_same_anns_field(self, primary_field, dim, enable_dynamic_field, metric_type): + """ + target: test hybrid search: multiple search on same anns field + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim, dim] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. 
extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(multiple_dim_array[i])] for _ in range(1)], + "anns_field": vector_name_list[0], + "param": {"metric_type": metric_type, "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_different_offset_single_field(self, primary_field, dim, auto_id, is_flush, + enable_dynamic_field, metric_type): + """ + target: test hybrid search for fields with different offset + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, is_index=False, + primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.load() + # 3. prepare search params + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": [[random.random() for _ in range(dim)] for _ in range(1)], + "anns_field": vector_name_list[i], + "param": {"metric_type": metric_type, "offset": i}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 1), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_max_reqs_num(self, primary_field, dim, enable_dynamic_field): + """ + target: test hybrid search with maximum reqs number + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + multiple_dim_array = [dim, dim] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=dim, is_index=False, primary_field=primary_field, + enable_dynamic_field=False, multiple_dim_array=multiple_dim_array)[0:5] + # 2. 
extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
+        for vector_name in vector_name_list:
+            collection_w.create_index(vector_name, flat_index)
+        collection_w.create_index(ct.default_float_vec_field_name, flat_index)
+        collection_w.load()
+        reqs_max_num = max_hybrid_search_req_num
+        # 3. prepare search params
+        req_list = []
+        for i in range(reqs_max_num):
+            search_param = {
+                "data": [[random.random() for _ in range(dim)] for _ in range(1)],
+                "anns_field": default_search_field,
+                "param": {"metric_type": "COSINE"},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        weights = [random.random() for _ in range(len(req_list))]
+        log.info(weights)
+        # 4. hybrid search
+        collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit,
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": 1,
+                                                "ids": insert_ids,
+                                                "limit": default_limit})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_WeightedRanker_different_parameters(self, primary_field, dim, auto_id, is_flush,
+                                                               enable_dynamic_field, metric_type):
+        """
+        target: test hybrid search with different weight parameters to WeightedRanker
+        method: create connection, collection, insert and search
+        expected: hybrid search successfully with limit(topK)
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, is_index=False,
+                                         primary_field=primary_field,
+                                         enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": metric_type}
+        for vector_name in vector_name_list:
+            collection_w.create_index(vector_name, flat_index)
+        collection_w.load()
+        # 3. prepare search params
+        req_list = []
+        for i in range(len(vector_name_list)):
+            search_param = {
+                "data": [[random.random() for _ in range(dim)] for _ in range(1)],
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": metric_type, "offset": i},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        # 4. hybrid search
+        collection_w.hybrid_search(req_list, WeightedRanker(0.2, 0.03, 0.9), default_limit,
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": 1,
+                                                "ids": insert_ids,
+                                                "limit": default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.skip("issue: #29840")
+    def test_hybrid_search_invalid_WeightedRanker_params(self):
+        """
+        target: test hybrid search with invalid params type to WeightedRanker
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        # 1. initialize collection with data
+        multiple_dim_array = [default_dim, default_dim]
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, dim=default_dim, is_index=False,
+                                         multiple_dim_array=multiple_dim_array)[0:5]
+        # 2. 
extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
+        for vector_name in vector_name_list:
+            collection_w.create_index(vector_name, flat_index)
+        collection_w.create_index(ct.default_float_vec_field_name, flat_index)
+        collection_w.load()
+        reqs_num = 2
+        # 3. prepare search params
+        req_list = []
+        for i in range(reqs_num):
+            search_param = {
+                "data": [[random.random() for _ in range(default_dim)] for _ in range(1)],
+                "anns_field": default_search_field,
+                "param": {"metric_type": "COSINE"},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        # 4. hybrid search with list in WeightedRanker
+        collection_w.hybrid_search(req_list, WeightedRanker([0.9, 0.1]), default_limit,
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": 1,
+                                                "ids": insert_ids,
+                                                "limit": default_limit})
+        # 5. hybrid search with two-dim list in WeightedRanker
+        weights = [[random.random() for _ in range(1)] for _ in range(len(req_list))]
+        collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit,
+                                   check_task=CheckTasks.check_search_results,
+                                   check_items={"nq": 1,
+                                                "ids": insert_ids,
+                                                "limit": default_limit})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_hybrid_search_over_maximum_reqs_num(self):
+        """
+        target: test hybrid search over maximum reqs number
+        method: create connection, collection, insert and search
+        expected: raise exception
+        """
+        # 1. initialize collection with data
+        multiple_dim_array = [default_dim, default_dim]
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, dim=default_dim, is_index=False,
+                                         multiple_dim_array=multiple_dim_array)[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
+        for vector_name in vector_name_list:
+            collection_w.create_index(vector_name, flat_index)
+        collection_w.create_index(ct.default_float_vec_field_name, flat_index)
+        collection_w.load()
+        reqs_max_num = max_hybrid_search_req_num + 1
+        # 3. prepare search params
+        req_list = []
+        for i in range(reqs_max_num):
+            search_param = {
+                "data": [[random.random() for _ in range(default_dim)] for _ in range(1)],
+                "anns_field": default_search_field,
+                "param": {"metric_type": "COSINE"},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        weights = [random.random() for _ in range(len(req_list))]
+        log.info(weights)
+        # 4. hybrid search
+        collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit,
+                                   check_task=CheckTasks.err_res,
+                                   check_items={"err_code": 65535,
+                                                "err_msg": 'maximum of ann search requests is 1024'})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_with_range_search(self, primary_field):
+        """
+        target: test hybrid search with range search params in each request
+        method: create connection, collection, insert and search
+        expected: hybrid search successfully with limit(topK)
+        """
+        # 1. 
initialize collection with data + multiple_dim_array = [default_dim, default_dim] + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=default_dim, is_index=False, + primary_field=primary_field, + multiple_dim_array=multiple_dim_array)[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"} + for vector_name in vector_name_list: + collection_w.create_index(vector_name, flat_index) + collection_w.create_index(ct.default_float_vec_field_name, flat_index) + collection_w.load() + reqs_max_num = 2 + # 3. prepare search params + req_list = [] + for i in range(reqs_max_num): + search_param = { + "data": [[random.random() for _ in range(default_dim)] for _ in range(1)], + "anns_field": default_search_field, + "param": {"metric_type": "COSINE", "params": {"radius": 0, "range_filter": 1000}}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + weights = [random.random() for _ in range(len(req_list))] + log.info(weights) + # 4. hybrid search + collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + @pytest.mark.xfail(reason="issue 29923") + def test_hybrid_search_RRFRanker_default_parameter(self, primary_field): + """ + target: test hybrid search with default value to RRFRanker + method: create connection, collection, insert and search. + Note: here the result check is through comparing the score, the ids could not be compared + because the high probability of the same score, then the id is not fixed in the range of + the same score + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=default_dim, primary_field=primary_field, + multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params for each vector field + req_list = [] + search_res_dict_array = [] + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # search for get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + ids = search_res[0].ids + for j in range(len(ids)): + search_res_dict[ids[j]] = 1/(j + 60 +1) + search_res_dict_array.append(search_res_dict) + # 4. calculate hybrid search base line for RRFRanker + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 5. 
hybrid search + hybrid_search_0 = collection_w.hybrid_search(req_list, RRFRanker(), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + # 6. compare results through the re-calculated distances + for i in range(len(score_answer[:default_limit])): + assert score_answer[i] - hybrid_search_0[0].distances[i] < hybrid_search_epsilon + # 7. run hybrid search with the same parameters twice, and compare the results + hybrid_search_1 = collection_w.hybrid_search(req_list, RRFRanker(), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + + log.debug(hybrid_search_0[0].ids) + log.debug(hybrid_search_1[0].ids) + log.debug(hybrid_search_0[0].distances) + log.debug(hybrid_search_1[0].distances) + assert hybrid_search_0[0].ids == hybrid_search_1[0].ids + assert hybrid_search_0[0].distances == hybrid_search_1[0].distances + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("k", [1, 60, 1000, 16383]) + @pytest.mark.parametrize("offset", [0, 1, 5]) + def test_hybrid_search_RRFRanker_different_k(self, dim, auto_id, is_flush, enable_dynamic_field, k, offset): + """ + target: test hybrid search normal case + method: create connection, collection, insert and search. + Note: here the result check is through comparing the score, the ids could not be compared + because the high probability of the same score, then the id is not fixed in the range of + the same score + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, + enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params for each vector field + req_list = [] + search_res_dict_array = [] + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(dim)] for _ in range(1)] + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE"}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # search for get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, default_limit, + default_search_exp, offset=offset, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + ids = search_res[0].ids + for j in range(len(ids)): + search_res_dict[ids[j]] = 1/(j + k +1) + search_res_dict_array.append(search_res_dict) + # 4. calculate hybrid search base line for RRFRanker + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 5. hybrid search + hybrid_res = collection_w.hybrid_search(req_list, RRFRanker(k), default_limit, + offset=offset, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + # 6. 
compare results through the re-calculated distances + for i in range(len(score_answer[:default_limit])): + assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("offset", [0, 1, 5]) + @pytest.mark.parametrize("rerank", [RRFRanker(), WeightedRanker(0.1, 0.9, 1)]) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_offset_inside_outside_params(self, primary_field, offset, rerank): + """ + target: test hybrid search with offset inside and outside params + method: create connection, collection, insert and search. + Note: here the result check is through comparing the score, the ids could not be compared + because the high probability of the same score, then the id is not fixed in the range of + the same score + expected: hybrid search successfully with limit(topK), and the result should be the same + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, primary_field=primary_field, + multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + req_list = [] + vectors_list = [] + # 3. generate vectors + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + vectors_list.append(vectors) + # 4. prepare search params for each vector field + for i in range(len(vector_name_list)): + search_param = { + "data": vectors_list[i], + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": offset}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search with offset inside the params + hybrid_res_inside = collection_w.hybrid_search(req_list, rerank, default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + # 5. hybrid search with offset parameter + req_list = [] + for i in range(len(vector_name_list)): + search_param = { + "data": vectors_list[i], + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE"}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + hybrid_res = collection_w.hybrid_search(req_list, rerank, default_limit, + offset=offset, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + + assert hybrid_res_inside[0].distances == hybrid_res[0].distances + + @pytest.mark.tags(CaseLabel.L2) + def test_hybrid_search_RRFRanker_empty_reqs(self): + """ + target: test hybrid search normal case + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. 
hybrid search with empty reqs + collection_w.hybrid_search([], RRFRanker(), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 0}) + + @pytest.mark.tags(CaseLabel.L1) + @pytest.mark.parametrize("k", [0, 16385]) + @pytest.mark.xfail(reason="issue #29867") + def test_hybrid_search_RRFRanker_k_out_of_range(self, k): + """ + target: test hybrid search with default value to RRFRanker + method: create connection, collection, insert and search. + Note: here the result check is through comparing the score, the ids could not be compared + because the high probability of the same score, then the id is not fixed in the range of + the same score + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, dim=default_dim, + multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params for each vector field + req_list = [] + search_res_dict_array = [] + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # search for get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + ids = search_res[0].ids + for j in range(len(ids)): + search_res_dict[ids[j]] = 1/(j + k +1) + search_res_dict_array.append(search_res_dict) + # 4. calculate hybrid search base line for RRFRanker + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 5. hybrid search + hybrid_res = collection_w.hybrid_search(req_list, RRFRanker(k), default_limit, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + # 6. compare results through the re-calculated distances + for i in range(len(score_answer[:default_limit])): + assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("limit", [1, 100, 16384]) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_different_limit_round_decimal(self, primary_field, limit): + """ + target: test hybrid search with different valid limit and round decimal + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, primary_field=primary_field, + multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. 
prepare search params + req_list = [] + weights = [0.2, 0.3, 0.5] + search_res_dict_array = [] + if limit > default_nb: + limit = default_limit + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": 0}, + "limit": limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # search to get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, limit, + default_search_exp, round_decimal= 5, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": limit})[0] + ids = search_res[0].ids + distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances] + for j in range(len(ids)): + search_res_dict[ids[j]] = distance_array[j] + search_res_dict_array.append(search_res_dict) + # 4. calculate hybrid search base line + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 5. hybrid search + hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), limit, + round_decimal=5, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": limit})[0] + # 6. compare results through the re-calculated distances + for i in range(len(score_answer[:limit])): + assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon + + @pytest.mark.tags(CaseLabel.L1) + def test_hybrid_search_limit_out_of_range_max(self): + """ + target: test hybrid search with over maximum limit + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params + req_list = [] + weights = [0.2, 0.3, 0.5] + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # 4. hybrid search with over maximum limit + limit = 16385 + error = {ct.err_code: 65535, ct.err_msg: "invalid max query result window, (offset+limit) " + "should be in range [1, 16384], but got %d" % limit} + collection_w.hybrid_search(req_list, WeightedRanker(*weights), limit, + check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L1) + def test_hybrid_search_limit_out_of_range_min(self): + """ + target: test hybrid search with over minimum limit + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. 
extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        # 3. prepare search params
+        req_list = []
+        weights = [0.2, 0.3, 0.5]
+        for i in range(len(vector_name_list)):
+            vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)]
+            search_param = {
+                "data": vectors,
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": "COSINE", "offset": 0},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        # 4. hybrid search with lower than the minimum limit
+        limit = 0
+        error = {ct.err_code: 1, ct.err_msg: "`limit` value 0 is illegal"}
+        collection_w.hybrid_search(req_list, WeightedRanker(*weights), limit,
+                                   check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_with_output_fields_all_fields(self, primary_field):
+        """
+        target: test hybrid search with all scalar and vector fields as output fields
+        method: create connection, collection, insert and search
+        expected: hybrid search successfully with limit(topK)
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, primary_field=primary_field,
+                                         multiple_dim_array=[default_dim, default_dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        # 3. prepare search params
+        req_list = []
+        weights = [0.2, 0.3, 0.5]
+        search_res_dict_array = []
+        for i in range(len(vector_name_list)):
+            vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)]
+            search_res_dict = {}
+            search_param = {
+                "data": vectors,
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": "COSINE", "offset": 0},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+            # search to get the base line of hybrid_search
+            search_res = collection_w.search(vectors[:1], vector_name_list[i],
+                                             default_search_params, default_limit,
+                                             default_search_exp,
+                                             check_task=CheckTasks.check_search_results,
+                                             check_items={"nq": 1,
+                                                          "ids": insert_ids,
+                                                          "limit": default_limit})[0]
+            ids = search_res[0].ids
+            distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances]
+            for j in range(len(ids)):
+                search_res_dict[ids[j]] = distance_array[j]
+            search_res_dict_array.append(search_res_dict)
+        # 4. calculate hybrid search base line
+        ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array)
+        # 5. hybrid search
+        output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name,
+                         default_json_field_name]
+        output_fields = output_fields + vector_name_list
+        hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit,
+                                                output_fields=output_fields,
+                                                check_task=CheckTasks.check_search_results,
+                                                check_items={"nq": 1,
+                                                             "ids": insert_ids,
+                                                             "limit": default_limit,
+                                                             "output_fields": output_fields})[0]
+        # 6. 
compare results through the re-calculated distances + for i in range(len(score_answer[:default_limit])): + assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon + + @pytest.mark.tags(CaseLabel.L2) + def test_hybrid_search_with_output_fields_all_fields_wildcard(self): + """ + target: test hybrid search normal case + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. prepare search params + req_list = [] + weights = [0.2, 0.3, 0.5] + search_res_dict_array = [] + for i in range(len(vector_name_list)): + vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)] + search_res_dict = {} + search_param = { + "data": vectors, + "anns_field": vector_name_list[i], + "param": {"metric_type": "COSINE", "offset": 0}, + "limit": default_limit, + "expr": "int64 > 0"} + req = AnnSearchRequest(**search_param) + req_list.append(req) + # search to get the base line of hybrid_search + search_res = collection_w.search(vectors[:1], vector_name_list[i], + default_search_params, default_limit, + default_search_exp, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit})[0] + ids = search_res[0].ids + distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances] + for j in range(len(ids)): + search_res_dict[ids[j]] = distance_array[j] + search_res_dict_array.append(search_res_dict) + # 4. calculate hybrid search base line + ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array) + # 5. hybrid search + output_fields = [default_int64_field_name, default_float_field_name, default_string_field_name, + default_json_field_name] + output_fields = output_fields + vector_name_list + hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, + output_fields = ["*"], + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, + "ids": insert_ids, + "limit": default_limit, + "output_fields": output_fields})[0] + # 6. compare results through the re-calculated distances + for i in range(len(score_answer[:default_limit])): + assert score_answer[i] - hybrid_res[0].distances[i] < hybrid_search_epsilon + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("output_fields", [[default_search_field], [default_search_field, default_int64_field_name]]) + @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) + def test_hybrid_search_with_output_fields_sync_async(self, primary_field, output_fields, _async): + """ + target: test hybrid search normal case + method: create connection, collection, insert and search + expected: hybrid search successfully with limit(topK) + """ + # 1. initialize collection with data + collection_w, _, _, insert_ids, time_stamp = \ + self.init_collection_general(prefix, True, primary_field=primary_field, + multiple_dim_array=[default_dim, default_dim])[0:5] + # 2. extract vector field name + vector_name_list = cf.extract_vector_field_name_list(collection_w) + vector_name_list.append(ct.default_float_vec_field_name) + # 3. 
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("output_fields", [[default_search_field], [default_search_field, default_int64_field_name]])
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_with_output_fields_sync_async(self, primary_field, output_fields, _async):
+        """
+        target: test hybrid search with output_fields in both sync and async mode
+        method: create connection, collection, insert, then hybrid search with _async True/False
+        expected: hybrid search successfully with limit(topK) and the requested fields returned
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, primary_field=primary_field,
+                                         multiple_dim_array=[default_dim, default_dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        # 3. prepare search params
+        req_list = []
+        weights = [0.2, 0.3, 0.5]
+        search_res_dict_array = []
+        for i in range(len(vector_name_list)):
+            vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)]
+            search_res_dict = {}
+            search_param = {
+                "data": vectors,
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": "COSINE", "offset": 0},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+            # search to get the baseline of hybrid_search
+            search_res = collection_w.search(vectors[:1], vector_name_list[i],
+                                             default_search_params, default_limit,
+                                             default_search_exp,
+                                             _async=_async,
+                                             check_task=CheckTasks.check_search_results,
+                                             check_items={"nq": 1,
+                                                          "ids": insert_ids,
+                                                          "limit": default_limit,
+                                                          "_async": _async})[0]
+            if _async:
+                search_res.done()
+                search_res = search_res.result()
+            ids = search_res[0].ids
+            distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances]
+            for j in range(len(ids)):
+                search_res_dict[ids[j]] = distance_array[j]
+            search_res_dict_array.append(search_res_dict)
+        # 4. calculate hybrid search baseline
+        ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array)
+        # 5. hybrid search
+        hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), default_limit, _async=_async,
+                                                output_fields=output_fields,
+                                                check_task=CheckTasks.check_search_results,
+                                                check_items={"nq": 1,
+                                                             "ids": insert_ids,
+                                                             "limit": default_limit,
+                                                             "output_fields": output_fields,
+                                                             "_async": _async})[0]
+        if _async:
+            hybrid_res.done()
+            hybrid_res = hybrid_res.result()
+        # 6. compare results through the re-calculated distances
+        for i in range(len(score_answer[:default_limit])):
+            assert abs(score_answer[i] - hybrid_res[0].distances[i]) < hybrid_search_epsilon
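The async round trip exercised above is the future-based pattern shown in isolation below; names are hypothetical, and a loaded collection with a FLOAT_VECTOR field "vec1" of dim 8 is assumed:

import random
from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")
collection = Collection("hybrid_demo")  # assumed indexed and loaded
query = [[random.random() for _ in range(8)]]
# _async=True makes search() return a future instead of results
future = collection.search(query, "vec1", {"metric_type": "COSINE"},
                           limit=10, _async=True)
future.done()            # block until the request completes
hits = future.result()   # same shape as a synchronous search result
print(hits[0].ids, hits[0].distances)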
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("rerank", [RRFRanker(), WeightedRanker(0.1, 0.9, 1)])
+    def test_hybrid_search_offset_both_inside_outside_params(self, rerank):
+        """
+        target: test hybrid search with offset passed both inside the search params and as a kwarg
+        method: create connection, collection, insert, then hybrid search with offset given twice
+        expected: raise exception
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, multiple_dim_array=[default_dim, default_dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        req_list = []
+        vectors_list = []
+        # 3. generate vectors
+        for i in range(len(vector_name_list)):
+            vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)]
+            vectors_list.append(vectors)
+        # 4. prepare search params for each vector field, with offset inside the params
+        for i in range(len(vector_name_list)):
+            search_param = {
+                "data": vectors_list[i],
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": "COSINE", "offset": 0},
+                "limit": default_limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+        # 5. hybrid search with offset also passed as a kwarg
+        error = {ct.err_code: 1, ct.err_msg: "Provide offset both in kwargs and param, expect just one"}
+        collection_w.hybrid_search(req_list, rerank, default_limit, offset=2,
+                                   check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("limit", [1, 100, 16384])
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_is_partition_key(self, primary_field, limit):
+        """
+        target: test hybrid search when the primary field is also the partition key,
+                with different valid limits and round decimal
+        method: create connection, collection with is_partition_key on the primary field,
+                insert and hybrid search
+        expected: hybrid search successfully with limit(topK)
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, primary_field=primary_field,
+                                         multiple_dim_array=[default_dim, default_dim],
+                                         is_partition_key=primary_field)[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        # 3. prepare search params
+        req_list = []
+        weights = [0.2, 0.3, 0.5]
+        search_res_dict_array = []
+        if limit > default_nb:
+            limit = default_limit
+        for i in range(len(vector_name_list)):
+            vectors = [[random.random() for _ in range(default_dim)] for _ in range(1)]
+            search_res_dict = {}
+            search_param = {
+                "data": vectors,
+                "anns_field": vector_name_list[i],
+                "param": {"metric_type": "COSINE", "offset": 0},
+                "limit": limit,
+                "expr": "int64 > 0"}
+            req = AnnSearchRequest(**search_param)
+            req_list.append(req)
+            # search to get the baseline of hybrid_search
+            search_res = collection_w.search(vectors[:1], vector_name_list[i],
+                                             default_search_params, limit,
+                                             default_search_exp, round_decimal=5,
+                                             check_task=CheckTasks.check_search_results,
+                                             check_items={"nq": 1,
+                                                          "ids": insert_ids,
+                                                          "limit": limit})[0]
+            ids = search_res[0].ids
+            distance_array = [distance_single * weights[i] for distance_single in search_res[0].distances]
+            for j in range(len(ids)):
+                search_res_dict[ids[j]] = distance_array[j]
+            search_res_dict_array.append(search_res_dict)
+        # 4. calculate hybrid search baseline
+        ids_answer, score_answer = cf.get_hybrid_search_base_results(search_res_dict_array)
+        # 5. hybrid search
+        hybrid_res = collection_w.hybrid_search(req_list, WeightedRanker(*weights), limit,
+                                                round_decimal=5,
+                                                check_task=CheckTasks.check_search_results,
+                                                check_items={"nq": 1,
+                                                             "ids": insert_ids,
+                                                             "limit": limit})[0]
+        # 6. compare results through the re-calculated distances
+        for i in range(len(score_answer[:limit])):
+            assert abs(score_answer[i] - hybrid_res[0].distances[i]) < hybrid_search_epsilon
+
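The WeightedRanker cases above are verified against a recomputed weighted sum, but the RRFRanker parametrization has no numeric baseline here. For orientation, Reciprocal Rank Fusion scores a hit as the sum of 1/(k + rank) over the per-field rankings. A minimal sketch, assuming pymilvus's documented default k=60 and 1-based ranks; this is not Milvus source code:

def fuse_rrf(per_field_ids, k=60, limit=10):
    # per_field_ids: one ranked list of primary keys per vector field,
    # best match first; ranks are 1-based in the RRF formula
    scores = {}
    for ids in per_field_ids:
        for rank, pk in enumerate(ids, start=1):
            scores[pk] = scores.get(pk, 0.0) + 1.0 / (k + rank)
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:limit]
    return [pk for pk, _ in ranked], [score for _, score in ranked]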