diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index c56d34c582..e5b3cfd2e6 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -242,7 +242,8 @@ class TestcaseBase(Base): expected: return collection and raw data, insert ids """ log.info("Test case of search interface: initialize before test case") - self._connect() + if not self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)[0]: + self._connect() collection_name = cf.gen_unique_str(prefix) if name is not None: collection_name = name diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 9f8a5a69fa..e70a6d76e9 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -359,9 +359,9 @@ def gen_collection_schema_all_datatype(description=ct.default_desc, else: multiple_dim_array.insert(0, dim) for i in range(len(multiple_dim_array)): - fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.vector_data_type_all[i%3]}", + fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.all_float_vector_types[i%3]}", dim=multiple_dim_array[i], - vector_data_type=ct.vector_data_type_all[i%3])) + vector_data_type=ct.all_float_vector_types[i%3])) schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description, primary_field=primary_field, auto_id=auto_id, @@ -485,8 +485,8 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[], - vector_data_type="FLOAT_VECTOR", auto_id = False, - primary_field = ct.default_int64_field_name): + vector_data_type="FLOAT_VECTOR", auto_id=False, + primary_field=ct.default_int64_field_name): insert_list = [] if not random_primary_key: int_values 
= pd.Series(data=[i for i in range(start, start + nb)]) @@ -496,14 +496,15 @@ def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string") json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)] float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type) - insert_list = [int_values, float_values, string_values, json_values, float_vec_values] + insert_list = [int_values, float_values, string_values] + + if with_json is True: + insert_list.append(json_values) + insert_list.append(float_vec_values) - if with_json is False: - index = insert_list.index(json_values) - del insert_list[index] if auto_id is True: if primary_field == ct.default_int64_field_name: - index = insert_list.index(int_values) + index = 0 elif primary_field == ct.default_string_field_name: index = 2 del insert_list[index] @@ -699,7 +700,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w df[ct.default_float_vec_field_name] = float_vec_values else: for i in range(len(multiple_dim_array)): - df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3]) + df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3]) if with_json is False: df.drop(ct.default_json_field_name, axis=1, inplace=True) @@ -737,7 +738,7 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0 insert_list.append(float_vec_values) else: for i in range(len(multiple_dim_array)): - insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3])) + insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3])) if with_json is False: # index = insert_list.index(json_values) @@ -782,7 +783,7 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st else: for i in 
range(len(multiple_dim_array)): dict[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], - ct.vector_data_type_all[i])[0] + ct.all_float_vector_types[i%3])[0] if len(multiple_dim_array) != 0: with open(ct.rows_all_data_type_file_path + f'_{partition_id}' + f'_dim{dim}.txt', 'wb') as json_file: pickle.dump(array, json_file) @@ -1233,7 +1234,7 @@ def gen_simple_index(): elif ct.all_index_types[i] in ct.sparse_support: continue dic = {"index_type": ct.all_index_types[i], "metric_type": "L2"} - dic.update({"params": ct.default_index_params[i]}) + dic.update({"params": ct.default_all_indexes_params[i]}) index_params.append(dic) return index_params @@ -1671,6 +1672,16 @@ def index_to_dict(index): } +def get_index_params_params(index_type): + """Return a copy of the default index-build params for index_type, so callers may mutate it without changing the shared defaults.""" + return dict(ct.default_all_indexes_params[ct.all_index_types.index(index_type)]) + + +def get_search_params_params(index_type): + """Return a copy of the default search params for index_type, so callers may mutate it without changing the shared defaults.""" + return dict(ct.default_all_search_params_params[ct.all_index_types.index(index_type)]) + + def assert_json_contains(expr, list_data): opposite = False if expr.startswith("not"): diff --git a/tests/python_client/common/common_type.py b/tests/python_client/common/common_type.py index a3d65043f4..b95e74ebe8 100644 --- a/tests/python_client/common/common_type.py +++ b/tests/python_client/common/common_type.py @@ -14,14 +14,6 @@ default_nq = 2 default_limit = 10 default_batch_size = 1000 max_limit = 16384 -default_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}} -default_search_ip_params = {"metric_type": "IP", "params": {"nprobe": 10}} -default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}} -default_index = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}} -default_binary_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"} -default_diskann_index = {"index_type": "DISKANN", 
"metric_type": "COSINE", "params": {}} -default_diskann_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}} -default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}} max_top_k = 16384 max_partition_num = 4096 max_role_num = 10 @@ -52,7 +44,7 @@ default_binary_vec_field_name = "binary_vector" float_type = "FLOAT_VECTOR" float16_type = "FLOAT16_VECTOR" bfloat16_type = "BFLOAT16_VECTOR" -vector_data_type_all = [float_type, float16_type, bfloat16_type] +all_float_vector_types = [float_type, float16_type, bfloat16_type] default_sparse_vec_field_name = "sparse_vector" default_partition_name = "_default" default_resource_group_name = '__default_resource_group' @@ -108,11 +100,6 @@ code = "code" err_code = "err_code" err_msg = "err_msg" in_cluster_env = "IN_CLUSTER" - -default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"} -default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"} -default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP", - "params": {"drop_ratio_build": 0.2}} default_count_output = "count(*)" rows_all_data_type_file_path = "/tmp/rows_all_data_type" @@ -250,26 +237,50 @@ get_wrong_format_dict = [ ] """ Specially defined list """ -all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "SCANN", "DISKANN", "BIN_FLAT", "BIN_IVF_FLAT", - "SPARSE_INVERTED_INDEX", "SPARSE_WAND", "GPU_IVF_FLAT", "GPU_IVF_PQ"] +L0_index_types = ["IVF_SQ8", "HNSW", "DISKANN"] +all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", + "HNSW", "SCANN", "DISKANN", + "BIN_FLAT", "BIN_IVF_FLAT", + "SPARSE_INVERTED_INDEX", "SPARSE_WAND", + "GPU_IVF_FLAT", "GPU_IVF_PQ"] -default_index_params = [{"nlist": 128}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8}, - {"M": 48, "efConstruction": 500}, {"nlist": 128}, {}, {"nlist": 128}, {"nlist": 128}, - {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2}, - 
{"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}] +default_all_indexes_params = [{}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8}, + {"M": 32, "efConstruction": 360}, {"nlist": 128}, {}, + {}, {"nlist": 64}, + {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2}, + {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}] + +default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe": 32}, + {"ef": 100}, {"nprobe": 32, "reorder_k": 100}, {"search_list": 30}, + {}, {"nprobe": 32}, + {"drop_ratio_search": "0.2"}, {"drop_ratio_search": "0.2"}, + {}, {}] Handler_type = ["GRPC", "HTTP"] binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"] -delete_support = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"] -ivf = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"] -skip_pq = ["IVF_PQ"] sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"] +default_L0_metric = "COSINE" float_metrics = ["L2", "IP", "COSINE"] binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"] structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"] all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar'] +default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": default_L0_metric} +default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"} +default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP", + "params": {"drop_ratio_build": 0.2}} + +default_search_params = {"params": default_all_search_params_params[2]} +default_search_ip_params = {"metric_type": "IP", "params": default_all_search_params_params[2]} +default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}} +default_index = {"index_type": "IVF_SQ8", "metric_type": default_L0_metric, "params": default_all_indexes_params[2]} +default_binary_index = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": default_all_indexes_params[8]} +default_diskann_index = 
{"index_type": "DISKANN", "metric_type": default_L0_metric, "params": {}} +default_diskann_search_params = {"params": {"search_list": 30}} +default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}} + + class CheckTasks: """ The name of the method used to check the result """ check_nothing = "check_nothing" diff --git a/tests/python_client/load/test_workload.py b/tests/python_client/load/test_workload.py index 4644ed0fc3..65f4b6a05d 100644 --- a/tests/python_client/load/test_workload.py +++ b/tests/python_client/load/test_workload.py @@ -1,94 +1,94 @@ -import datetime -import pytest - -from base.client_base import TestcaseBase -from common import common_func as cf -from common import common_type as ct -from common.common_type import CaseLabel -from utils.util_log import test_log as log -from pymilvus import utility - - -rounds = 100 -per_nb = 100000 -default_field_name = ct.default_float_vec_field_name -default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} - - -class TestLoad(TestcaseBase): - """ Test case of end to end""" - @pytest.mark.tags(CaseLabel.L3) - def test_load_default(self): - name = 'load_test_collection_1' - name2 = 'load_test_collection_2' - # create - # collection_w = self.init_collection_wrap(name=name) - # collection_w2 = self.init_collection_wrap(name=name2) - # assert collection_w.name == name - - for i in range(50): - name = f"load_collection2_{i}" - self.init_collection_wrap(name=name) - log.debug(f"total collections: {len(utility.list_collections())}") - - # # insert - # data = cf.gen_default_list_data(per_nb) - # log.debug(f"data len: {len(data[0])}") - # for i in range(rounds): - # t0 = datetime.datetime.now() - # ins_res, res = collection_w.insert(data, timeout=180) - # tt = datetime.datetime.now() - t0 - # log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s") - # assert res # and per_nb == len(ins_res.primary_keys) - # - # t0 = 
datetime.datetime.now() - # ins_res2, res = collection_w2.insert(data, timeout=180) - # tt = datetime.datetime.now() - t0 - # log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s") - # assert res - # - # # flush - # t0 = datetime.datetime.now() - # log.debug(f"current collection num_entities: {collection_w.num_entities}") - # tt = datetime.datetime.now() - t0 - # log.debug(f"round{i} flush in {tt}") - # - # t0 = datetime.datetime.now() - # log.debug(f"current collection2 num_entities: {collection_w2.num_entities}") - # tt = datetime.datetime.now() - t0 - # log.debug(f"round{i} flush2 in {tt}") - - # index, res = collection_w.create_index(default_field_name, default_index_params, timeout=60) - # assert res - - # # search - # collection_w.load() - # search_vectors = cf.gen_vectors(1, ct.default_dim) - # t0 = datetime.datetime.now() - # res_1, _ = collection_w.search(data=search_vectors, - # anns_field=ct.default_float_vec_field_name, - # param={"nprobe": 16}, limit=1) - # tt = datetime.datetime.now() - t0 - # log.debug(f"assert search: {tt}") - # assert len(res_1) == 1 - # # collection_w.release() - # - # # index - # collection_w.insert(cf.gen_default_dataframe_data(nb=5000)) - # assert collection_w.num_entities == len(data[0]) + 5000 - # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} - # t0 = datetime.datetime.now() - # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name, - # index_params=_index_params, - # name=cf.gen_unique_str()) - # tt = datetime.datetime.now() - t0 - # log.debug(f"assert index: {tt}") - # assert len(collection_w.indexes) == 1 - # - # # query - # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]' - # t0 = datetime.datetime.now() - # res, _ = collection_w.query(term_expr) - # tt = datetime.datetime.now() - t0 - # log.debug(f"assert query: {tt}") - # assert len(res) == 4 +# import datetime +# import pytest +# +# from base.client_base 
import TestcaseBase +# from common import common_func as cf +# from common import common_type as ct +# from common.common_type import CaseLabel +# from utils.util_log import test_log as log +# from pymilvus import utility +# +# +# rounds = 100 +# per_nb = 100000 +# default_field_name = ct.default_float_vec_field_name +# default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} +# +# +# class TestLoad(TestcaseBase): +# """ Test case of end to end""" +# @pytest.mark.tags(CaseLabel.L3) +# def test_load_default(self): +# name = 'load_test_collection_1' +# name2 = 'load_test_collection_2' +# # create +# # collection_w = self.init_collection_wrap(name=name) +# # collection_w2 = self.init_collection_wrap(name=name2) +# # assert collection_w.name == name +# +# for i in range(50): +# name = f"load_collection2_{i}" +# self.init_collection_wrap(name=name) +# log.debug(f"total collections: {len(utility.list_collections())}") +# +# # # insert +# # data = cf.gen_default_list_data(per_nb) +# # log.debug(f"data len: {len(data[0])}") +# # for i in range(rounds): +# # t0 = datetime.datetime.now() +# # ins_res, res = collection_w.insert(data, timeout=180) +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s") +# # assert res # and per_nb == len(ins_res.primary_keys) +# # +# # t0 = datetime.datetime.now() +# # ins_res2, res = collection_w2.insert(data, timeout=180) +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s") +# # assert res +# # +# # # flush +# # t0 = datetime.datetime.now() +# # log.debug(f"current collection num_entities: {collection_w.num_entities}") +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"round{i} flush in {tt}") +# # +# # t0 = datetime.datetime.now() +# # log.debug(f"current collection2 num_entities: {collection_w2.num_entities}") +# # tt = datetime.datetime.now() - t0 +# # 
log.debug(f"round{i} flush2 in {tt}") +# +# # index, res = collection_w.create_index(default_field_name, default_all_indexes_params, timeout=60) +# # assert res +# +# # # search +# # collection_w.load() +# # search_vectors = cf.gen_vectors(1, ct.default_dim) +# # t0 = datetime.datetime.now() +# # res_1, _ = collection_w.search(data=search_vectors, +# # anns_field=ct.default_float_vec_field_name, +# # param={"nprobe": 16}, limit=1) +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"assert search: {tt}") +# # assert len(res_1) == 1 +# # # collection_w.release() +# # +# # # index +# # collection_w.insert(cf.gen_default_dataframe_data(nb=5000)) +# # assert collection_w.num_entities == len(data[0]) + 5000 +# # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} +# # t0 = datetime.datetime.now() +# # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name, +# # index_params=_index_params, +# # name=cf.gen_unique_str()) +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"assert index: {tt}") +# # assert len(collection_w.indexes) == 1 +# # +# # # query +# # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]' +# # t0 = datetime.datetime.now() +# # res, _ = collection_w.query(term_expr) +# # tt = datetime.datetime.now() - t0 +# # log.debug(f"assert query: {tt}") +# # assert len(res) == 4 diff --git a/tests/python_client/milvus_client/test_milvus_client_index.py b/tests/python_client/milvus_client/test_milvus_client_index.py index 89dbf5cf81..081d3a8bb2 100644 --- a/tests/python_client/milvus_client/test_milvus_client_index.py +++ b/tests/python_client/milvus_client/test_milvus_client_index.py @@ -274,7 +274,7 @@ class TestMilvusClientIndexValid(TestcaseBase): @pytest.mark.skip("https://github.com/milvus-io/pymilvus/issues/1886") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + ct.default_all_indexes_params[:7])) def 
test_milvus_client_index_default(self, index, params, metric_type): """ target: test search (high level api) normal case @@ -324,7 +324,7 @@ class TestMilvusClientIndexValid(TestcaseBase): @pytest.mark.skip(reason="pymilvus issue 1884") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + ct.default_all_indexes_params[:7])) def test_milvus_client_index_with_params(self, index, params, metric_type): """ target: test search (high level api) normal case @@ -372,7 +372,7 @@ class TestMilvusClientIndexValid(TestcaseBase): @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + ct.default_all_indexes_params[:7])) def test_milvus_client_index_after_insert(self, index, params, metric_type): """ target: test search (high level api) normal case @@ -518,7 +518,7 @@ class TestMilvusClientIndexValid(TestcaseBase): @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + ct.default_all_indexes_params[:7])) def test_milvus_client_index_drop_create_same_index(self, index, params, metric_type): """ target: test search (high level api) normal case @@ -570,7 +570,7 @@ class TestMilvusClientIndexValid(TestcaseBase): @pytest.mark.skip("wait for modification") @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + ct.default_all_indexes_params[:7])) def test_milvus_client_index_drop_create_different_index(self, index, params, metric_type): """ target: test search (high level api) normal case diff --git a/tests/python_client/testcases/stability/test_restart.py b/tests/python_client/testcases/stability/test_restart.py index a3e45665cc..16f48699c2 100644 --- a/tests/python_client/testcases/stability/test_restart.py +++ b/tests/python_client/testcases/stability/test_restart.py @@ -189,12 +189,12 @@ class 
TestRestartBase: # # logging.getLogger().info(file) # if file["field"] == field_name and file["name"] != "_raw": # assert file["data_size"] > 0 - # if file["index_type"] != default_index["index_type"]: + # if file["index_type"] != default_ivf_flat_index["index_type"]: # continue # for file in stats["partitions"][0]["segments"][0]["files"]: # if file["field"] == field_name and file["name"] != "_raw": # assert file["data_size"] > 0 - # if file["index_type"] != default_index["index_type"]: + # if file["index_type"] != default_ivf_flat_index["index_type"]: # assert False # else: # assert True diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index e97c5ab4c6..af50fae2a8 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -19,7 +19,7 @@ from pymilvus.exceptions import MilvusException prefix = "index" default_schema = cf.gen_default_collection_schema() default_field_name = ct.default_float_vec_field_name -default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}} +default_index_params = ct.default_index default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"} # copied from pymilvus @@ -33,9 +33,9 @@ index_name2 = cf.gen_unique_str("varhar") index_name3 = cf.gen_unique_str("binary") default_string_index_params = {} default_binary_schema = cf.gen_default_binary_collection_schema() -default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} +default_binary_index_params = ct.default_binary_index # query = gen_search_vectors_params(field_name, default_entities, default_top_k, 1) -default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} +default_ivf_flat_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"} default_ip_index_params = {"index_type": "IVF_FLAT", "metric_type": "IP", "params": 
{"nlist": 64}} default_nq = ct.default_nq default_limit = ct.default_limit @@ -215,7 +215,7 @@ class TestIndexOperation(TestcaseBase): self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params) error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one " "distinct index is allowed per field"} - self.index_wrap.init_index(collection_w.collection, default_field_name, default_index, + self.index_wrap.init_index(collection_w.collection, default_field_name, default_ivf_flat_index, check_task=CheckTasks.err_res, check_items=error) assert len(collection_w.indexes) == 1 @@ -611,7 +611,7 @@ class TestNewIndexBase(TestcaseBase): self.connection_wrap.remove_connection(ct.default_alias) res_list, _ = self.connection_wrap.list_connections() assert ct.default_alias not in res_list - collection_w.create_index(ct.default_float_vec_field_name, ct.default_index_params, + collection_w.create_index(ct.default_float_vec_field_name, ct.default_all_indexes_params, check_task=CheckTasks.err_res, check_items={ct.err_code: 1, ct.err_msg: "should create connect first"}) @@ -716,7 +716,7 @@ class TestNewIndexBase(TestcaseBase): collection_w = self.init_collection_wrap(name=c_name) data = cf.gen_default_list_data() collection_w.insert(data=data) - index_prams = [default_index, {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}] + index_prams = [default_ivf_flat_index, {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}] for index in index_prams: index_name = cf.gen_unique_str("name") collection_w.create_index(default_float_vec_field_name, index, index_name=index_name) @@ -1122,7 +1122,7 @@ class TestNewIndexBase(TestcaseBase): "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_index_params[:6])) + @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_all_indexes_params[:6])) def 
test_drop_mmap_index(self, index, params): """ target: disabling and re-enabling mmap for index @@ -1384,7 +1384,7 @@ class TestIndexInvalid(TestcaseBase): """ collection_name = get_collection_name with pytest.raises(Exception) as e: - connect.create_index(collection_name, field_name, default_index) + connect.create_index(collection_name, field_name, default_ivf_flat_index) @pytest.mark.tags(CaseLabel.L2) def test_drop_index_with_invalid_collection_name(self, connect, get_collection_name): diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py index fea66a87a4..6a92f53336 100644 --- a/tests/python_client/testcases/test_insert.py +++ b/tests/python_client/testcases/test_insert.py @@ -25,8 +25,7 @@ default_schema = cf.gen_default_collection_schema() default_binary_schema = cf.gen_default_binary_collection_schema() default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} -default_binary_index_params = { - "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} +default_binary_index_params = ct.default_binary_index default_search_exp = "int64 >= 0" diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py index 1c4f4830f4..e05be96c90 100644 --- a/tests/python_client/testcases/test_query.py +++ b/tests/python_client/testcases/test_query.py @@ -27,8 +27,8 @@ default_mix_expr = "int64 >= 0 && varchar >= \"0\"" default_expr = f'{ct.default_int64_field_name} >= 0' default_invalid_expr = "varchar >= 0" default_string_term_expr = f'{ct.default_string_field_name} in [\"0\", \"1\"]' -default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}} -binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}} +default_index_params = ct.default_index +binary_index_params = ct.default_binary_index default_entities = ut.gen_entities(ut.default_nb, 
is_normal=True) default_pos = 5 diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 1df4b4fb1a..10ccec344a 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -49,10 +49,9 @@ default_float_field_name = ct.default_float_field_name default_bool_field_name = ct.default_bool_field_name default_string_field_name = ct.default_string_field_name default_json_field_name = ct.default_json_field_name -default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}} +default_index_params = ct.default_index vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)] -range_search_supported_index = ct.all_index_types[:7] -range_search_supported_index_params = ct.default_index_params[:7] +range_search_supported_indexes = ct.all_index_types[:7] uid = "test_search" nq = 1 epsilon = 0.001 @@ -346,10 +345,8 @@ class TestCollectionSearchInvalid(TestcaseBase): "[expected=COSINE][actual=L2]"}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_invalid_params_type(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_invalid_params_type(self, index): """ target: test search with invalid search params method: test search with invalid params type @@ -361,6 +358,7 @@ class TestCollectionSearchInvalid(TestcaseBase): collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, is_index=False)[0:4] # 2. 
create index and load + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -830,10 +828,8 @@ class TestCollectionSearchInvalid(TestcaseBase): "err_msg": "partition name search_partition_0 not found"}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[1:7], - ct.default_index_params[1:7])) - def test_search_different_index_invalid_params(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[1:7]) + def test_search_different_index_invalid_params(self, index): """ target: test search with different index method: test search with different index @@ -844,6 +840,7 @@ class TestCollectionSearchInvalid(TestcaseBase): partition_num=1, is_index=False)[0:4] # 2. create different index + params = cf.get_index_params_params(index) if params.get("m"): if (default_dim % params["m"]) != 0: params["m"] = default_dim // 4 @@ -1009,8 +1006,8 @@ class TestCollectionSearchInvalid(TestcaseBase): default_search_exp, output_fields=output_fields) @pytest.mark.tags(CaseLabel.L3) - @pytest.mark.parametrize("index, param", zip(ct.all_index_types[-2:], ct.default_index_params[-2:])) - def test_search_output_field_vector_after_gpu_index(self, index, param): + @pytest.mark.parametrize("index", ct.all_index_types[-2:]) + def test_search_output_field_vector_after_gpu_index(self, index): """ target: test search with vector as output field method: 1. create a collection and insert data @@ -1022,7 +1019,8 @@ class TestCollectionSearchInvalid(TestcaseBase): collection_w = self.init_collection_general(prefix, True, is_index=False)[0] # 2. 
create an index which doesn't output vectors - default_index = {"index_type": index, "params": param, "metric_type": "L2"} + params = cf.get_index_params_params(index) + default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index(field_name, default_index) # 3. load and search @@ -1229,41 +1227,6 @@ class TestCollectionSearchInvalid(TestcaseBase): check_items={"err_code": 65535, "err_msg": "range_filter must more than radius when IP"}) - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip(reason="annoy not supported any more") - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[5:5], - ct.default_index_params[5:5])) - def test_range_search_not_support_index(self, index, params): - """ - target: test range search after unsupported index - method: test range search after ANNOY index - expected: raise exception and report the error - """ - # 1. initialize with data - collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000, - partition_num=1, - dim=default_dim, is_index=False)[0:5] - # 2. create index and load - default_index = {"index_type": index, - "params": params, "metric_type": "L2"} - collection_w.create_index("float_vector", default_index) - collection_w.load() - # 3. 
range search - search_params = cf.gen_search_param(index) - vectors = [[random.random() for _ in range(default_dim)] - for _ in range(default_nq)] - for search_param in search_params: - search_param["params"]["radius"] = 1000 - search_param["params"]["range_filter"] = 0 - log.info("Searching with search params: {}".format(search_param)) - collection_w.search(vectors[:default_nq], default_search_field, - search_param, default_limit, - default_search_exp, - check_task=CheckTasks.err_res, - check_items={"err_code": 1, - "err_msg": f"not implemented"}) - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="It will report error before range search") @pytest.mark.parametrize("metric", ["SUPERSTRUCTURE", "SUBSTRUCTURE"]) @@ -2228,10 +2191,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L1) @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_after_different_index_with_params(self, index, params, _async, scalar_index): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_after_different_index_with_params(self, index, _async, scalar_index): """ target: test search after different index method: test search after different index and corresponding search params @@ -2248,12 +2209,7 @@ class TestCollectionSearch(TestcaseBase): dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. 
create index on vector field and load - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} vector_name_list = cf.extract_vector_field_name_list(collection_w) vector_name_list.append(ct.default_float_vec_field_name) @@ -2287,10 +2243,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.tags(CaseLabel.GPU) @pytest.mark.skip(reason="waiting for the address of bf16 data generation slow problem") - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_after_different_index_with_params_all_vector_type_multiple_vectors(self, index, params, + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_after_different_index_with_params_all_vector_type_multiple_vectors(self, index, _async, scalar_index): """ @@ -2311,12 +2265,7 @@ class TestCollectionSearch(TestcaseBase): enable_dynamic_field=enable_dynamic_field, multiple_dim_array=[default_dim, default_dim])[0:5] # 2. 
create index on vector field and load - if params.get("m"): - if (default_dim % params["m"]) != 0: - params["m"] = default_dim // 4 - if params.get("PQM"): - if (default_dim % params["PQM"]) != 0: - params["PQM"] = default_dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} vector_name_list = cf.extract_vector_field_name_list(collection_w) for vector_name in vector_name_list: @@ -2347,10 +2296,8 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[9:11], - ct.default_index_params[9:11])) - def test_search_after_different_index_with_params_gpu(self, index, params, _async): + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) + def test_search_after_different_index_with_params_gpu(self, index, _async): """ target: test search after different index method: test search after different index and corresponding search params @@ -2366,6 +2313,7 @@ class TestCollectionSearch(TestcaseBase): dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. 
create index and load + params = cf.get_index_params_params(index) if params.get("m"): if (dim % params["m"]) != 0: params["m"] = dim // 4 @@ -2418,10 +2366,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.tags(CaseLabel.GPU) @pytest.mark.skip("issue #27252") - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_after_different_index_with_min_dim(self, index, params, _async): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_after_different_index_with_min_dim(self, index, _async): """ target: test search after different index with min dim method: test search after different index and corresponding search params with dim = 1 @@ -2434,10 +2380,7 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, dim=min_dim, is_index=False)[0:5] # 2. create index and load - if params.get("m"): - params["m"] = min_dim - if params.get("PQM"): - params["PQM"] = min_dim + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -2456,10 +2399,8 @@ class TestCollectionSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[9:11], - ct.default_index_params[9:11])) - def test_search_after_different_index_with_min_dim_gpu(self, index, params, _async): + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) + def test_search_after_different_index_with_min_dim_gpu(self, index, _async): """ target: test search after different index with min dim method: test search after different index and corresponding search params with dim = 1 @@ -2472,6 +2413,7 @@ class TestCollectionSearch(TestcaseBase): auto_id=auto_id, dim=min_dim, is_index=False)[0:5] # 2. 
create index and load + params = cf.get_index_params_params(index) if params.get("m"): params["m"] = min_dim if params.get("PQM"): @@ -2495,10 +2437,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_after_index_different_metric_type(self, index, params, _async, metric_type): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_after_index_different_metric_type(self, index, _async, metric_type): """ target: test search with different metric type method: test search with different metric type @@ -2526,6 +2466,7 @@ class TestCollectionSearch(TestcaseBase): original_vectors.append(vectors_single) log.info(len(original_vectors)) # 3. create different index + params = cf.get_index_params_params(index) if params.get("m"): if (dim % params["m"]) != 0: params["m"] = dim // 4 @@ -2563,10 +2504,8 @@ class TestCollectionSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="issue 24957") - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_after_release_recreate_index(self, index, params, _async, metric_type): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_after_release_recreate_index(self, index, _async, metric_type): """ target: test search after new metric with different metric type method: test search after new metric with different metric type @@ -2593,6 +2532,7 @@ class TestCollectionSearch(TestcaseBase): vectors_single = [vectors_tmp[i][-1] for i in range(2500)] original_vectors.append(vectors_single) # 3. 
create different index + params = cf.get_index_params_params(index) if params.get("m"): if (dim % params["m"]) != 0: params["m"] = dim // 4 @@ -2633,10 +2573,8 @@ class TestCollectionSearch(TestcaseBase): "original_vectors": original_vectors}) @pytest.mark.tags(CaseLabel.GPU) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[9:11], - ct.default_index_params[9:11])) - def test_search_after_index_different_metric_type_gpu(self, index, params, _async): + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) + def test_search_after_index_different_metric_type_gpu(self, index, _async): """ target: test search with different metric type method: test search with different metric type @@ -2652,6 +2590,7 @@ class TestCollectionSearch(TestcaseBase): dim=dim, is_index=False, enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create different index + params = cf.get_index_params_params(index) if params.get("m"): if (dim % params["m"]) != 0: params["m"] = dim // 4 @@ -3761,12 +3700,10 @@ class TestCollectionSearch(TestcaseBase): "output_fields": output_fields}) @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + @pytest.mark.parametrize("index", ct.all_index_types[:7]) @pytest.mark.parametrize("metrics", ct.float_metrics) @pytest.mark.parametrize("limit", [20, 1200]) - def test_search_output_field_vector_after_different_index_metrics(self, index, params, metrics, limit): + def test_search_output_field_vector_after_different_index_metrics(self, index, metrics, limit): """ target: test search with output vector field after different index method: 1. create a collection and insert data @@ -3778,6 +3715,7 @@ class TestCollectionSearch(TestcaseBase): collection_w, _vectors = self.init_collection_general(prefix, True, is_index=False)[:2] # 2. 
create index and load + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": metrics} collection_w.create_index(field_name, default_index) collection_w.load() @@ -4690,8 +4628,8 @@ class TestCollectionSearch(TestcaseBase): assert res[ct.default_string_field_name] == "abc" @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[1:4], ct.default_index_params[1:4])) - def test_search_repeatedly_ivf_index_same_limit(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[1:4]) + def test_search_repeatedly_ivf_index_same_limit(self, index): """ target: test create collection repeatedly method: search twice, check the results is the same @@ -4703,6 +4641,7 @@ class TestCollectionSearch(TestcaseBase): collection_w = self.init_collection_general(prefix, True, nb, is_index=False)[0] # 2. insert data again + params = cf.get_index_params_params(index) index_params = {"metric_type": "COSINE", "index_type": index, "params": params} collection_w.create_index(default_search_field, index_params) @@ -4716,8 +4655,8 @@ class TestCollectionSearch(TestcaseBase): assert res1[i].ids == res2[i].ids @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[1:4], ct.default_index_params[1:4])) - def test_search_repeatedly_ivf_index_different_limit(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[1:4]) + def test_search_repeatedly_ivf_index_different_limit(self, index): """ target: test create collection repeatedly method: search twice, check the results is the same @@ -4729,6 +4668,7 @@ class TestCollectionSearch(TestcaseBase): collection_w = self.init_collection_general(prefix, True, nb, is_index=False)[0] # 2. 
insert data again + params = cf.get_index_params_params(index) index_params = {"metric_type": "COSINE", "index_type": index, "params": params} collection_w.create_index(default_search_field, index_params) @@ -4838,10 +4778,8 @@ class TestSearchBase(TestcaseBase): f" [1, 16384], but got {top_k}"}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_index_empty_partition(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_index_empty_partition(self, index): """ target: test basic search function, all the search params are correct, test all index params, and build method: add vectors into collection, search with the given vectors, check the result @@ -4861,12 +4799,7 @@ class TestSearchBase(TestcaseBase): par = collection_w.partitions # collection_w.load() # 3. create different index - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -4887,10 +4820,8 @@ class TestSearchBase(TestcaseBase): "limit": 0}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_index_partitions(self, index, params, get_top_k): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_index_partitions(self, index, get_top_k): """ target: test basic search function, all the search params are correct, test all index params, and build method: search collection with the given vectors and tags, check the result @@ -4905,12 +4836,7 @@ class TestSearchBase(TestcaseBase): dim=dim, is_index=False)[0:5] vectors = 
[[random.random() for _ in range(dim)] for _ in range(nq)] # 2. create different index - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} collection_w.create_index("float_vector", default_index) @@ -4951,10 +4877,8 @@ class TestSearchBase(TestcaseBase): assert len(res[0]) <= top_k @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_ip_after_index(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_ip_after_index(self, index): """ target: test basic search function, all the search params are correct, test all index params, and build method: search with the given vectors, check the result @@ -4969,6 +4893,7 @@ class TestSearchBase(TestcaseBase): dim=dim, is_index=False)[0:5] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. 
create ip index + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "IP"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -5012,10 +4937,8 @@ class TestSearchBase(TestcaseBase): assert abs(got - ref) <= epsilon @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_ip_index_empty_partition(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_ip_index_empty_partition(self, index): """ target: test basic search function, all the search params are correct, test all index params, and build method: add vectors into collection, search with the given vectors, check the result @@ -5033,8 +4956,8 @@ class TestSearchBase(TestcaseBase): partition_name = "search_partition_empty" collection_w.create_partition(partition_name=partition_name, description="search partition empty") par = collection_w.partitions - # collection_w.load() # 3. create different index + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "IP"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -5056,10 +4979,8 @@ class TestSearchBase(TestcaseBase): "limit": 0}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_ip_index_partitions(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_ip_index_partitions(self, index): """ target: test basic search function, all the search params are correct, test all index params, and build method: search collection with the given vectors and tags, check the result @@ -5075,8 +4996,8 @@ class TestSearchBase(TestcaseBase): vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] # 2. 
create partition par_name = collection_w.partitions[0].name - # collection_w.load() # 3. create different index + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "IP"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -5088,8 +5009,8 @@ class TestSearchBase(TestcaseBase): default_search_exp, [par_name]) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_index_params[:7])) - def test_search_cosine_all_indexes(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_cosine_all_indexes(self, index): """ target: test basic search function, all the search params are correct, test all index params, and build method: search collection with the given vectors and tags, check the result @@ -5099,6 +5020,7 @@ class TestSearchBase(TestcaseBase): collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, is_index=False)[0:5] # 2. 
create index + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "COSINE"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -5291,8 +5213,8 @@ class TestSearchBase(TestcaseBase): "limit": top_k}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_index_params[:6])) - def test_each_index_with_mmap_enabled_search(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:6]) + def test_each_index_with_mmap_enabled_search(self, index): """ target: test each index with mmap enabled search method: test each index with mmap enabled search @@ -5301,6 +5223,7 @@ class TestSearchBase(TestcaseBase): self._connect() c_name = cf.gen_unique_str(prefix) collection_w, _ = self.collection_wrap.init_collection(c_name, schema=cf.gen_default_collection_schema()) + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index(field_name, default_index, index_name="mmap_index") # mmap index @@ -5320,8 +5243,8 @@ class TestSearchBase(TestcaseBase): "limit": ct.default_limit}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[7:9], ct.default_index_params[7:9])) - def test_enable_mmap_search_for_binary_indexes(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[7:9]) + def test_enable_mmap_search_for_binary_indexes(self, index): """ target: enable mmap for binary indexes method: enable mmap for binary indexes @@ -5333,7 +5256,7 @@ class TestSearchBase(TestcaseBase): default_schema = cf.gen_default_binary_collection_schema(auto_id=False, dim=dim, primary_field=ct.default_int64_field_name) collection_w, _ = self.collection_wrap.init_collection(c_name, schema=default_schema) - + params = cf.get_index_params_params(index) default_index = {"index_type": index, 
"params": params, "metric_type": "JACCARD"} collection_w.create_index("binary_vector", default_index, index_name="binary_idx_name") @@ -6404,10 +6327,8 @@ class TestSearchPagination(TestcaseBase): assert res[0].ids == [] @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) - def test_search_pagination_after_different_index(self, index, params, offset, _async): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_pagination_after_different_index(self, index, offset, _async): """ target: test search pagination after different index method: test search pagination after different index and corresponding search params @@ -6421,12 +6342,7 @@ class TestSearchPagination(TestcaseBase): auto_id=auto_id, dim=dim, is_index=False)[0:5] # 2. create index and load - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -6972,8 +6888,23 @@ class TestSearchDiskann(TestcaseBase): class TestCollectionRangeSearch(TestcaseBase): """ Test case of range search interface """ - @pytest.fixture(scope="function", - params=[default_nb, default_nb_medium]) + @pytest.fixture(scope="function", params=ct.all_index_types[:7]) + def index_type(self, request): + tags = request.config.getoption("--tags") + if CaseLabel.L2 not in tags: + if request.param not in ct.L0_index_types: + pytest.skip(f"skip index type {request.param}") + yield request.param + + @pytest.fixture(scope="function", params=ct.float_metrics) + def metric(self, request): + tags = request.config.getoption("--tags") + if CaseLabel.L2 not in tags: + if request.param not in ct.default_L0_metric: + pytest.skip(f"skip index 
type {request.param}") + yield request.param + + @pytest.fixture(scope="function", params=[default_nb, default_nb_medium]) def nb(self, request): yield request.param @@ -7012,48 +6943,74 @@ class TestCollectionRangeSearch(TestcaseBase): # The followings are valid range search cases ****************************************************************** """ + @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.parametrize("vector_data_type", ct.all_float_vector_types) + def test_range_search_default(self, index_type, metric, vector_data_type): + """ + target: verify the range search returns correct results + method: 1. create collection, insert 8000 vectors, + 2. search with topk=1000 + 3. range search from the 30th-330th distance as filter + 4. verified the range search results is same as the search results in the range + """ + collection_w = self.init_collection_general(prefix, auto_id=True, insert_data=False, is_index=False, + vector_data_type=vector_data_type, with_json=False)[0] + nb = 2000 + for i in range(3): + data = cf.gen_general_default_list_data(nb=nb, auto_id=True, + vector_data_type=vector_data_type, with_json=False) + collection_w.insert(data) - @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("range_filter", [1000, 1000.0]) - @pytest.mark.parametrize("radius", [0, 0.0]) - def test_range_search_normal(self, nq, is_flush, radius, range_filter): - """ - target: test range search normal case - method: create connection, collection, insert and search - expected: search successfully with limit(topK) - """ - # 1. initialize with data - dim = 64 - auto_id = False - enable_dynamic_field = False - collection_w, _vectors, _, insert_ids, time_stamp = \ - self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, - enable_dynamic_field=enable_dynamic_field)[0:5] - # 2. 
get vectors that inserted into collection - vectors = [] - if enable_dynamic_field: - for vector in _vectors[0]: - vector = vector[ct.default_float_vec_field_name] - vectors.append(vector) - else: - vectors = np.array(_vectors[0]).tolist() - vectors = [vectors[i][-1] for i in range(nq)] - # 3. range search - range_search_params = {"metric_type": "COSINE", "params": {"radius": radius, - "range_filter": range_filter}} - search_res = collection_w.search(vectors[:nq], default_search_field, - range_search_params, default_limit, - default_search_exp, - check_task=CheckTasks.check_search_results, - check_items={"nq": nq, - "ids": insert_ids, - "limit": default_limit})[0] - log.info("test_range_search_normal: checking the distance of top 1") - for hits in search_res: - # verify that top 1 hit is itself,so min distance is 1.0 - assert abs(hits.distances[0] - 1.0) <= epsilon - # distances_tmp = list(hits.distances) - # assert distances_tmp.count(1.0) == 1 + collection_w.flush() + _index_params = {"index_type": "FLAT", "metric_type": metric, "params": {}} + collection_w.create_index(ct.default_float_vec_field_name, index_params=_index_params) + collection_w.load() + + for i in range(2): + with_growing = bool(i % 2) + if with_growing is True: + # add some growing segments + for _ in range(2): + data = cf.gen_general_default_list_data(nb=nb, auto_id=True, + vector_data_type=vector_data_type, with_json=False) + collection_w.insert(data) + + search_params = {"params": {}} + nq = 1 + search_vectors = cf.gen_vectors(nq, ct.default_dim, vector_data_type=vector_data_type) + search_res = collection_w.search(search_vectors, default_search_field, + search_params, limit=1000)[0] + assert len(search_res[0].ids) == 1000 + log.debug(f"search topk=1000 returns {len(search_res[0].ids)}") + check_topk = 300 + check_from = 30 + ids = search_res[0].ids[check_from:check_from + check_topk] + radius = search_res[0].distances[check_from + check_topk] + range_filter = 
search_res[0].distances[check_from] + + # rebuild the collection with test target index + collection_w.release() + collection_w.indexes[0].drop() + _index_params = {"index_type": index_type, "metric_type": metric, + "params": cf.get_index_params_params(index_type)} + collection_w.create_index(ct.default_float_vec_field_name, index_params=_index_params) + collection_w.load() + + params = cf.get_search_params_params(index_type) + params.update({"radius": radius, "range_filter": range_filter}) + if index_type == "HNSW": + params.update({"ef": check_topk+100}) + if index_type == "IVF_PQ": + params.update({"max_empty_result_buckets": 100}) + range_search_params = {"params": params} + range_res = collection_w.search(search_vectors, default_search_field, + range_search_params, limit=check_topk)[0] + range_ids = range_res[0].ids + # assert len(range_ids) == check_topk + log.debug(f"range search radius={radius}, range_filter={range_filter}, range results num: {len(range_ids)}") + hit_rate = round(len(set(ids).intersection(set(range_ids))) / len(set(ids)), 2) + log.debug(f"range search results with growing {bool(i % 2)} hit rate: {hit_rate}") + assert hit_rate >= 0.2 # issue #32630 to improve the accuracy @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("range_filter", [1000, 1000.0]) @@ -7605,10 +7562,8 @@ class TestCollectionRangeSearch(TestcaseBase): "_async": _async}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(range_search_supported_index, - range_search_supported_index_params)) - def test_range_search_after_different_index_with_params(self, index, params): + @pytest.mark.parametrize("index", range_search_supported_indexes) + def test_range_search_after_different_index_with_params(self, index): """ target: test range search after different index method: test range search after different index and corresponding search params @@ -7622,12 +7577,7 @@ class TestCollectionRangeSearch(TestcaseBase): dim=dim, is_index=False, 
enable_dynamic_field=enable_dynamic_field)[0:5] # 2. create index and load - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": "L2"} collection_w.create_index("float_vector", default_index) collection_w.load() @@ -7652,10 +7602,8 @@ class TestCollectionRangeSearch(TestcaseBase): "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", - zip(range_search_supported_index, - range_search_supported_index_params)) - def test_range_search_after_index_different_metric_type(self, index, params): + @pytest.mark.parametrize("index", range_search_supported_indexes) + def test_range_search_after_index_different_metric_type(self, index): """ target: test range search with different metric type method: test range search with different metric type @@ -7669,12 +7617,7 @@ class TestCollectionRangeSearch(TestcaseBase): partition_num=1, dim=dim, is_index=False)[0:5] # 2. 
create different index - if params.get("m"): - if (dim % params["m"]) != 0: - params["m"] = dim // 4 - if params.get("PQM"): - if (dim % params["PQM"]) != 0: - params["PQM"] = dim // 4 + params = cf.get_index_params_params(index) log.info("test_range_search_after_index_different_metric_type: Creating index-%s" % index) default_index = {"index_type": index, "params": params, "metric_type": "IP"} collection_w.create_index("float_vector", default_index) @@ -8435,7 +8378,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # delete data delete_ids = [i for i in range(50, 150)] collection_w.delete(f"int64 in {delete_ids}") @@ -8472,7 +8415,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # delete data delete_ids = [i for i in range(50, 150)] collection_w.delete(f"int64 in {delete_ids}") @@ -8509,7 +8452,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # delete data delete_ids = [i for i in range(50, 150)] collection_w.delete(f"int64 in {delete_ids}") @@ -8546,7 +8489,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, 
partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # delete data delete_ids = [i for i in range(50, 150)] collection_w.delete(f"int64 in {delete_ids}") @@ -8620,7 +8563,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load collection_w.load() # delete data @@ -8651,7 +8594,7 @@ class TestCollectionLoadOperation(TestcaseBase): 2. insert data 3. load one partition 4. delete half data in each partition - 5. release the collection + 5. release the collection and load one partition 6. search expected: No exception """ @@ -8669,13 +8612,12 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w.release() partition_w1.load() # search on collection, partition1, partition2 - collection_w.search(vectors[:1], field_name, default_search_params, 200, - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, "limit": 50}) - collection_w.search(vectors[:1], field_name, default_search_params, 200, - partition_names=[partition_w1.name], - check_task=CheckTasks.check_search_results, - check_items={"nq": 1, "limit": 50}) + collection_w.query(expr='', output_fields=[ct.default_count_output], + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [{ct.default_count_output: 50}]}) + partition_w1.query(expr='', output_fields=[ct.default_count_output], + check_task=CheckTasks.check_query_results, + check_items={"exp_res": [{ct.default_count_output: 50}]}) collection_w.search(vectors[:1], field_name, default_search_params, 200, partition_names=[partition_w2.name], 
check_task=CheckTasks.err_res, @@ -8697,7 +8639,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load partition_w1.load() # delete data @@ -8734,7 +8676,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load && release collection_w.load() partition_w1.release() @@ -8770,7 +8712,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load && release partition_w1.load() collection_w.release() @@ -9084,7 +9026,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # flush collection_w.flush() # load && release @@ -9120,7 +9062,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, 
default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # flush collection_w.flush() # load && release @@ -9156,7 +9098,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # flush collection_w.flush() # load && release @@ -9229,7 +9171,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # flush collection_w.flush() # load && release @@ -9266,7 +9208,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load collection_w.load() # flush @@ -9343,7 +9285,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load partition_w1.load() # flush @@ -9379,7 +9321,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = 
collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load && release collection_w.load() partition_w2.release() @@ -9415,7 +9357,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load && release collection_w.load() collection_w.release() @@ -9521,7 +9463,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load and release for i in range(5): collection_w.release() @@ -9576,7 +9518,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load and release collection_w.load() partition_w3 = collection_w.create_partition("_default3")[0] @@ -9598,7 +9540,7 @@ class TestCollectionLoadOperation(TestcaseBase): collection_w = self.init_collection_general( prefix, True, 200, partition_num=1, is_index=False)[0] partition_w1, partition_w2 = collection_w.partitions - collection_w.create_index(default_search_field, default_index_params) + collection_w.create_index(default_search_field, ct.default_flat_index) # load and release partition_w1.load() partition_w3 = 
collection_w.create_partition("_default3")[0] @@ -10111,11 +10053,9 @@ class TestSearchIterator(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip("issue #25145") - @pytest.mark.parametrize("index, params", - zip(ct.all_index_types[:7], - ct.default_index_params[:7])) + @pytest.mark.parametrize("index", ct.all_index_types[:7]) @pytest.mark.parametrize("metrics", ct.float_metrics) - def test_search_iterator_after_different_index_metrics(self, index, params, metrics): + def test_search_iterator_after_different_index_metrics(self, index, metrics): """ target: test search iterator using different index method: 1. search iterator @@ -10125,6 +10065,7 @@ class TestSearchIterator(TestcaseBase): # 1. initialize with data batch_size = 100 collection_w = self.init_collection_general(prefix, True, is_index=False)[0] + params = cf.get_index_params_params(index) default_index = {"index_type": index, "params": params, "metric_type": metrics} collection_w.create_index(field_name, default_index) collection_w.load() @@ -10395,8 +10336,8 @@ class TestSearchGroupBy(TestcaseBase): check_items={"err_code": err_code, "err_msg": err_msg}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:7], ct.default_index_params[:7])) - def test_search_group_by_unsupported_index(self, index, params): + @pytest.mark.parametrize("index", ct.all_index_types[:7]) + def test_search_group_by_unsupported_index(self, index): """ target: test search group by with the unsupported vector index method: 1. 
create a collection with data @@ -10410,6 +10351,7 @@ class TestSearchGroupBy(TestcaseBase): metric = "L2" collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, is_all_data_type=True, with_json=False)[0] + params = cf.get_index_params_params(index) index_params = {"index_type": index, "params": params, "metric_type": metric} collection_w.create_index(ct.default_float_vec_field_name, index_params) collection_w.load() @@ -10780,7 +10722,6 @@ class TestCollectionHybridSearchValid(TestcaseBase): # The following are valid base cases for hybrid_search ****************************************************************** """ - @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("offset", [0, 5]) @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name]) @@ -10790,12 +10731,20 @@ class TestCollectionHybridSearchValid(TestcaseBase): method: create connection, collection, insert and search expected: hybrid search successfully with limit(topK) """ + self._connect() + # create db + db_name = cf.gen_unique_str(prefix) + self.database_wrap.create_database(db_name) + # using db and create collection + self.database_wrap.using_database(db_name) + # 1. initialize collection with data dim = 64 + enable_dynamic_field = True multiple_dim_array = [dim, dim] collection_w, _, _, insert_ids, time_stamp = \ self.init_collection_general(prefix, True, dim=dim, is_flush=is_flush, - primary_field=primary_field, + primary_field=primary_field, enable_dynamic_field=enable_dynamic_field, multiple_dim_array=multiple_dim_array, vector_data_type=vector_data_type)[0:5] # 2. extract vector field name @@ -10860,6 +10809,10 @@ class TestCollectionHybridSearchValid(TestcaseBase): for i in range(len(score_answer_nq[k][:default_limit])): assert score_answer_nq[k][i] - hybrid_res[k].distances[i] < hybrid_search_epsilon + # 9. 
drop db + collection_w.drop() + self.database_wrap.drop_database(db_name) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("nq", [16384]) def test_hybrid_search_normal_max_nq(self, nq): @@ -12599,8 +12552,8 @@ class TestSparseSearch(TestcaseBase): """ Add some test cases for the sparse vector """ @pytest.mark.tags(CaseLabel.L1) - @pytest.mark.parametrize("index, index_params", zip(ct.all_index_types[9:11], ct.default_index_params[9:11])) - def test_sparse_index_search(self, index, index_params): + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) + def test_sparse_index_search(self, index): """ target: verify that sparse index for sparse vectors can be searched properly method: create connection, collection, insert and search @@ -12612,8 +12565,9 @@ class TestSparseSearch(TestcaseBase): collection_w, _ = self.collection_wrap.init_collection(c_name, schema=schema) data = cf.gen_default_list_sparse_data() collection_w.insert(data) - params = {"index_type": index, "metric_type": "IP", "params": index_params} - collection_w.create_index(ct.default_sparse_vec_field_name, params, index_name=index) + params = cf.get_index_params_params(index) + index_params = {"index_type": index, "metric_type": "IP", "params": params} + collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index) collection_w.load() collection_w.search(data[-1][-1:], ct.default_sparse_vec_field_name, @@ -12623,9 +12577,9 @@ class TestSparseSearch(TestcaseBase): "limit": default_limit}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("index, index_params", zip(ct.all_index_types[9:11], ct.default_index_params[9:11])) + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) @pytest.mark.parametrize("dim", [ct.min_sparse_vector_dim, 32768, ct.max_sparse_vector_dim]) - def test_sparse_index_dim(self, index, index_params, dim): + def test_sparse_index_dim(self, index, dim): """ target: validating the sparse index in different dimensions 
method: create connection, collection, insert and hybrid search @@ -12637,8 +12591,9 @@ class TestSparseSearch(TestcaseBase): collection_w, _ = self.collection_wrap.init_collection(c_name, schema=schema) data = cf.gen_default_list_sparse_data(dim=dim) collection_w.insert(data) - params = {"index_type": index, "metric_type": "IP", "params": index_params} - collection_w.create_index(ct.default_sparse_vec_field_name, params, index_name=index) + params = cf.get_index_params_params(index) + index_params = {"index_type": index, "metric_type": "IP", "params": params} + collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index) collection_w.load() collection_w.search(data[-1][-1:], ct.default_sparse_vec_field_name, @@ -12649,8 +12604,8 @@ class TestSparseSearch(TestcaseBase): @pytest.mark.tags(CaseLabel.L2) @pytest.mark.skip(reason="issue #31485") - @pytest.mark.parametrize("index, index_params", zip(ct.all_index_types[9:11], ct.default_index_params[9:11])) - def test_sparse_index_enable_mmap_search(self, index, index_params): + @pytest.mark.parametrize("index", ct.all_index_types[9:11]) + def test_sparse_index_enable_mmap_search(self, index): """ target: verify that the sparse indexes of sparse vectors can be searched properly after turning on mmap method: create connection, collection, enable mmap, insert and search @@ -12664,8 +12619,9 @@ class TestSparseSearch(TestcaseBase): data = cf.gen_default_list_sparse_data() collection_w.insert(data) - params = {"index_type": index, "metric_type": "IP", "params": index_params} - collection_w.create_index(ct.default_sparse_vec_field_name, params, index_name=index) + params = cf.get_index_params_params(index) + index_params = {"index_type": index, "metric_type": "IP", "params": params} + collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index) collection_w.set_properties({'mmap.enabled': True}) pro = collection_w.describe().get("properties") diff --git 
a/tests/python_client/utils/util_pymilvus.py b/tests/python_client/utils/util_pymilvus.py index 947e151800..7f334d7fb5 100644 --- a/tests/python_client/utils/util_pymilvus.py +++ b/tests/python_client/utils/util_pymilvus.py @@ -62,18 +62,6 @@ def binary_support(): return ["BIN_FLAT", "BIN_IVF_FLAT"] -def delete_support(): - return ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"] - - -def ivf(): - return ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"] - - -def skip_pq(): - return ["IVF_PQ"] - - def binary_metrics(): return ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"] @@ -721,30 +709,6 @@ def gen_invalid_vectors(): return invalid_vectors -def gen_invaild_search_params(): - invalid_search_key = 100 - search_params = [] - for index_type in all_index_types: - if index_type == "FLAT": - continue - search_params.append({"index_type": index_type, "search_params": {"invalid_key": invalid_search_key}}) - if index_type in delete_support(): - for nprobe in gen_invalid_params(): - ivf_search_params = {"index_type": index_type, "search_params": {"nprobe": nprobe}} - search_params.append(ivf_search_params) - elif index_type in ["HNSW"]: - for ef in gen_invalid_params(): - hnsw_search_param = {"index_type": index_type, "search_params": {"ef": ef}} - search_params.append(hnsw_search_param) - elif index_type == "ANNOY": - for search_k in gen_invalid_params(): - if isinstance(search_k, int): - continue - annoy_search_param = {"index_type": index_type, "search_params": {"search_k": search_k}} - search_params.append(annoy_search_param) - return search_params - - def gen_invalid_index(): index_params = [] for index_type in gen_invalid_strs(): @@ -825,23 +789,6 @@ def gen_normal_expressions(): return expressions -def get_search_param(index_type, metric_type="L2"): - search_params = {"metric_type": metric_type} - if index_type in ivf() or index_type in binary_support(): - nprobe64 = {"nprobe": 64} - search_params.update({"params": nprobe64}) - elif index_type in ["HNSW"]: - ef64 = {"ef": 
64} - search_params.update({"params": ef64}) - elif index_type == "ANNOY": - search_k = {"search_k": 1000} - search_params.update({"params": search_k}) - else: - log.error("Invalid index_type.") - raise Exception("Invalid index_type.") - return search_params - - def assert_equal_vector(v1, v2): if len(v1) != len(v2): assert False