test: Add a new range search test for all indexes and align some index params (#32724)

related issue: https://github.com/milvus-io/milvus/issues/32653

1. align some default index params
2. add new range search tests for all indexes and float vectors
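
For context, the new tests drive Milvus range search through the regular search API: the request carries a radius/range_filter pair inside the search params. A minimal pymilvus sketch (collection name, field name, and dim are illustrative, not taken from the tests):

from pymilvus import connections, Collection

connections.connect(host="localhost", port="19530")   # assumes a local Milvus
collection = Collection("range_search_demo")          # hypothetical collection, already indexed and loaded

search_params = {
    "metric_type": "COSINE",
    "params": {
        "nprobe": 32,          # index-specific knob (IVF family)
        "radius": 0.2,         # for COSINE, drop hits with score <= 0.2
        "range_filter": 0.8,   # ...and hits with score > 0.8
    },
}
res = collection.search(data=[[0.1] * 128], anns_field="float_vector",
                        param=search_params, limit=10)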

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
pull/32743/head
yanliang567 2024-04-30 17:37:27 +08:00 committed by GitHub
parent c70c21e70b
commit 5bb672d70d
11 changed files with 394 additions and 469 deletions

View File

@@ -242,7 +242,8 @@ class TestcaseBase(Base):
         expected: return collection and raw data, insert ids
         """
         log.info("Test case of search interface: initialize before test case")
-        self._connect()
+        if not self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)[0]:
+            self._connect()
         collection_name = cf.gen_unique_str(prefix)
         if name is not None:
             collection_name = name
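
A note on the new guard: the [0] subscript assumes the wrapper returns a (result, check) pair, the same convention list_connections() follows elsewhere in this suite. Hypothetical unpacking for clarity:

exists, _ = self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)
if not exists:
    self._connect()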

View File

@@ -359,9 +359,9 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
     else:
         multiple_dim_array.insert(0, dim)
         for i in range(len(multiple_dim_array)):
-            fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.vector_data_type_all[i%3]}",
+            fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.all_float_vector_types[i%3]}",
                                               dim=multiple_dim_array[i],
-                                              vector_data_type=ct.vector_data_type_all[i%3]))
+                                              vector_data_type=ct.all_float_vector_types[i%3]))
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id,
@@ -485,8 +485,8 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
 def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                   random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                                  vector_data_type="FLOAT_VECTOR", auto_id = False,
-                                  primary_field = ct.default_int64_field_name):
+                                  vector_data_type="FLOAT_VECTOR", auto_id=False,
+                                  primary_field=ct.default_int64_field_name):
     insert_list = []
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -496,14 +496,15 @@ def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
         string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type)
-    insert_list = [int_values, float_values, string_values, json_values, float_vec_values]
+    insert_list = [int_values, float_values, string_values]
+    if with_json is True:
+        insert_list.append(json_values)
+    insert_list.append(float_vec_values)
-    if with_json is False:
-        index = insert_list.index(json_values)
-        del insert_list[index]
     if auto_id is True:
         if primary_field == ct.default_int64_field_name:
-            index = insert_list.index(int_values)
+            index = 0
         elif primary_field == ct.default_string_field_name:
             index = 2
         del insert_list[index]
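
Net effect of the rework above, sketched with illustrative comments (not part of the diff):

# with_json=True  -> insert_list == [int_values, float_values, string_values, json_values, float_vec_values]
# with_json=False -> insert_list == [int_values, float_values, string_values, float_vec_values]
# auto_id=True then drops the primary-key column by fixed position rather than
# by value lookup: index 0 for the int64 primary field, index 2 for varchar.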
@@ -699,7 +700,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
         df[ct.default_float_vec_field_name] = float_vec_values
     else:
         for i in range(len(multiple_dim_array)):
-            df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3])
+            df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3])
     if with_json is False:
         df.drop(ct.default_json_field_name, axis=1, inplace=True)
@@ -737,7 +738,7 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0
         insert_list.append(float_vec_values)
     else:
         for i in range(len(multiple_dim_array)):
-            insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3]))
+            insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3]))
     if with_json is False:
         # index = insert_list.index(json_values)
@@ -782,7 +783,7 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
     else:
         for i in range(len(multiple_dim_array)):
             dict[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i],
-                                                              ct.vector_data_type_all[i])[0]
+                                                              ct.all_float_vector_types[i])[0]
     if len(multiple_dim_array) != 0:
         with open(ct.rows_all_data_type_file_path + f'_{partition_id}' + f'_dim{dim}.txt', 'wb') as json_file:
             pickle.dump(array, json_file)
@@ -1233,7 +1234,7 @@ def gen_simple_index():
         elif ct.all_index_types[i] in ct.sparse_support:
             continue
         dic = {"index_type": ct.all_index_types[i], "metric_type": "L2"}
-        dic.update({"params": ct.default_index_params[i]})
+        dic.update({"params": ct.default_all_indexes_params[i]})
         index_params.append(dic)
     return index_params
@@ -1671,6 +1672,16 @@ def index_to_dict(index):
     }


+def get_index_params_params(index_type):
+    """get default params of index params by index type"""
+    return ct.default_all_indexes_params[ct.all_index_types.index(index_type)]
+
+
+def get_search_params_params(index_type):
+    """get default params of search params by index type"""
+    return ct.default_all_search_params_params[ct.all_index_types.index(index_type)]
+
+
 def assert_json_contains(expr, list_data):
     opposite = False
     if expr.startswith("not"):
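
The two new helpers key their lookups off position in ct.all_index_types. A usage sketch; the HNSW values follow from the aligned lists added to common_type below:

index_params = {"index_type": "HNSW", "metric_type": "COSINE",
                "params": get_index_params_params("HNSW")}     # {"M": 32, "efConstruction": 360}
search_params = {"metric_type": "COSINE",
                 "params": get_search_params_params("HNSW")}   # {"ef": 100}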

View File

@@ -14,14 +14,6 @@ default_nq = 2
 default_limit = 10
 default_batch_size = 1000
 max_limit = 16384
-default_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
-default_search_ip_params = {"metric_type": "IP", "params": {"nprobe": 10}}
-default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
-default_index = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
-default_binary_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"}
-default_diskann_index = {"index_type": "DISKANN", "metric_type": "COSINE", "params": {}}
-default_diskann_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
-default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}
 max_top_k = 16384
 max_partition_num = 4096
 max_role_num = 10
@@ -52,7 +44,7 @@ default_binary_vec_field_name = "binary_vector"
 float_type = "FLOAT_VECTOR"
 float16_type = "FLOAT16_VECTOR"
 bfloat16_type = "BFLOAT16_VECTOR"
-vector_data_type_all = [float_type, float16_type, bfloat16_type]
+all_float_vector_types = [float_type, float16_type, bfloat16_type]
 default_sparse_vec_field_name = "sparse_vector"
 default_partition_name = "_default"
 default_resource_group_name = '__default_resource_group'
@@ -108,11 +100,6 @@ code = "code"
 err_code = "err_code"
 err_msg = "err_msg"
 in_cluster_env = "IN_CLUSTER"
-default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
-default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
-default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP",
-                                 "params": {"drop_ratio_build": 0.2}}
-
 default_count_output = "count(*)"
 rows_all_data_type_file_path = "/tmp/rows_all_data_type"
@@ -250,26 +237,50 @@ get_wrong_format_dict = [
 ]

 """ Specially defined list """
-all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "SCANN", "DISKANN", "BIN_FLAT", "BIN_IVF_FLAT",
-                   "SPARSE_INVERTED_INDEX", "SPARSE_WAND", "GPU_IVF_FLAT", "GPU_IVF_PQ"]
+L0_index_types = ["IVF_SQ8", "HNSW", "DISKANN"]
+all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ",
+                   "HNSW", "SCANN", "DISKANN",
+                   "BIN_FLAT", "BIN_IVF_FLAT",
+                   "SPARSE_INVERTED_INDEX", "SPARSE_WAND",
+                   "GPU_IVF_FLAT", "GPU_IVF_PQ"]

-default_index_params = [{"nlist": 128}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
-                        {"M": 48, "efConstruction": 500}, {"nlist": 128}, {}, {"nlist": 128}, {"nlist": 128},
-                        {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2},
-                        {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}]
+default_all_indexes_params = [{}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
+                              {"M": 32, "efConstruction": 360}, {"nlist": 128}, {},
+                              {}, {"nlist": 64},
+                              {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2},
+                              {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}]
+
+default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe": 32},
+                                    {"ef": 100}, {"nprobe": 32, "reorder_k": 100}, {"search_list": 30},
+                                    {}, {"nprobe": 32},
+                                    {"drop_ratio_search": "0.2"}, {"drop_ratio_search": "0.2"},
+                                    {}, {}]

 Handler_type = ["GRPC", "HTTP"]
 binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"]
 delete_support = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]
 ivf = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]
 skip_pq = ["IVF_PQ"]
 sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
+default_L0_metric = "COSINE"
 float_metrics = ["L2", "IP", "COSINE"]
 binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"]
 structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"]
 all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar']
+default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": default_L0_metric}
+default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
+default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP",
+                                 "params": {"drop_ratio_build": 0.2}}
+default_search_params = {"params": default_all_search_params_params[2]}
+default_search_ip_params = {"metric_type": "IP", "params": default_all_search_params_params[2]}
+default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
+default_index = {"index_type": "IVF_SQ8", "metric_type": default_L0_metric, "params": default_all_indexes_params[2]}
+default_binary_index = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": default_all_indexes_params[8]}
+default_diskann_index = {"index_type": "DISKANN", "metric_type": default_L0_metric, "params": {}}
+default_diskann_search_params = {"params": {"search_list": 30}}
+default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}


 class CheckTasks:
     """ The name of the method used to check the result """
     check_nothing = "check_nothing"
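
The three lists are now positionally aligned, which is the invariant the new common_func helpers rely on. An illustrative check (not part of the diff):

# entry i of each list describes all_index_types[i]
assert len(all_index_types) == len(default_all_indexes_params) == len(default_all_search_params_params)
i = all_index_types.index("SCANN")
default_all_indexes_params[i]         # {"nlist": 128}
default_all_search_params_params[i]   # {"nprobe": 32, "reorder_k": 100}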

View File

@@ -1,94 +1,94 @@
-import datetime
-import pytest
-
-from base.client_base import TestcaseBase
-from common import common_func as cf
-from common import common_type as ct
-from common.common_type import CaseLabel
-from utils.util_log import test_log as log
-from pymilvus import utility
-
-
-rounds = 100
-per_nb = 100000
-default_field_name = ct.default_float_vec_field_name
-default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
-
-
-class TestLoad(TestcaseBase):
-    """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L3)
-    def test_load_default(self):
-        name = 'load_test_collection_1'
-        name2 = 'load_test_collection_2'
-        # create
-        # collection_w = self.init_collection_wrap(name=name)
-        # collection_w2 = self.init_collection_wrap(name=name2)
-        # assert collection_w.name == name
-
-        for i in range(50):
-            name = f"load_collection2_{i}"
-            self.init_collection_wrap(name=name)
-        log.debug(f"total collections: {len(utility.list_collections())}")
-
-        # # insert
-        # data = cf.gen_default_list_data(per_nb)
-        # log.debug(f"data len: {len(data[0])}")
-        # for i in range(rounds):
-        # t0 = datetime.datetime.now()
-        # ins_res, res = collection_w.insert(data, timeout=180)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s")
-        # assert res # and per_nb == len(ins_res.primary_keys)
-        #
-        # t0 = datetime.datetime.now()
-        # ins_res2, res = collection_w2.insert(data, timeout=180)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s")
-        # assert res
-        #
-        # # flush
-        # t0 = datetime.datetime.now()
-        # log.debug(f"current collection num_entities: {collection_w.num_entities}")
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"round{i} flush in {tt}")
-        #
-        # t0 = datetime.datetime.now()
-        # log.debug(f"current collection2 num_entities: {collection_w2.num_entities}")
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"round{i} flush2 in {tt}")
-
-        # index, res = collection_w.create_index(default_field_name, default_index_params, timeout=60)
-        # assert res
-
-        # # search
-        # collection_w.load()
-        # search_vectors = cf.gen_vectors(1, ct.default_dim)
-        # t0 = datetime.datetime.now()
-        # res_1, _ = collection_w.search(data=search_vectors,
-        #                                anns_field=ct.default_float_vec_field_name,
-        #                                param={"nprobe": 16}, limit=1)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert search: {tt}")
-        # assert len(res_1) == 1
-        # # collection_w.release()
-        #
-        # # index
-        # collection_w.insert(cf.gen_default_dataframe_data(nb=5000))
-        # assert collection_w.num_entities == len(data[0]) + 5000
-        # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
-        # t0 = datetime.datetime.now()
-        # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
-        #                                      index_params=_index_params,
-        #                                      name=cf.gen_unique_str())
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert index: {tt}")
-        # assert len(collection_w.indexes) == 1
-        #
-        # # query
-        # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
-        # t0 = datetime.datetime.now()
-        # res, _ = collection_w.query(term_expr)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert query: {tt}")
-        # assert len(res) == 4
+# import datetime
+# import pytest
+#
+# from base.client_base import TestcaseBase
+# from common import common_func as cf
+# from common import common_type as ct
+# from common.common_type import CaseLabel
+# from utils.util_log import test_log as log
+# from pymilvus import utility
+#
+#
+# rounds = 100
+# per_nb = 100000
+# default_field_name = ct.default_float_vec_field_name
+# default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+#
+#
+# class TestLoad(TestcaseBase):
+#     """ Test case of end to end"""
+#     @pytest.mark.tags(CaseLabel.L3)
+#     def test_load_default(self):
+#         name = 'load_test_collection_1'
+#         name2 = 'load_test_collection_2'
+#         # create
+#         # collection_w = self.init_collection_wrap(name=name)
+#         # collection_w2 = self.init_collection_wrap(name=name2)
+#         # assert collection_w.name == name
+#
+#         for i in range(50):
+#             name = f"load_collection2_{i}"
+#             self.init_collection_wrap(name=name)
+#         log.debug(f"total collections: {len(utility.list_collections())}")
+#
+#         # # insert
+#         # data = cf.gen_default_list_data(per_nb)
+#         # log.debug(f"data len: {len(data[0])}")
+#         # for i in range(rounds):
+#         # t0 = datetime.datetime.now()
+#         # ins_res, res = collection_w.insert(data, timeout=180)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s")
+#         # assert res # and per_nb == len(ins_res.primary_keys)
+#         #
+#         # t0 = datetime.datetime.now()
+#         # ins_res2, res = collection_w2.insert(data, timeout=180)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s")
+#         # assert res
+#         #
+#         # # flush
+#         # t0 = datetime.datetime.now()
+#         # log.debug(f"current collection num_entities: {collection_w.num_entities}")
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"round{i} flush in {tt}")
+#         #
+#         # t0 = datetime.datetime.now()
+#         # log.debug(f"current collection2 num_entities: {collection_w2.num_entities}")
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"round{i} flush2 in {tt}")
+#
+#         # index, res = collection_w.create_index(default_field_name, default_all_indexes_params, timeout=60)
+#         # assert res
+#
+#         # # search
+#         # collection_w.load()
+#         # search_vectors = cf.gen_vectors(1, ct.default_dim)
+#         # t0 = datetime.datetime.now()
+#         # res_1, _ = collection_w.search(data=search_vectors,
+#         #                                anns_field=ct.default_float_vec_field_name,
+#         #                                param={"nprobe": 16}, limit=1)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert search: {tt}")
+#         # assert len(res_1) == 1
+#         # # collection_w.release()
+#         #
+#         # # index
+#         # collection_w.insert(cf.gen_default_dataframe_data(nb=5000))
+#         # assert collection_w.num_entities == len(data[0]) + 5000
+#         # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+#         # t0 = datetime.datetime.now()
+#         # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
+#         #                                      index_params=_index_params,
+#         #                                      name=cf.gen_unique_str())
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert index: {tt}")
+#         # assert len(collection_w.indexes) == 1
+#         #
+#         # # query
+#         # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
+#         # t0 = datetime.datetime.now()
+#         # res, _ = collection_w.query(term_expr)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert query: {tt}")
+#         # assert len(res) == 4

View File

@@ -274,7 +274,7 @@ class TestMilvusClientIndexValid(TestcaseBase):
     @pytest.mark.skip("https://github.com/milvus-io/pymilvus/issues/1886")
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[:7],
-                                 ct.default_index_params[:7]))
+                                 ct.default_all_indexes_params[:7]))
     def test_milvus_client_index_default(self, index, params, metric_type):
         """
         target: test search (high level api) normal case
@@ -324,7 +324,7 @@ class TestMilvusClientIndexValid(TestcaseBase):
     @pytest.mark.skip(reason="pymilvus issue 1884")
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[:7],
-                                 ct.default_index_params[:7]))
+                                 ct.default_all_indexes_params[:7]))
     def test_milvus_client_index_with_params(self, index, params, metric_type):
         """
         target: test search (high level api) normal case
@@ -372,7 +372,7 @@ class TestMilvusClientIndexValid(TestcaseBase):
     @pytest.mark.skip("wait for modification")
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[:7],
-                                 ct.default_index_params[:7]))
+                                 ct.default_all_indexes_params[:7]))
     def test_milvus_client_index_after_insert(self, index, params, metric_type):
         """
         target: test search (high level api) normal case
@@ -518,7 +518,7 @@ class TestMilvusClientIndexValid(TestcaseBase):
     @pytest.mark.skip("wait for modification")
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[:7],
-                                 ct.default_index_params[:7]))
+                                 ct.default_all_indexes_params[:7]))
     def test_milvus_client_index_drop_create_same_index(self, index, params, metric_type):
         """
         target: test search (high level api) normal case
@@ -570,7 +570,7 @@ class TestMilvusClientIndexValid(TestcaseBase):
     @pytest.mark.skip("wait for modification")
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[:7],
-                                 ct.default_index_params[:7]))
+                                 ct.default_all_indexes_params[:7]))
     def test_milvus_client_index_drop_create_different_index(self, index, params, metric_type):
         """
         target: test search (high level api) normal case
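
For reference, the parametrize zip above expands to the first seven (float-vector) index types paired with their aligned defaults; the values read off the lists in common_type:

list(zip(ct.all_index_types[:7], ct.default_all_indexes_params[:7]))
# [('FLAT', {}),
#  ('IVF_FLAT', {'nlist': 128}),
#  ('IVF_SQ8', {'nlist': 128}),
#  ('IVF_PQ', {'nlist': 128, 'm': 16, 'nbits': 8}),
#  ('HNSW', {'M': 32, 'efConstruction': 360}),
#  ('SCANN', {'nlist': 128}),
#  ('DISKANN', {})]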

View File

@@ -189,12 +189,12 @@ class TestRestartBase:
         # # logging.getLogger().info(file)
         # if file["field"] == field_name and file["name"] != "_raw":
         #     assert file["data_size"] > 0
-        # if file["index_type"] != default_index["index_type"]:
+        # if file["index_type"] != default_ivf_flat_index["index_type"]:
         #     continue
         # for file in stats["partitions"][0]["segments"][0]["files"]:
         #     if file["field"] == field_name and file["name"] != "_raw":
         #         assert file["data_size"] > 0
-        #     if file["index_type"] != default_index["index_type"]:
+        #     if file["index_type"] != default_ivf_flat_index["index_type"]:
         #         assert False
         #     else:
         #         assert True

View File

@@ -19,7 +19,7 @@ from pymilvus.exceptions import MilvusException
 prefix = "index"
 default_schema = cf.gen_default_collection_schema()
 default_field_name = ct.default_float_vec_field_name
-default_index_params = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
+default_index_params = ct.default_index
 default_autoindex_params = {"index_type": "AUTOINDEX", "metric_type": "IP"}

 # copied from pymilvus
@@ -33,9 +33,9 @@ index_name2 = cf.gen_unique_str("varhar")
 index_name3 = cf.gen_unique_str("binary")
 default_string_index_params = {}
 default_binary_schema = cf.gen_default_binary_collection_schema()
-default_binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}}
+default_binary_index_params = ct.default_binary_index
 # query = gen_search_vectors_params(field_name, default_entities, default_top_k, 1)
-default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}
+default_ivf_flat_index = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}
 default_ip_index_params = {"index_type": "IVF_FLAT", "metric_type": "IP", "params": {"nlist": 64}}
 default_nq = ct.default_nq
 default_limit = ct.default_limit
@@ -215,7 +215,7 @@ class TestIndexOperation(TestcaseBase):
         self.index_wrap.init_index(collection_w.collection, default_field_name, default_index_params)
         error = {ct.err_code: 65535, ct.err_msg: "CreateIndex failed: at most one "
                                                  "distinct index is allowed per field"}
-        self.index_wrap.init_index(collection_w.collection, default_field_name, default_index,
+        self.index_wrap.init_index(collection_w.collection, default_field_name, default_ivf_flat_index,
                                    check_task=CheckTasks.err_res, check_items=error)
         assert len(collection_w.indexes) == 1
@@ -611,7 +611,7 @@ class TestNewIndexBase(TestcaseBase):
         self.connection_wrap.remove_connection(ct.default_alias)
         res_list, _ = self.connection_wrap.list_connections()
         assert ct.default_alias not in res_list
-        collection_w.create_index(ct.default_float_vec_field_name, ct.default_index_params,
+        collection_w.create_index(ct.default_float_vec_field_name, ct.default_all_indexes_params,
                                   check_task=CheckTasks.err_res,
                                   check_items={ct.err_code: 1, ct.err_msg: "should create connect first"})
@@ -716,7 +716,7 @@
         collection_w = self.init_collection_wrap(name=c_name)
         data = cf.gen_default_list_data()
         collection_w.insert(data=data)
-        index_prams = [default_index, {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
+        index_prams = [default_ivf_flat_index, {"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
         for index in index_prams:
             index_name = cf.gen_unique_str("name")
             collection_w.create_index(default_float_vec_field_name, index, index_name=index_name)
@@ -1122,7 +1122,7 @@
                                          "limit": default_limit})

     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_index_params[:6]))
+    @pytest.mark.parametrize("index, params", zip(ct.all_index_types[:6], ct.default_all_indexes_params[:6]))
     def test_drop_mmap_index(self, index, params):
         """
         target: disabling and re-enabling mmap for index
@@ -1384,7 +1384,7 @@ class TestIndexInvalid(TestcaseBase):
         """
         collection_name = get_collection_name
         with pytest.raises(Exception) as e:
-            connect.create_index(collection_name, field_name, default_index)
+            connect.create_index(collection_name, field_name, default_ivf_flat_index)

     @pytest.mark.tags(CaseLabel.L2)
     def test_drop_index_with_invalid_collection_name(self, connect, get_collection_name):

View File

@@ -25,8 +25,7 @@ default_schema = cf.gen_default_collection_schema()
 default_binary_schema = cf.gen_default_binary_collection_schema()
 default_index_params = {"index_type": "IVF_SQ8",
                         "metric_type": "L2", "params": {"nlist": 64}}
-default_binary_index_params = {
-    "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}}
+default_binary_index_params = ct.default_binary_index
 default_search_exp = "int64 >= 0"

View File

@@ -27,8 +27,8 @@ default_mix_expr = "int64 >= 0 && varchar >= \"0\""
 default_expr = f'{ct.default_int64_field_name} >= 0'
 default_invalid_expr = "varchar >= 0"
 default_string_term_expr = f'{ct.default_string_field_name} in [\"0\", \"1\"]'
-default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
-binary_index_params = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": {"nlist": 64}}
+default_index_params = ct.default_index
+binary_index_params = ct.default_binary_index
 default_entities = ut.gen_entities(ut.default_nb, is_normal=True)
 default_pos = 5

File diff suppressed because it is too large.

View File

@@ -62,18 +62,6 @@ def binary_support():
     return ["BIN_FLAT", "BIN_IVF_FLAT"]


 def delete_support():
     return ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]


-def ivf():
-    return ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]
-
-
-def skip_pq():
-    return ["IVF_PQ"]
-
-
 def binary_metrics():
     return ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"]
@@ -721,30 +709,6 @@ def gen_invalid_vectors():
     return invalid_vectors


-def gen_invaild_search_params():
-    invalid_search_key = 100
-    search_params = []
-    for index_type in all_index_types:
-        if index_type == "FLAT":
-            continue
-        search_params.append({"index_type": index_type, "search_params": {"invalid_key": invalid_search_key}})
-        if index_type in delete_support():
-            for nprobe in gen_invalid_params():
-                ivf_search_params = {"index_type": index_type, "search_params": {"nprobe": nprobe}}
-                search_params.append(ivf_search_params)
-        elif index_type in ["HNSW"]:
-            for ef in gen_invalid_params():
-                hnsw_search_param = {"index_type": index_type, "search_params": {"ef": ef}}
-                search_params.append(hnsw_search_param)
-        elif index_type == "ANNOY":
-            for search_k in gen_invalid_params():
-                if isinstance(search_k, int):
-                    continue
-                annoy_search_param = {"index_type": index_type, "search_params": {"search_k": search_k}}
-                search_params.append(annoy_search_param)
-    return search_params
-
-
 def gen_invalid_index():
     index_params = []
     for index_type in gen_invalid_strs():
def gen_invalid_index():
index_params = []
for index_type in gen_invalid_strs():
@@ -825,23 +789,6 @@ def gen_normal_expressions():
     return expressions


-def get_search_param(index_type, metric_type="L2"):
-    search_params = {"metric_type": metric_type}
-    if index_type in ivf() or index_type in binary_support():
-        nprobe64 = {"nprobe": 64}
-        search_params.update({"params": nprobe64})
-    elif index_type in ["HNSW"]:
-        ef64 = {"ef": 64}
-        search_params.update({"params": ef64})
-    elif index_type == "ANNOY":
-        search_k = {"search_k": 1000}
-        search_params.update({"params": search_k})
-    else:
-        log.error("Invalid index_type.")
-        raise Exception("Invalid index_type.")
-    return search_params
-
-
 def assert_equal_vector(v1, v2):
     if len(v1) != len(v2):
         assert False
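
The per-index branching that get_search_param used to do is now a table lookup via the aligned lists. A minimal sketch of the replacement pattern (search_params_for is a hypothetical name, not code from this change):

from common import common_func as cf

def search_params_for(index_type, metric_type="L2"):
    # get_search_params_params indexes ct.default_all_search_params_params by position
    return {"metric_type": metric_type, "params": cf.get_search_params_params(index_type)}

search_params_for("HNSW")   # -> {"metric_type": "L2", "params": {"ef": 100}}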