mirror of https://github.com/milvus-io/milvus.git
add diskann testcase with search,query (#20727)
Signed-off-by: jingkl <jingjing.jia@zilliz.com> Signed-off-by: jingkl <jingjing.jia@zilliz.com> pull/20721/head
parent
8e2083c8a1
commit
5e207c0785
|
@ -234,7 +234,7 @@ class ApiCollectionWrapper:
|
|||
|
||||
@trace()
|
||||
def create_index(self, field_name, index_params, index_name=None, check_task=None, check_items=None, **kwargs):
|
||||
disktimeout = 100
|
||||
disktimeout = 600
|
||||
timeout = kwargs.get("timeout", disktimeout * 2)
|
||||
index_name = INDEX_NAME if index_name is None else index_name
|
||||
index_name = kwargs.get("index_name", index_name)
|
||||
|
|
|
@ -14,7 +14,7 @@ class ApiIndexWrapper:
|
|||
|
||||
def init_index(self, collection, field_name, index_params, index_name=None, check_task=None, check_items=None,
|
||||
**kwargs):
|
||||
disktimeout = 100
|
||||
disktimeout = 600
|
||||
timeout = kwargs.get("timeout", disktimeout * 2)
|
||||
index_name = INDEX_NAME if index_name is None else index_name
|
||||
index_name = kwargs.get("index_name", index_name)
|
||||
|
|
|
@ -400,6 +400,10 @@ def gen_invaild_search_params_type():
|
|||
continue
|
||||
annoy_search_param = {"index_type": index_type, "search_params": {"search_k": search_k}}
|
||||
search_params.append(annoy_search_param)
|
||||
elif index_type == "DISKANN":
|
||||
for search_list in ct.get_invalid_ints:
|
||||
diskann_search_param = {"index_type": index_type, "search_params": {"search_list": search_list}}
|
||||
search_params.append(diskann_search_param)
|
||||
return search_params
|
||||
|
||||
|
||||
|
@ -425,6 +429,10 @@ def gen_search_param(index_type, metric_type="L2"):
|
|||
for search_k in [1000, 5000]:
|
||||
annoy_search_param = {"metric_type": metric_type, "params": {"search_k": search_k}}
|
||||
search_params.append(annoy_search_param)
|
||||
elif index_type == "DISKANN":
|
||||
for search_list in [20, 30]:
|
||||
diskann_search_param = {"metric_type": metric_type, "params": {"search_list": search_list}}
|
||||
search_params.append(diskann_search_param)
|
||||
else:
|
||||
log.error("Invalid index_type.")
|
||||
raise Exception("Invalid index_type.")
|
||||
|
@ -446,6 +454,11 @@ def gen_invalid_search_param(index_type, metric_type="L2"):
|
|||
for search_k in ["-1"]:
|
||||
annoy_search_param = {"metric_type": metric_type, "params": {"search_k": search_k}}
|
||||
search_params.append(annoy_search_param)
|
||||
elif index_type == "DISKANN":
|
||||
for search_list in ["-1"]:
|
||||
diskann_search_param = {"metric_type": metric_type, "params": {"search_list": search_list}}
|
||||
search_params.append(diskann_search_param)
|
||||
|
||||
else:
|
||||
log.error("Invalid index_type.")
|
||||
raise Exception("Invalid index_type.")
|
||||
|
|
|
@ -169,10 +169,10 @@ get_wrong_format_dict = [
|
|||
]
|
||||
|
||||
""" Specially defined list """
|
||||
all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "ANNOY", "BIN_FLAT", "BIN_IVF_FLAT"]
|
||||
all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "ANNOY", "DISKANN", "BIN_FLAT", "BIN_IVF_FLAT"]
|
||||
|
||||
default_index_params = [{"nlist": 128}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
|
||||
{"M": 48, "efConstruction": 500}, {"n_trees": 50}, {"nlist": 128}, {"nlist": 128}]
|
||||
{"M": 48, "efConstruction": 500}, {"n_trees": 50}, {}, {"nlist": 128}, {"nlist": 128}]
|
||||
|
||||
Handler_type = ["GRPC", "HTTP"]
|
||||
binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"]
|
||||
|
|
|
@ -70,6 +70,7 @@ class TestCompactionParams(TestcaseBase):
|
|||
log.debug(c_plans2.plans[0].target)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.skip(reason="https://github.com/milvus-io/milvus/issues/20747")
|
||||
def test_compact_partition(self):
|
||||
"""
|
||||
target: test compact partition
|
||||
|
|
|
@ -1580,11 +1580,11 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(name=c_name)
|
||||
data = cf.gen_default_list_data()
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
index, _ = self.index_wrap.init_index(collection_w.collection, default_float_vec_field_name, ct.default_diskann_index)
|
||||
log.info(self.index_wrap.params)
|
||||
cf.assert_equal_index(index, collection_w.indexes[0])
|
||||
collection_w.load()
|
||||
assert collection_w.num_entities == default_nb
|
||||
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
|
||||
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
|
||||
ct.default_diskann_search_params, default_limit,
|
||||
|
@ -1625,6 +1625,7 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(c_name)
|
||||
data = cf.gen_default_list_data()
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
res, _ = collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index,
|
||||
index_name=ct.default_index_name, _async=_async,
|
||||
_callback=self.call_back())
|
||||
|
@ -1651,6 +1652,7 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(c_name)
|
||||
data = cf.gen_default_list_data()
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
res, _ = collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index,
|
||||
index_name=ct.default_index_name, _async=_async)
|
||||
if _async:
|
||||
|
@ -1699,6 +1701,7 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(name=c_name)
|
||||
data = cf.gen_default_list_data()
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, index_name=index_name1)
|
||||
collection_w.load()
|
||||
assert len(collection_w.indexes) == 1
|
||||
|
@ -1739,6 +1742,7 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(name=c_name)
|
||||
data = cf.gen_default_list_data()
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index, index_name="a")
|
||||
assert collection_w.has_index(index_name="a")[0] == True
|
||||
collection_w.create_index(default_string_field_name, default_string_index_params, index_name="b")
|
||||
|
@ -1797,6 +1801,7 @@ class TestIndexDiskann(TestcaseBase):
|
|||
collection_w = self.init_collection_wrap(name=c_name)
|
||||
data = cf.gen_default_list_data(default_nb)
|
||||
collection_w.insert(data=data)
|
||||
assert collection_w.num_entities == default_nb
|
||||
|
||||
def build(collection_w):
|
||||
|
||||
|
|
|
@ -1671,4 +1671,39 @@ class TestqueryString(TestcaseBase):
|
|||
res, _ = collection_w.query(expr, output_fields=output_fields)
|
||||
|
||||
assert len(res) == nb
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L0)
def test_query_with_create_diskann_index(self):
    """
    target: test query after create diskann index
    method: create a collection, build the default diskann index on the
            float vector field, load, then run a term query on the int64 field
    expected: verify query result
    """
    # Only the first two items of the general-init result are needed here.
    general = self.init_collection_general(prefix, insert_data=True, is_index=True)
    collection_w, vectors = general[0:2]

    collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index)
    index_exists = collection_w.has_index()[0]
    assert index_exists

    collection_w.load()

    wanted_ids = [0]
    term_expr = f'{ct.default_int64_field_name} in {wanted_ids}'
    # Expected rows: first column of the inserted frame, one row per wanted id.
    first_column = vectors[0].iloc[:, [0]]
    expected = first_column[0:len(wanted_ids)].to_dict('records')
    collection_w.query(term_expr,
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: expected})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L0)
def test_query_with_create_diskann_with_string_pk(self):
    """
    target: test query after create diskann index
    method: create a collection whose primary field is the string field,
            build the default diskann index, load, then query
    expected: verify query result
    """
    general = self.init_collection_general(prefix, insert_data=True,
                                           primary_field=ct.default_string_field_name,
                                           is_index=True)
    collection_w, vectors = general[0:2]

    collection_w.create_index(ct.default_float_vec_field_name, ct.default_diskann_index)
    index_exists = collection_w.has_index()[0]
    assert index_exists
    collection_w.load()

    # Expected rows come from columns 1 and 2 of the inserted frame.
    expected = vectors[0].iloc[:, 1:3].to_dict('records')
    wanted_fields = [default_float_field_name, default_string_field_name]
    collection_w.query(default_mix_expr,
                       output_fields=wanted_fields,
                       check_task=CheckTasks.check_query_results,
                       check_items={exp_res: expected})
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ raw_vectors, binary_entities = gen_binary_entities(default_nb)
|
|||
default_query, _ = gen_search_vectors_params(field_name, entities, default_top_k, nq)
|
||||
index_name1 = cf.gen_unique_str("float")
|
||||
index_name2 = cf.gen_unique_str("varhar")
|
||||
half_nb = ct.default_nb // 2
|
||||
|
||||
|
||||
class TestCollectionSearchInvalid(TestcaseBase):
|
||||
|
@ -299,8 +300,8 @@ class TestCollectionSearchInvalid(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_invalid_params_type(self, index, params):
|
||||
"""
|
||||
target: test search with invalid search params
|
||||
|
@ -905,7 +906,7 @@ class TestCollectionSearch(TestcaseBase):
|
|||
def nq(self, request):
|
||||
yield request.param
|
||||
|
||||
@pytest.fixture(scope="function", params=[8, 128])
|
||||
@pytest.fixture(scope="function", params=[32, 128])
|
||||
def dim(self, request):
|
||||
yield request.param
|
||||
|
||||
|
@ -1475,8 +1476,8 @@ class TestCollectionSearch(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_after_different_index_with_params(self, dim, index, params, auto_id, _async):
|
||||
"""
|
||||
target: test search after different index
|
||||
|
@ -1553,8 +1554,8 @@ class TestCollectionSearch(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_after_index_different_metric_type(self, dim, index, params, auto_id, _async):
|
||||
"""
|
||||
target: test search with different metric type
|
||||
|
@ -2922,7 +2923,7 @@ class TestSearchBase(TestcaseBase):
|
|||
def get_nq(self, request):
|
||||
yield request.param
|
||||
|
||||
@pytest.fixture(scope="function", params=[8, 128])
|
||||
@pytest.fixture(scope="function", params=[32, 128])
|
||||
def dim(self, request):
|
||||
yield request.param
|
||||
|
||||
|
@ -2959,8 +2960,8 @@ class TestSearchBase(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_index_empty_partition(self, index, params):
|
||||
"""
|
||||
target: test basic search function, all the search params are correct, test all index params, and build
|
||||
|
@ -3008,8 +3009,8 @@ class TestSearchBase(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_index_partitions(self, index, params, get_top_k):
|
||||
"""
|
||||
target: test basic search function, all the search params are correct, test all index params, and build
|
||||
|
@ -3068,8 +3069,8 @@ class TestSearchBase(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_ip_after_index(self, index, params):
|
||||
"""
|
||||
target: test basic search function, all the search params are correct, test all index params, and build
|
||||
|
@ -3125,8 +3126,8 @@ class TestSearchBase(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_ip_index_empty_partition(self, index, params):
|
||||
"""
|
||||
target: test basic search function, all the search params are correct, test all index params, and build
|
||||
|
@ -3168,8 +3169,8 @@ class TestSearchBase(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_ip_index_partitions(self, index, params):
|
||||
"""
|
||||
target: test basic search function, all the search params are correct, test all index params, and build
|
||||
|
@ -3320,7 +3321,7 @@ class TestsearchString(TestcaseBase):
|
|||
def nq(self, request):
|
||||
yield request.param
|
||||
|
||||
@pytest.fixture(scope="function", params=[8, 128])
|
||||
@pytest.fixture(scope="function", params=[32, 128])
|
||||
def dim(self, request):
|
||||
yield request.param
|
||||
|
||||
|
@ -3762,7 +3763,7 @@ class TestsearchPagination(TestcaseBase):
|
|||
def offset(self, request):
|
||||
yield request.param
|
||||
|
||||
@pytest.fixture(scope="function", params=[8, 128])
|
||||
@pytest.fixture(scope="function", params=[32, 128])
|
||||
def dim(self, request):
|
||||
yield request.param
|
||||
|
||||
|
@ -4099,8 +4100,8 @@ class TestsearchPagination(TestcaseBase):
|
|||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("index, params",
|
||||
zip(ct.all_index_types[:6],
|
||||
ct.default_index_params[:6]))
|
||||
zip(ct.all_index_types[:7],
|
||||
ct.default_index_params[:7]))
|
||||
def test_search_pagination_after_different_index(self, index, params, auto_id, offset, _async):
|
||||
"""
|
||||
target: test search pagination after different index
|
||||
|
@ -4108,7 +4109,7 @@ class TestsearchPagination(TestcaseBase):
|
|||
expected: search successfully
|
||||
"""
|
||||
# 1. initialize with data
|
||||
dim = 8
|
||||
dim = 128
|
||||
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 1000,
|
||||
partition_num=1,
|
||||
auto_id=auto_id,
|
||||
|
@ -4202,3 +4203,272 @@ class TestsearchPaginationInvalid(TestcaseBase):
|
|||
check_items={"err_code": 1,
|
||||
"err_msg": "offset [%d] is invalid, should be in range "
|
||||
"[1, 16385], but got %d" % (offset, offset)})
|
||||
|
||||
class TestsearchDiskann(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test search about diskann index
    ******************************************************************
    """

    @pytest.fixture(scope="function", params=[32, 128])
    def dim(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[False, True])
    def auto_id(self, request):
        yield request.param

    @pytest.fixture(scope="function", params=[False, True])
    def _async(self, request):
        yield request.param

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with
    # "test"). Each returns a fresh object so tests never share state.
    # ------------------------------------------------------------------
    @staticmethod
    def _diskann_index_params():
        """Return the DISKANN/L2 index definition used by every case in this class."""
        return {"index_type": "DISKANN", "metric_type": "L2", "params": {}}

    @staticmethod
    def _diskann_search_params(search_list):
        """Return L2 search params carrying the given ``search_list`` value."""
        return {"metric_type": "L2", "params": {"search_list": search_list}}

    @staticmethod
    def _query_vectors(dim):
        """Return ``default_nq`` random query vectors of dimension ``dim``."""
        return [[random.random() for _ in range(dim)] for _ in range(default_nq)]

    @staticmethod
    def _output_fields():
        """Return the scalar fields requested back from every search."""
        return [default_int64_field_name, default_float_field_name, default_string_field_name]

    def _search_expect_hits(self, collection_w, dim, search_list, ids, _async=None):
        """Search with ``default_limit`` and validate through check_search_results.

        ``_async`` is forwarded to both the search call and the check items
        only when it is not None, so callers that never passed it keep
        issuing exactly the same request.
        """
        check_items = {"nq": default_nq,
                       "ids": ids,
                       "limit": default_limit}
        extra_kwargs = {}
        if _async is not None:
            extra_kwargs["_async"] = _async
            check_items["_async"] = _async
        collection_w.search(self._query_vectors(dim), default_search_field,
                            self._diskann_search_params(search_list), default_limit,
                            default_search_exp,
                            output_fields=self._output_fields(),
                            travel_timestamp=0,
                            check_task=CheckTasks.check_search_results,
                            check_items=check_items,
                            **extra_kwargs)

    def _check_search_list_rejected(self, dim, auto_id, search_list, limit):
        """Build a loaded diskann collection and expect the search to fail.

        Shared body of the three invalid-parameter cases, which differ only
        in the (limit, search_list) pairs they are parametrized with.
        """
        # 1. initialize with data
        collection_w = self.init_collection_general(prefix, True, auto_id=auto_id,
                                                    dim=dim, is_index=True)[0]
        # 2. create index
        collection_w.create_index(ct.default_float_vec_field_name, self._diskann_index_params())
        collection_w.load()
        # 3. search and expect the server-side error
        collection_w.search(self._query_vectors(dim), default_search_field,
                            self._diskann_search_params(search_list), limit,
                            default_search_exp,
                            output_fields=self._output_fields(),
                            travel_timestamp=0,
                            check_task=CheckTasks.err_res,
                            check_items={"err_code": 1,
                                         "err_msg": "fail to search on all shard leaders"})

    @staticmethod
    def _delete_first_half(collection_w, ids):
        """Delete the first ``half_nb`` ids, then issue a delete for int64 id 0."""
        expr = f'{ct.default_int64_field_name} in {ids[:half_nb]}'
        # delete half of data
        del_res = collection_w.delete(expr)[0]
        assert del_res.delete_count == half_nb
        collection_w.delete(f'{ct.default_int64_field_name} in [0]')

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_diskann_index(self, dim, auto_id, _async):
        """
        target: test search after creating diskann index
        method: 1.create collection , insert data, primary_field is int field
                2.create diskann index ,  then load
                3.search
        expected: search successfully
        """
        # 1. initialize with data
        nb = 2000
        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, auto_id=auto_id,
                                                                      nb=nb, dim=dim,
                                                                      is_index=True)[0:4]

        # 2. create index
        collection_w.create_index(ct.default_float_vec_field_name, self._diskann_index_params())
        collection_w.load()

        # 3. search
        self._search_expect_hits(collection_w, dim, 30, insert_ids, _async=_async)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("limit", [20])
    @pytest.mark.parametrize("search_list", [10, 201])
    def test_search_invalid_params_with_diskann_A(self, dim, auto_id, search_list, limit):
        """
        target: test search on diskann index with an invalid search_list
        method: 1.create collection , insert data, primary_field is int field
                2.create diskann index
                3.search with invalid params, where topk <=20, search list (topk, 200]
        expected: search report an error
        """
        self._check_search_list_rejected(dim, auto_id, search_list, limit)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("limit", [6553])
    @pytest.mark.parametrize("search_list", [6553, 65531])
    def test_search_invalid_params_with_diskann_B(self, dim, auto_id, search_list, limit):
        """
        target: test search on diskann index with an invalid search_list
        method: 1.create collection , insert data, primary_field is int field
                2.create diskann index
                3.search with invalid params, where 20< topk <= 6553, search list (topk, topk * 10]
        expected: search report an error
        """
        self._check_search_list_rejected(dim, auto_id, search_list, limit)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("limit", [6554])
    @pytest.mark.parametrize("search_list", [6554, 65536])
    def test_search_invalid_params_with_diskann_C(self, dim, auto_id, search_list, limit):
        """
        target: test search on diskann index with an invalid search_list
        method: 1.create collection , insert data, primary_field is int field
                2.create diskann index
                3.search with invalid params, where topk > 6553, search list (topk, 65535]
        expected: search report an error
        """
        self._check_search_list_rejected(dim, auto_id, search_list, limit)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_diskann_with_string_pk(self, dim):
        """
        target: test search on diskann index when the primary key is a string field
        method: 1.create collection , insert data, primary_field is string field
                2.create diskann index
                3.search with L2 metric and search_list=20
        expected: search successfully
        """
        # 1. initialize with data
        collection_w, _, _, insert_ids = \
            self.init_collection_general(prefix, True, auto_id=False, dim=dim, is_index=True,
                                         primary_field=ct.default_string_field_name)[0:4]
        # 2. create index
        collection_w.create_index(ct.default_float_vec_field_name, self._diskann_index_params())
        collection_w.load()
        # 3. search (no _async argument is sent in this case)
        self._search_expect_hits(collection_w, dim, 20, insert_ids)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_delete_data(self, dim, auto_id, _async):
        """
        target: test search after deleting data from a diskann-indexed collection
        method: 1.create collection , insert data,
                2.create diskann index
                3.delete data, then search
        expected: search passes check_search_results
        """
        # NOTE(review): the full `ids` list (including deleted ids) is handed to
        # the checker, so exclusion of deleted ids does not appear to be
        # asserted here - confirm against check_search_results.
        # 1. initialize with data
        collection_w, _, _, ids = \
            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=True)[0:4]
        # 2. create index
        collection_w.create_index(ct.default_float_vec_field_name, self._diskann_index_params())
        collection_w.load()

        # 3. delete, then search
        self._delete_first_half(collection_w, ids)
        self._search_expect_hits(collection_w, dim, 30, ids, _async=_async)

    @pytest.mark.tags(CaseLabel.L1)
    def test_search_with_diskann_and_more_index(self, dim, auto_id, _async):
        """
        target: test search when diskann index coexists with indexes on other fields
        method: 1.create collection , insert data
                2.create more index ,then load
                3.delete half data, search
        expected: search passes check_search_results
        """
        # NOTE(review): as in test_search_with_delete_data, the checker receives
        # the full `ids` list - confirm whether deleted ids are really verified.
        # 1. initialize with data
        collection_w, _, _, ids = \
            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_index=True)[0:4]
        # 2. create index: diskann on the vector field, empty params on two scalar fields
        collection_w.create_index(ct.default_float_vec_field_name, self._diskann_index_params(),
                                  index_name=index_name1)
        index_params_one = {}
        collection_w.create_index("float", index_params_one, index_name="a")
        index_param_two = {}
        collection_w.create_index("varchar", index_param_two, index_name="b")

        collection_w.load()

        # 3. delete, then search
        self._delete_first_half(collection_w, ids)
        self._search_expect_hits(collection_w, dim, 30, ids, _async=_async)
|
||||
|
|
|
@ -714,6 +714,7 @@ class TestUtilityBase(TestcaseBase):
|
|||
assert res['total_rows'] == nb
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.skip(reason='wait to modify')
|
||||
def test_index_process_collection_indexing(self):
|
||||
"""
|
||||
target: test building_process
|
||||
|
|
Loading…
Reference in New Issue