add case about diskann and binary index (#26804)

Signed-off-by: nico <cheng.yuan@zilliz.com>
pull/26844/head
nico 2023-09-05 10:23:48 +08:00 committed by GitHub
parent 68a2940b66
commit f0c911afc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 17 deletions

View File

@ -1085,18 +1085,15 @@ class TestNewIndexBinary(TestcaseBase):
@pytest.mark.tags(CaseLabel.L2)
# @pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index(self):
def test_create_binary_index_on_scalar_field(self):
"""
target: test create index on the string field (default_string_field_name) of a binary collection
method: prepare a binary collection, then create an index on default_string_field_name
        with default_string_index_params, using binary_field_name as the index name
expected: has_index(index_name=binary_field_name) reports True (no search is performed)
"""
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
df, _ = cf.gen_default_binary_dataframe_data()
collection_w.insert(data=df)
collection_w = self.init_collection_general(prefix, True, is_binary=True, is_index=False)[0]
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=binary_field_name)
assert collection_w.has_index(index_name=binary_field_name)[0] == True
assert collection_w.has_index(index_name=binary_field_name)[0] is True
@pytest.mark.tags(CaseLabel.L0)
# @pytest.mark.timeout(BUILD_TIMEOUT)
@ -1166,14 +1163,21 @@ class TestNewIndexBinary(TestcaseBase):
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
binary_index_params = {'index_type': 'HNSW', "M": '18', "efConstruction": '240', 'metric_type': metric_type}
if metric_type == "TANIMOTO":
collection_w.create_index(default_binary_vec_field_name, binary_index_params,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "metric type not found or not supported"})
else:
collection_w.create_index(default_binary_vec_field_name, binary_index_params)
assert collection_w.index()[0].params == binary_index_params
collection_w.create_index(default_binary_vec_field_name, binary_index_params)
assert collection_w.index()[0].params == binary_index_params
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("metric", ct.binary_metrics)
def test_create_binary_index_all_metrics(self, metric):
    """
    target: test create binary index with each metric listed in ct.binary_metrics
    method: initialize a binary collection without index, then create a BIN_FLAT
            index on binary_field_name using the parametrized metric
    expected: index creation succeeds and has_index reports True
    """
    # Index definition under test; only the metric varies between runs.
    bin_flat_params = {
        "index_type": "BIN_FLAT",
        "metric_type": metric,
        "params": {"nlist": 64},
    }
    init_result = self.init_collection_general(prefix, True, is_binary=True, is_index=False)
    binary_collection = init_result[0]
    binary_collection.create_index(binary_field_name, bin_flat_params)
    # The wrapper returns a sequence; its first element is the has_index answer.
    has_index_result = binary_collection.has_index()
    assert has_index_result[0] is True
"""
******************************************************************

View File

@ -835,7 +835,7 @@ class TestQueryParams(TestcaseBase):
assert len(res) == 1
# test for mixed data
ids = [_id * 1.1, bool(_id % 2)]
ids = [[_id, str(_id)], bool(_id % 2)]
expression = f"{expr_prefix}(listMix, {ids})"
res = collection_w.query(expression)[0]
assert len(res) == 1

View File

@ -1200,6 +1200,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
"err_msg": f"Data type and metric type miss-match"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="SUPERSTRUCTURE and SUBSTRUCTURE are supported again now")
@pytest.mark.parametrize("metric", ["SUPERSTRUCTURE", "SUBSTRUCTURE"])
def test_range_search_binary_not_supported_metrics(self, metric):
"""
@ -5781,6 +5782,37 @@ class TestSearchDiskann(TestcaseBase):
"_async": _async}
)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("search_list", [20, 200])
def test_search_with_limit_20(self, _async, enable_dynamic_field, search_list):
    """
    target: test search with limit 20 on a DISKANN index
    method: 1. initialize a collection without index via init_collection_general
            2. create a DISKANN index (metric L2) on ct.default_float_vec_field_name, then load
            3. search with limit=20 and the parametrized search_list
    expected: search succeeds and passes CheckTasks.check_search_results
    """
    top_k = 20
    # 1. initialize with data
    init_result = self.init_collection_general(
        prefix, True, is_index=False,
        enable_dynamic_field=enable_dynamic_field,
    )
    collection_w, _, _, insert_ids = init_result[0:4]

    # 2. create the DISKANN index and load the collection
    diskann_index = {"index_type": "DISKANN", "metric_type": "L2", "params": {}}
    collection_w.create_index(ct.default_float_vec_field_name, diskann_index)
    collection_w.load()

    # 3. search, letting the wrapper verify nq, returned ids and limit
    search_params = {"metric_type": "L2", "params": {"search_list": search_list}}
    output_fields = [
        default_int64_field_name,
        default_float_field_name,
        default_string_field_name,
    ]
    expected_items = {
        "nq": default_nq,
        "ids": insert_ids,
        "limit": top_k,
        "_async": _async,
    }
    collection_w.search(
        vectors[:default_nq],
        default_search_field,
        search_params,
        top_k,
        default_search_exp,
        output_fields=output_fields,
        _async=_async,
        check_task=CheckTasks.check_search_results,
        check_items=expected_items,
    )
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("limit", [1])
@pytest.mark.parametrize("search_list", [-1, 0, 201])
@ -5850,8 +5882,7 @@ class TestSearchDiskann(TestcaseBase):
output_fields=output_fields,
check_task=CheckTasks.err_res,
check_items={"err_code": 1,
"err_msg": "fail to search on all shard leaders"}
)
"err_msg": "fail to search on all shard leaders"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("limit", [6553])