test: Update search iterator result check for dup ids (#40190)

Related issue: #40161 
1. Fix the search iterator result assertion for duplicate IDs.
2. Use normalized vectors for the iterator tests.

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
pull/40224/head
yanliang567 2025-02-26 16:49:59 +08:00 committed by GitHub
parent 32c00dbc1b
commit 84f7e2d895
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 60 additions and 160 deletions

View File

@ -458,8 +458,8 @@ class ResponseChecker:
if check_items.get("limit"):
if "range_filter" not in check_items and "radius" not in check_items:
assert len(pk_list) / check_items["limit"] >= 0.9
assert len(pk_list) == len(set(pk_list))
log.info("check: total %d results" % len(pk_list))
log.debug(f"check: total {len(pk_list)} results, set len: {len(set(pk_list))}")
assert len(pk_list) == len(set(pk_list)) != 0
return True

View File

@ -39,7 +39,6 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
# @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/39045")
def test_milvus_client_search_iterator_using_mul_db(self, search_params):
"""
target: test search iterator(high level api) case about mul db
@ -57,8 +56,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
collections = self.list_collections(client)[0]
assert collection_name in collections
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
@ -71,7 +69,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 5. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {"params": search_params}
error_msg = "alias or database may have been changed"
self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params,
@ -82,7 +80,6 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
# @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/39087")
def test_milvus_client_search_iterator_alias_different_col(self, search_params):
"""
target: test search iterator(high level api) case about alias
@ -104,15 +101,14 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base):
collections = self.list_collections(client)[0]
assert collection_name_new in collections
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
self.insert(client, collection_name_new, rows)
self.flush(client, collection_name_new)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
search_params = {"params": search_params}
error_msg = "alias or database may have been changed"
self.search_iterator(client, alias, vectors_to_search, batch_size, search_params=search_params,
@ -168,13 +164,12 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
@ -211,14 +206,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
index_params.add_index(default_vector_field_name, metric_type=default_metric_type)
self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
{default_primary_key_field_name: str(i), default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_string_field_name: str(i), "nullable_field": None, "array_field": None} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
@ -254,14 +248,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
new_name = collection_name + "new"
self.rename_collection(client, old_name, new_name)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, new_name, rows)
self.flush(client, new_name)
# assert self.num_entities(client, collection_name)[0] == default_nb
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
@ -288,17 +281,16 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
collections = self.list_collections(client)[0]
assert collection_name in collections
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{
default_primary_key_field_name: i,
default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0,
default_int32_array_field_name: [i, i + 1, i + 2],
default_string_array_field_name: [str(i), str(i + 1), str(i + 2)]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. search iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
@ -321,14 +313,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
# 1. create collection
self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
{default_primary_key_field_name: str(i), default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
self.flush(client, collection_name)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type}
if "radius" in search_params:
check_items["radius"] = search_params["radius"]
@ -353,15 +344,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, metric_type=metric_type, auto_id=auto_id,
consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
if auto_id:
for row in rows:
row.pop(default_primary_key_field_name)
self.insert(client, collection_name, rows)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
limit = default_limit if default_limit < default_batch_size else default_batch_size
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type}
if "radius" in search_params:
@ -390,15 +380,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, metric_type=metric_type, auto_id=auto_id,
consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
if auto_id:
for row in rows:
row.pop(default_primary_key_field_name)
self.insert(client, collection_name, rows)
# 3. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
limit = default_limit if default_limit < default_batch_size else default_batch_size
check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type}
if "radius" in search_params:
@ -427,15 +416,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
default_nb = 1000
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
pks = self.insert(client, collection_name, rows)[0]
# 3. delete
delete_num = 3
self.delete(client, collection_name, ids=[i for i in range(delete_num)])
# 4. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
insert_ids = [i for i in range(default_nb)]
for insert_id in range(delete_num):
if insert_id in insert_ids:
@ -466,15 +454,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base):
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
default_nb = 1000
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
pks = self.insert(client, collection_name, rows)[0]
# 3. delete
delete_num = 3
self.delete(client, collection_name, filter=f"id < {delete_num}")
# 4. search_iterator
vectors_to_search = rng.random((1, default_dim))
vectors_to_search = cf.gen_vectors(1, default_dim)
insert_ids = [i for i in range(default_nb)]
for insert_id in range(delete_num):
if insert_id in insert_ids:

View File

@ -10387,31 +10387,51 @@ class TestSearchIterator(TestcaseBase):
""" Test case of search iterator """
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("metric_type", ct.float_metrics)
@pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
def test_search_iterator_normal(self, vector_data_type):
def test_range_search_iterator_default(self, metric_type, vector_data_type):
"""
target: test search iterator normal
target: test iterator range search
method: 1. search iterator
2. check the result, expect pk
2. check the result, expect pk not repeat and meet the range requirements
expected: search successfully
"""
# 1. initialize with data
dim = 128
collection_w = self.init_collection_general(prefix, True, dim=dim, is_index=False,
batch_size = 100
collection_w = self.init_collection_general(prefix, True, dim=default_dim, is_index=False,
vector_data_type=vector_data_type)[0]
collection_w.create_index(field_name, {"metric_type": "L2"})
collection_w.create_index(field_name, {"metric_type": metric_type})
collection_w.load()
search_vector = cf.gen_vectors(1, default_dim, vector_data_type)
search_params = {"metric_type": metric_type}
collection_w.search_iterator(search_vector, field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": metric_type,
"batch_size": batch_size})
limit = 200
res = collection_w.search(search_vector, field_name, param=search_params, limit=200,
check_task=CheckTasks.check_search_results,
check_items={"nq": 1, "limit": limit})[0]
# 2. search iterator
search_params = {"metric_type": "L2"}
vectors = cf.gen_vectors_based_on_vector_type(1, dim, vector_data_type)
batch_size = 200
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": batch_size})
batch_size = 600
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": batch_size})
if metric_type != "L2":
radius = res[0][limit//2].distance - 0.1 # pick a radius to make sure there exists results
range_filter = res[0][0].distance + 0.1
search_params = {"metric_type": metric_type, "params": {"radius": radius, "range_filter": range_filter}}
collection_w.search_iterator(search_vector, field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": metric_type, "batch_size": batch_size,
"radius": radius,
"range_filter": range_filter})
else:
radius = res[0][limit//2].distance + 0.1
range_filter = res[0][0].distance - 0.1
search_params = {"metric_type": metric_type, "params": {"radius": radius, "range_filter": range_filter}}
collection_w.search_iterator(search_vector, field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": metric_type, "batch_size": batch_size,
"radius": radius,
"range_filter": range_filter})
@pytest.mark.tags(CaseLabel.L1)
def test_search_iterator_binary(self):
@ -10455,113 +10475,6 @@ class TestSearchIterator(TestcaseBase):
expr=expression, check_task=CheckTasks.check_search_iterator,
check_items={})
@pytest.mark.tags(CaseLabel.L2)
def test_range_search_iterator_L2(self):
"""
target: test iterator range search with the L2 metric
method: 1. build an L2 index, load the collection and run search iterator
with both radius and range_filter set
2. check the result, expect pk not repeat and meet the range requirements
expected: search successfully
"""
# 1. initialize with data
batch_size = 100
collection_w = self.init_collection_general(prefix, True, is_index=False)[0]
collection_w.create_index(field_name, {"metric_type": "L2"})
collection_w.load()
# 2. search iterator
# NOTE(review): `vectors` is not defined inside this test; presumably a module-level
# set of query vectors - confirm against the top of the file.
# For L2 the radius (35.0) is the larger value and range_filter (34.0) the smaller one.
search_params = {"metric_type": "L2", "params": {"radius": 35.0, "range_filter": 34.0}}
# batch_size is passed to the iterator but is not part of check_items here.
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": "L2",
"radius": 35.0,
"range_filter": 34.0})
@pytest.mark.tags(CaseLabel.L2)
def test_range_search_iterator_IP(self):
"""
target: test iterator range search with the IP metric
method: 1. build an IP index, load the collection and run search iterator
with both radius and range_filter set
2. check the result, expect pk not repeat and meet the range requirements
expected: search successfully
"""
# 1. initialize with data
batch_size = 100
collection_w = self.init_collection_general(prefix, True, is_index=False)[0]
collection_w.create_index(field_name, {"metric_type": "IP"})
collection_w.load()
# 2. search iterator
# NOTE(review): `vectors` is not defined inside this test; presumably a module-level
# set of query vectors - confirm against the top of the file.
# For IP the radius (0) is the smaller value and range_filter (45) the larger one.
search_params = {"metric_type": "IP", "params": {"radius": 0, "range_filter": 45}}
# batch_size is passed to the iterator but is not part of check_items here.
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": "IP",
"radius": 0,
"range_filter": 45})
@pytest.mark.tags(CaseLabel.L1)
def test_range_search_iterator_COSINE(self):
"""
target: test iterator range search with the COSINE metric
method: 1. build a COSINE index, load the collection and run search iterator
with both radius and range_filter set
2. check the result, expect pk not repeat and meet the range requirements
expected: search successfully
"""
# 1. initialize with data
batch_size = 100
collection_w = self.init_collection_general(prefix, True, is_index=False)[0]
collection_w.create_index(field_name, {"metric_type": "COSINE"})
collection_w.load()
# 2. search iterator
# NOTE(review): `vectors` is not defined inside this test; presumably a module-level
# set of query vectors - confirm against the top of the file.
# For COSINE the radius (0.8) is the smaller value and range_filter (1) the larger one.
search_params = {"metric_type": "COSINE", "params": {"radius": 0.8, "range_filter": 1}}
# batch_size is passed to the iterator but is not part of check_items here.
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": "COSINE",
"radius": 0.8,
"range_filter": 1})
@pytest.mark.tags(CaseLabel.L2)
def test_range_search_iterator_only_radius(self):
"""
target: test iterator range search when only radius is given (no range_filter)
method: 1. build an L2 index, load the collection and run search iterator
with radius set and range_filter omitted
2. check the result, expect pk not repeat and meet the radius requirement
expected: search successfully
"""
# 1. initialize with data
batch_size = 100
collection_w = self.init_collection_general(prefix, True, is_index=False)[0]
collection_w.create_index(field_name, {"metric_type": "L2"})
collection_w.load()
# 2. search iterator
# NOTE(review): `vectors` is not defined inside this test; presumably a module-level
# set of query vectors - confirm against the top of the file.
# Only "radius" is supplied; "range_filter" is deliberately left out of both
# the search params and the check_items.
search_params = {"metric_type": "L2", "params": {"radius": 35.0}}
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"metric_type": "L2",
"radius": 35.0})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("issue #25145")
@pytest.mark.parametrize("index", ct.all_index_types[:7])
@pytest.mark.parametrize("metrics", ct.float_metrics)
def test_search_iterator_after_different_index_metrics(self, index, metrics):
"""
target: test search iterator using different index types and metrics
method: 1. build the parametrized index with the parametrized metric, load the
collection and run search iterator
2. check the result with the search iterator checker (empty check_items)
expected: search successfully
note: currently skipped, see issue #25145
"""
# 1. initialize with data
batch_size = 100
collection_w = self.init_collection_general(prefix, True, is_index=False)[0]
# Index build params are looked up per index type; only the first seven entries
# of ct.all_index_types are exercised (see the parametrize decorator).
params = cf.get_index_params_params(index)
default_index = {"index_type": index, "params": params, "metric_type": metrics}
collection_w.create_index(field_name, default_index)
collection_w.load()
# 2. search iterator
# NOTE(review): `vectors` is not defined inside this test; presumably a module-level
# set of query vectors - confirm against the top of the file.
search_params = {"metric_type": metrics}
# No expr, radius or range_filter is passed, and check_items is empty.
collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("batch_size", [10, 100, 777, 1000])
def test_search_iterator_with_different_limit(self, batch_size):