From 84f7e2d89502970d7417a2202eabbd2c85ab3d42 Mon Sep 17 00:00:00 2001 From: yanliang567 <82361606+yanliang567@users.noreply.github.com> Date: Wed, 26 Feb 2025 16:49:59 +0800 Subject: [PATCH] test: Update search iterator result check for dup ids (#40190) Related issue: #40161 1. fix the search iterator result assertion for dup ids 2. use normalized vectors for iterator tests --------- Signed-off-by: yanliang567 --- tests/python_client/check/func_check.py | 4 +- .../test_milvus_client_search_iterator.py | 57 +++---- tests/python_client/testcases/test_search.py | 159 ++++-------------- 3 files changed, 60 insertions(+), 160 deletions(-) diff --git a/tests/python_client/check/func_check.py b/tests/python_client/check/func_check.py index 962744501d..ea589d4144 100644 --- a/tests/python_client/check/func_check.py +++ b/tests/python_client/check/func_check.py @@ -458,8 +458,8 @@ class ResponseChecker: if check_items.get("limit"): if "range_filter" not in check_items and "radius" not in check_items: assert len(pk_list) / check_items["limit"] >= 0.9 - assert len(pk_list) == len(set(pk_list)) - log.info("check: total %d results" % len(pk_list)) + log.debug(f"check: total {len(pk_list)} results, set len: {len(set(pk_list))}") + assert len(pk_list) == len(set(pk_list)) != 0 return True diff --git a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py index c2425c0875..cd29878f17 100644 --- a/tests/python_client/milvus_client/test_milvus_client_search_iterator.py +++ b/tests/python_client/milvus_client/test_milvus_client_search_iterator.py @@ -39,7 +39,6 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base): yield request.param @pytest.mark.tags(CaseLabel.L1) - # @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/39045") def test_milvus_client_search_iterator_using_mul_db(self, search_params): """ target: test search iterator(high level api) case about mul db @@ -57,8 +56,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base): collections = self.list_collections(client)[0] assert collection_name in collections # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, collection_name, rows) self.flush(client, collection_name) @@ -71,7 +69,7 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base): self.insert(client, collection_name, rows) self.flush(client, collection_name) # 5. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) search_params = {"params": search_params} error_msg = "alias or database may have been changed" self.search_iterator(client, collection_name, vectors_to_search, batch_size, search_params=search_params, @@ -82,7 +80,6 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base): self.drop_collection(client, collection_name) @pytest.mark.tags(CaseLabel.L1) - # @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/39087") def test_milvus_client_search_iterator_alias_different_col(self, search_params): """ target: test search iterator(high level api) case about alias @@ -104,15 +101,14 @@ class TestMilvusClientSearchIteratorInValid(TestMilvusClientV2Base): collections = self.list_collections(client)[0] assert collection_name_new in collections # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, collection_name, rows) self.flush(client, collection_name) self.insert(client, collection_name_new, rows) self.flush(client, collection_name_new) # 3. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) search_params = {"params": search_params} error_msg = "alias or database may have been changed" self.search_iterator(client, alias, vectors_to_search, batch_size, search_params=search_params, @@ -168,13 +164,12 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): "dim": default_dim, "consistency_level": 0}) # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, collection_name, rows) self.flush(client, collection_name) # 3. search iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type} if "radius" in search_params: check_items["radius"] = search_params["radius"] @@ -211,14 +206,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): index_params.add_index(default_vector_field_name, metric_type=default_metric_type) self.create_collection(client, collection_name, dimension=dim, schema=schema, index_params=index_params) # 2. insert - rng = np.random.default_rng(seed=19530) rows = [ - {default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), + {default_primary_key_field_name: str(i), default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_string_field_name: str(i), "nullable_field": None, "array_field": None} for i in range(default_nb)] self.insert(client, collection_name, rows) self.flush(client, collection_name) # 3. search iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type} if "radius" in search_params: check_items["radius"] = search_params["radius"] @@ -254,14 +248,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): new_name = collection_name + "new" self.rename_collection(client, old_name, new_name) # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, new_name, rows) self.flush(client, new_name) # assert self.num_entities(client, collection_name)[0] == default_nb # 3. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type} if "radius" in search_params: check_items["radius"] = search_params["radius"] @@ -288,17 +281,16 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): collections = self.list_collections(client)[0] assert collection_name in collections # 2. insert - rng = np.random.default_rng(seed=19530) rows = [{ default_primary_key_field_name: i, - default_vector_field_name: list(rng.random((1, default_dim))[0]), + default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_int32_array_field_name: [i, i + 1, i + 2], default_string_array_field_name: [str(i), str(i + 1), str(i + 2)] } for i in range(default_nb)] self.insert(client, collection_name, rows) # 3. search iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type} if "radius" in search_params: check_items["radius"] = search_params["radius"] @@ -321,14 +313,13 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): # 1. create collection self.create_collection(client, collection_name, default_dim, id_type="string", max_length=ct.default_length) # 2. insert - rng = np.random.default_rng(seed=19530) rows = [ - {default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]), + {default_primary_key_field_name: str(i), default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] self.insert(client, collection_name, rows) self.flush(client, collection_name) # 3. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) check_items = {"batch_size": batch_size, "limit": default_nb, "metric_type": default_metric_type} if "radius" in search_params: check_items["radius"] = search_params["radius"] @@ -353,15 +344,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): self.create_collection(client, collection_name, default_dim, metric_type=metric_type, auto_id=auto_id, consistency_level="Strong") # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] if auto_id: for row in rows: row.pop(default_primary_key_field_name) self.insert(client, collection_name, rows) # 3. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) limit = default_limit if default_limit < default_batch_size else default_batch_size check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type} if "radius" in search_params: @@ -390,15 +380,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): self.create_collection(client, collection_name, default_dim, metric_type=metric_type, auto_id=auto_id, consistency_level="Strong") # 2. insert - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] if auto_id: for row in rows: row.pop(default_primary_key_field_name) self.insert(client, collection_name, rows) # 3. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) limit = default_limit if default_limit < default_batch_size else default_batch_size check_items = {"batch_size": default_batch_size, "limit": limit, "metric_type": metric_type} if "radius" in search_params: @@ -427,15 +416,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert default_nb = 1000 - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] pks = self.insert(client, collection_name, rows)[0] # 3. delete delete_num = 3 self.delete(client, collection_name, ids=[i for i in range(delete_num)]) # 4. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) insert_ids = [i for i in range(default_nb)] for insert_id in range(delete_num): if insert_id in insert_ids: @@ -466,15 +454,14 @@ class TestMilvusClientSearchIteratorValid(TestMilvusClientV2Base): self.create_collection(client, collection_name, default_dim, consistency_level="Strong") # 2. insert default_nb = 1000 - rng = np.random.default_rng(seed=19530) - rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]), + rows = [{default_primary_key_field_name: i, default_vector_field_name: list(cf.gen_vectors(1, default_dim)[0]), default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)] pks = self.insert(client, collection_name, rows)[0] # 3. delete delete_num = 3 self.delete(client, collection_name, filter=f"id < {delete_num}") # 4. search_iterator - vectors_to_search = rng.random((1, default_dim)) + vectors_to_search = cf.gen_vectors(1, default_dim) insert_ids = [i for i in range(default_nb)] for insert_id in range(delete_num): if insert_id in insert_ids: diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 4d1c7f7f89..a53a211d34 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -10387,31 +10387,51 @@ class TestSearchIterator(TestcaseBase): """ Test case of search iterator """ @pytest.mark.tags(CaseLabel.L0) + @pytest.mark.parametrize("metric_type", ct.float_metrics) @pytest.mark.parametrize("vector_data_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"]) - def test_search_iterator_normal(self, vector_data_type): + def test_range_search_iterator_default(self, metric_type, vector_data_type): """ - target: test search iterator normal + target: test iterator range search method: 1. search iterator - 2. check the result, expect pk + 2. check the result, expect pk not repeat and meet the range requirements expected: search successfully """ # 1. initialize with data - dim = 128 - collection_w = self.init_collection_general(prefix, True, dim=dim, is_index=False, + batch_size = 100 + collection_w = self.init_collection_general(prefix, True, dim=default_dim, is_index=False, vector_data_type=vector_data_type)[0] - collection_w.create_index(field_name, {"metric_type": "L2"}) + collection_w.create_index(field_name, {"metric_type": metric_type}) collection_w.load() + search_vector = cf.gen_vectors(1, default_dim, vector_data_type) + search_params = {"metric_type": metric_type} + collection_w.search_iterator(search_vector, field_name, search_params, batch_size, + check_task=CheckTasks.check_search_iterator, + check_items={"metric_type": metric_type, + "batch_size": batch_size}) + + limit = 200 + res = collection_w.search(search_vector, field_name, param=search_params, limit=200, + check_task=CheckTasks.check_search_results, + check_items={"nq": 1, "limit": limit})[0] # 2. search iterator - search_params = {"metric_type": "L2"} - vectors = cf.gen_vectors_based_on_vector_type(1, dim, vector_data_type) - batch_size = 200 - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"batch_size": batch_size}) - batch_size = 600 - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"batch_size": batch_size}) + if metric_type != "L2": + radius = res[0][limit//2].distance - 0.1 # pick a radius to make sure there exists results + range_filter = res[0][0].distance + 0.1 + search_params = {"metric_type": metric_type, "params": {"radius": radius, "range_filter": range_filter}} + collection_w.search_iterator(search_vector, field_name, search_params, batch_size, + check_task=CheckTasks.check_search_iterator, + check_items={"metric_type": metric_type, "batch_size": batch_size, + "radius": radius, + "range_filter": range_filter}) + else: + radius = res[0][limit//2].distance + 0.1 + range_filter = res[0][0].distance - 0.1 + search_params = {"metric_type": metric_type, "params": {"radius": radius, "range_filter": range_filter}} + collection_w.search_iterator(search_vector, field_name, search_params, batch_size, + check_task=CheckTasks.check_search_iterator, + check_items={"metric_type": metric_type, "batch_size": batch_size, + "radius": radius, + "range_filter": range_filter}) @pytest.mark.tags(CaseLabel.L1) def test_search_iterator_binary(self): @@ -10455,113 +10475,6 @@ class TestSearchIterator(TestcaseBase): expr=expression, check_task=CheckTasks.check_search_iterator, check_items={}) - @pytest.mark.tags(CaseLabel.L2) - def test_range_search_iterator_L2(self): - """ - target: test iterator range search - method: 1. search iterator - 2. check the result, expect pk not repeat and meet the expr requirements - expected: search successfully - """ - # 1. initialize with data - batch_size = 100 - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - collection_w.create_index(field_name, {"metric_type": "L2"}) - collection_w.load() - # 2. search iterator - search_params = {"metric_type": "L2", "params": {"radius": 35.0, "range_filter": 34.0}} - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"metric_type": "L2", - "radius": 35.0, - "range_filter": 34.0}) - - @pytest.mark.tags(CaseLabel.L2) - def test_range_search_iterator_IP(self): - """ - target: test iterator range search - method: 1. search iterator - 2. check the result, expect pk not repeat and meet the expr requirements - expected: search successfully - """ - # 1. initialize with data - batch_size = 100 - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - collection_w.create_index(field_name, {"metric_type": "IP"}) - collection_w.load() - # 2. search iterator - search_params = {"metric_type": "IP", "params": {"radius": 0, "range_filter": 45}} - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"metric_type": "IP", - "radius": 0, - "range_filter": 45}) - - @pytest.mark.tags(CaseLabel.L1) - def test_range_search_iterator_COSINE(self): - """ - target: test iterator range search - method: 1. search iterator - 2. check the result, expect pk not repeat and meet the expr requirements - expected: search successfully - """ - # 1. initialize with data - batch_size = 100 - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - collection_w.create_index(field_name, {"metric_type": "COSINE"}) - collection_w.load() - # 2. search iterator - search_params = {"metric_type": "COSINE", "params": {"radius": 0.8, "range_filter": 1}} - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"metric_type": "COSINE", - "radius": 0.8, - "range_filter": 1}) - - @pytest.mark.tags(CaseLabel.L2) - def test_range_search_iterator_only_radius(self): - """ - target: test search iterator normal - method: 1. search iterator - 2. check the result, expect pk not repeat and meet the expr requirements - expected: search successfully - """ - # 1. initialize with data - batch_size = 100 - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - collection_w.create_index(field_name, {"metric_type": "L2"}) - collection_w.load() - # 2. search iterator - search_params = {"metric_type": "L2", "params": {"radius": 35.0}} - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={"metric_type": "L2", - "radius": 35.0}) - - @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.skip("issue #25145") - @pytest.mark.parametrize("index", ct.all_index_types[:7]) - @pytest.mark.parametrize("metrics", ct.float_metrics) - def test_search_iterator_after_different_index_metrics(self, index, metrics): - """ - target: test search iterator using different index - method: 1. search iterator - 2. check the result, expect pk not repeat and meet the expr requirements - expected: search successfully - """ - # 1. initialize with data - batch_size = 100 - collection_w = self.init_collection_general(prefix, True, is_index=False)[0] - params = cf.get_index_params_params(index) - default_index = {"index_type": index, "params": params, "metric_type": metrics} - collection_w.create_index(field_name, default_index) - collection_w.load() - # 2. search iterator - search_params = {"metric_type": metrics} - collection_w.search_iterator(vectors[:1], field_name, search_params, batch_size, - check_task=CheckTasks.check_search_iterator, - check_items={}) - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("batch_size", [10, 100, 777, 1000]) def test_search_iterator_with_different_limit(self, batch_size):