Add test cases for search without flush (#17844)

Signed-off-by: Binbin Lv <binbin.lv@zilliz.com>
2022-06-28 20:00:24 +08:00 · 2022-06-28 20:00:24 +08:00 · 617e029dd3
parent ea901393ef
commit 617e029dd3
2 changed files with 49 additions and 11 deletions
--- a/tests/python_client/base/client_base.py
+++ b/tests/python_client/base/client_base.py
@@ -124,7 +124,8 @@ class TestcaseBase(Base):

    def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb,
                                partition_num=0, is_binary=False, is_all_data_type=False,
-                                auto_id=False, dim=ct.default_dim, is_index=False, primary_field=ct.default_int64_field_name):
+                                auto_id=False, dim=ct.default_dim, is_index=False,
+                                primary_field=ct.default_int64_field_name, is_flush=True):
        """
        target: create specified collections
        method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -157,8 +158,9 @@ class TestcaseBase(Base):
        if insert_data:
            collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
                cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, dim=dim)
-            assert collection_w.is_empty is False
-            assert collection_w.num_entities == nb
+            if is_flush:
+                assert collection_w.is_empty is False
+                assert collection_w.num_entities == nb
            log.info("insert_data: inserted data into collection %s (num_entities: %s)"
                     % (collection_w.name, nb))
            # This condition will be removed after auto index feature
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -850,6 +850,14 @@ class TestCollectionSearch(TestcaseBase):
    def _async(self, request):
        yield request.param

+    @pytest.fixture(scope="function", params=["JACCARD", "HAMMING", "TANIMOTO"])
+    def metrics(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=[False, True])
+    def is_flush(self, request):
+        yield request.param
+
    """
    ******************************************************************
    #  The following are valid base cases
@@ -857,7 +865,7 @@ class TestCollectionSearch(TestcaseBase):
    """

    @pytest.mark.tags(CaseLabel.L0)
-    def test_search_normal(self, nq, dim, auto_id):
+    def test_search_normal(self, nq, dim, auto_id, is_flush):
        """
        target: test search normal case
        method: create connection, collection, insert and search
@@ -866,7 +874,7 @@ class TestCollectionSearch(TestcaseBase):
        """
        # 1. initialize with data
        collection_w, _, _, insert_ids, time_stamp = \
-            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim)[0:5]
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush)[0:5]
        # 2. search before insert time_stamp
        log.info("test_search_normal: searching collection %s" % collection_w.name)
        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
@@ -1503,7 +1511,7 @@ class TestCollectionSearch(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"])
-    def test_search_binary_jaccard_flat_index(self, nq, dim, auto_id, _async, index):
+    def test_search_binary_jaccard_flat_index(self, nq, dim, auto_id, _async, index, is_flush):
        """
        target: search binary_collection, and check the result: distance
        method: compare the return distance value with value computed with JACCARD
@@ -1514,7 +1522,8 @@ class TestCollectionSearch(TestcaseBase):
                                                                                                  is_binary=True,
                                                                                                  auto_id=auto_id,
                                                                                                  dim=dim,
-                                                                                                  is_index=True)[0:5]
+                                                                                                  is_index=True,
+                                                                                                  is_flush=is_flush)[0:5]
        # 2. create index
        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "JACCARD"}
        collection_w.create_index("binary_vector", default_index)
@@ -1541,7 +1550,7 @@ class TestCollectionSearch(TestcaseBase):

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"])
-    def test_search_binary_hamming_flat_index(self, nq, dim, auto_id, _async, index):
+    def test_search_binary_hamming_flat_index(self, nq, dim, auto_id, _async, index, is_flush):
        """
        target: search binary_collection, and check the result: distance
        method: compare the return distance value with value computed with HAMMING
@@ -1552,7 +1561,8 @@ class TestCollectionSearch(TestcaseBase):
                                                                                      is_binary=True,
                                                                                      auto_id=auto_id,
                                                                                      dim=dim,
-                                                                                      is_index=True)[0:4]
+                                                                                      is_index=True,
+                                                                                      is_flush=is_flush)[0:4]
        # 2. create index
        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "HAMMING"}
        collection_w.create_index("binary_vector", default_index)
@@ -1579,7 +1589,7 @@ class TestCollectionSearch(TestcaseBase):
    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.xfail(reason="issue 6843")
    @pytest.mark.parametrize("index", ["BIN_FLAT", "BIN_IVF_FLAT"])
-    def test_search_binary_tanimoto_flat_index(self, nq, dim, auto_id, _async, index):
+    def test_search_binary_tanimoto_flat_index(self, nq, dim, auto_id, _async, index, is_flush):
        """
        target: search binary_collection, and check the result: distance
        method: compare the return distance value with value computed with TANIMOTO
@@ -1590,7 +1600,8 @@ class TestCollectionSearch(TestcaseBase):
                                                                                      is_binary=True,
                                                                                      auto_id=auto_id,
                                                                                      dim=dim,
-                                                                                      is_index=True)[0:4]
+                                                                                      is_index=True,
+                                                                                      is_flush=is_flush)[0:4]
        log.info("auto_id= %s, _async= %s" % (auto_id, _async))
        # 2. create index
        default_index = {"index_type": index, "params": {"nlist": 128}, "metric_type": "TANIMOTO"}
@@ -1615,6 +1626,31 @@ class TestCollectionSearch(TestcaseBase):
            res = res.result()
        assert abs(res[0].distances[0] - min(distance_0, distance_1)) <= epsilon

+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_binary_without_flush(self, metrics, auto_id):
+        """
+        target: test search without flush for binary data (no index)
+        method: create connection and collection, insert (no explicit flush), then load and search
+        expected: search successfully with limit(topK)
+        """
+        # 1. initialize a collection without data
+        collection_w = self.init_collection_general(prefix, is_binary=True, auto_id=auto_id)[0]
+        # 2. insert data
+        insert_ids = cf.insert_data(collection_w, default_nb, is_binary=True, auto_id=auto_id)[3]
+        # 3. load data
+        collection_w.load()
+        # 4. search
+        log.info("test_search_binary_without_flush: searching collection %s" % collection_w.name)
+        binary_vectors = cf.gen_binary_vectors(default_nq, default_dim)[1]
+        search_params = {"metric_type": metrics, "params": {"nprobe": 10}}
+        collection_w.search(binary_vectors[:default_nq], "binary_vector",
+                            search_params, default_limit,
+                            default_search_exp,
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "ids": insert_ids,
+                                         "limit": default_limit})
+
    @pytest.mark.tags(CaseLabel.L2)
    def test_search_travel_time_without_expression(self, auto_id):
        """