test: Add tests for upsert with auto id (#35556)

Related issue: #34668

---------

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
2024-08-20 14:20:56 +08:00 · 2024-08-20 14:20:56 +08:00 · 249dc4d9eb
parent e09dc3be58
commit 249dc4d9eb
4 changed files with 81 additions and 36 deletions
--- a/tests/python_client/base/collection_wrapper.py
+++ b/tests/python_client/base/collection_wrapper.py
@ -339,10 +339,10 @@ class ApiCollectionWrapper:
        return res, check_result

    @trace()
-    def compact(self, timeout=None, check_task=None, check_items=None, **kwargs):
+    def compact(self, is_clustering=False, timeout=None, check_task=None, check_items=None, **kwargs):
        timeout = TIMEOUT if timeout is None else timeout
        func_name = sys._getframe().f_code.co_name
-        res, check = api_request([self.collection.compact, timeout], **kwargs)
+        res, check = api_request([self.collection.compact, is_clustering, timeout], **kwargs)
        check_result = ResponseChecker(res, func_name, check_task, check_items, check, **kwargs).run()
        return res, check_result

--- a/tests/python_client/requirements.txt
+++ b/tests/python_client/requirements.txt
@ -12,8 +12,8 @@ allure-pytest==2.7.0
 pytest-print==0.2.1
 pytest-level==0.1.1
 pytest-xdist==2.5.0
-pymilvus==2.5.0rc45
-pymilvus[bulk_writer]==2.5.0rc45
+pymilvus==2.5.0rc70
+pymilvus[bulk_writer]==2.5.0rc70
 pytest-rerunfailures==9.1.1
 git+https://github.com/Projectplace/pytest-tags
 ndg-httpsclient
--- a/tests/python_client/testcases/test_insert.py
+++ b/tests/python_client/testcases/test_insert.py
@ -513,7 +513,7 @@ class TestInsertOperation(TestcaseBase):
        data = [vectors, ["limit_1___________",
                          "limit_2___________"], ['1', '2']]
        error = {ct.err_code: 999,
-                 ct.err_msg: "invalid input, length of string exceeds max length"}
+                 ct.err_msg: "length of string exceeds max length"}
        collection_w.insert(
            data, check_task=CheckTasks.err_res, check_items=error)

@ -815,16 +815,6 @@ class TestInsertOperation(TestcaseBase):
            t.join()
        assert collection_w.num_entities == ct.default_nb * thread_num

-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="Currently primary keys are not unique")
-    def test_insert_multi_threading_auto_id(self):
-        """
-        target: test concurrent insert auto_id=True collection
-        method: 1.create auto_id=True collection 2.concurrent insert
-        expected: verify primary keys unique
-        """
-        pass
-
    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_multi_times(self, dim):
        """
@ -1211,11 +1201,11 @@ class TestInsertInvalid(TestcaseBase):
                                              check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_invalid_with_pk_varchar_auto_id_true(self):
+    def test_insert_with_pk_varchar_auto_id_true(self):
        """
        target: test insert invalid with pk varchar and auto id true
        method: set pk varchar max length < 18, insert data
-        expected: raise exception
+        expected: varchar pk supports auto_id=true
        """
        string_field = cf.gen_string_field(is_primary=True, max_length=6)
        embedding_field = cf.gen_float_vec_field()
@ -1547,8 +1537,56 @@ class TestUpsertValid(TestcaseBase):
        res = collection_w.query(exp, output_fields=[default_float_name])[0]
        assert [res[i][default_float_name] for i in range(upsert_nb)] == float_values.to_list()

-    @pytest.mark.tags(CaseLabel.L2)
-    def test_upsert_with_primary_key_string(self):
+    @pytest.mark.tags(CaseLabel.L0)
+    def test_upsert_with_auto_id(self):
+        """
+        target: test upsert with auto id
+        method: 1. create a collection with autoID=true
+                2. upsert 10 entities with non-existing pks
+                verify: success, and the pks are auto-generated
+                3. query 10 entities to get the existing pks
+                4. upsert 10 entities with existing pks
+                verify: success, and the pks are re-generated, and the new pks are visible
+        """
+        dim = 32
+        collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True,
+                                                                         dim=dim, insert_data=True, with_json=False)
+        nb = 10
+        start = ct.default_nb * 10
+        data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
+        res_upsert1 = collection_w.upsert(data=data)[0]
+        collection_w.flush()
+        # assert the pks are auto-generated, and num_entities increased for upsert with non_existing pks
+        assert res_upsert1.primary_keys[0] > insert_ids[-1]
+        assert collection_w.num_entities == ct.default_nb + nb
+
+        # query 10 entities to get the existing pks
+        res_q = collection_w.query(expr='', limit=nb)[0]
+        print(f"res_q: {res_q}")
+        existing_pks = [res_q[i][ct.default_int64_field_name] for i in range(nb)]
+        existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
+                                            output_fields=[ct.default_count_output])[0]
+        assert nb == existing_count[0].get(ct.default_count_output)
+        # upsert 10 entities with the existing pks
+        start = ct.default_nb * 20
+        data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
+        data[0] = existing_pks
+        res_upsert2 = collection_w.upsert(data=data)[0]
+        collection_w.flush()
+        # assert the new pks are auto-generated again
+        assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1]
+        existing_count = collection_w.query(expr=f"{ct.default_int64_field_name} in {existing_pks}",
+                                            output_fields=[ct.default_count_output])[0]
+        assert 0 == existing_count[0].get(ct.default_count_output)
+        res_q = collection_w.query(expr=f"{ct.default_int64_field_name} in {res_upsert2.primary_keys}",
+                                   output_fields=["*"])[0]
+        assert nb == len(res_q)
+        current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0]
+        assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("auto_id", [True, False])
+    def test_upsert_with_primary_key_string(self, auto_id):
        """
        target: test upsert with string primary key
        method: 1. create a collection with pk string
@ -1558,11 +1596,18 @@ class TestUpsertValid(TestcaseBase):
        """
        c_name = cf.gen_unique_str(pre_upsert)
        fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)]
-        schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name)
+        schema = cf.gen_collection_schema(fields=fields, primary_field=ct.default_string_field_name,
+                                          auto_id=auto_id)
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)]
-        collection_w.insert([["a", "b"], vectors])
-        collection_w.upsert([[" a", "b  "], vectors])
+        if not auto_id:
+            collection_w.insert([["a", "b"], vectors])
+            res_upsert = collection_w.upsert([[" a", "b  "], vectors])[0]
+            assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b  "
+        else:
+            collection_w.insert([vectors])
+            res_upsert = collection_w.upsert([[" a", "b  "], vectors])[0]
+            assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b  "
        assert collection_w.num_entities == 4

    @pytest.mark.tags(CaseLabel.L2)
@ -2046,7 +2091,7 @@ class TestUpsertInvalid(TestcaseBase):
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip("insert and upsert have removed the [] error check")
+    @pytest.mark.xfail(reason="insert and upsert have removed the [] error check")
    def test_upsert_multi_partitions(self):
        """
        target: test upsert two partitions
@ -2066,20 +2111,20 @@ class TestUpsertInvalid(TestcaseBase):
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip(reason="smellthemoon: behavior changed")
-    def test_upsert_with_auto_id(self):
+    def test_upsert_with_auto_id_pk_type_dismacth(self):
        """
-        target: test upsert with auto id
-        method: 1. create a collection with autoID=true
-                2. upsert data no pk
+        target: test upsert with auto_id and pk type mismatch
+        method: 1. create a collection with pk int64 (NOTE: created with auto_id=False below; confirm intent)
+                2. upsert with string pks, which mismatch the int64 pk type
        expected: raise exception
        """
-        collection_w = self.init_collection_general(pre_upsert, auto_id=True, is_index=False)[0]
-        error = {ct.err_code: 999,
-                 ct.err_msg: "Upsert don't support autoid == true"}
-        float_vec_values = cf.gen_vectors(ct.default_nb, ct.default_dim)
-        data = [[np.float32(i) for i in range(ct.default_nb)], [str(i) for i in range(ct.default_nb)],
-                float_vec_values]
+        dim = 16
+        collection_w = self.init_collection_general(pre_upsert, auto_id=False,
+                                                    dim=dim, insert_data=True, with_json=False)[0]
+        nb = 10
+        data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False)
+        data[0] = [str(i) for i in range(nb)]
+        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
--- a/tests/python_client/testcases/test_utility.py
+++ b/tests/python_client/testcases/test_utility.py
@ -731,7 +731,7 @@ class TestUtilityBase(TestcaseBase):
        cw = self.init_collection_wrap(name=c_name)
        self.index_wrap.init_index(cw.collection, default_field_name, default_index_params)
        res, _ = self.utility_wrap.index_building_progress(c_name)
-        exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0}
+        exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'}
        assert res == exp_res

    @pytest.mark.tags(CaseLabel.L2)
@ -822,7 +822,7 @@ class TestUtilityBase(TestcaseBase):
        cw.create_index(default_field_name, default_index_params)
        assert self.utility_wrap.wait_for_index_building_complete(c_name)[0]
        res, _ = self.utility_wrap.index_building_progress(c_name)
-        exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0}
+        exp_res = {'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0, 'state': 'Finished'}
        assert res == exp_res

    @pytest.mark.tags(CaseLabel.L1)