test: add scalar index for compatibility testing (#39291)

/kind improvement

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
2025-01-17 14:33:10 +08:00 · 2025-01-17 14:33:10 +08:00 · bef042afd5
parent 3ca6c0127d
commit bef042afd5
2 changed files with 31 additions and 9 deletions
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@ -1803,6 +1803,16 @@ def get_int64_field_name(schema=None):
    return None


+def get_varchar_field_name(schema=None):
+    """Return the name of the first VARCHAR field in ``schema``, or None if absent."""
+    # Fall back to the default collection schema when the caller supplies none.
+    if schema is None:
+        schema = gen_default_collection_schema()
+    # The first match in iteration order wins; None signals "no VARCHAR field".
+    varchar_names = (f.name for f in schema.fields if f.dtype == DataType.VARCHAR)
+    return next(varchar_names, None)
+
+
 def get_text_field_name(schema=None):
    if schema is None:
        schema = gen_default_collection_schema()
--- a/tests/python_client/deploy/testcases/test_action_first_deployment.py
+++ b/tests/python_client/deploy/testcases/test_action_first_deployment.py
@ -67,11 +67,11 @@ class TestActionFirstDeployment(TestDeployBase):
    @pytest.mark.parametrize("replica_number", [0, 1, 2])
    @pytest.mark.parametrize("is_compacted", ["is_compacted", "not_compacted"])
    @pytest.mark.parametrize("is_deleted", ["is_deleted"])
-    @pytest.mark.parametrize("is_string_indexed", ["is_string_indexed", "not_string_indexed"])
+    @pytest.mark.parametrize("is_scalar_indexed", ["is_scalar_indexed", "not_scalar_indexed"])
    @pytest.mark.parametrize("segment_status", ["only_growing", "all"])
    @pytest.mark.parametrize("index_type", ["HNSW", "BIN_IVF_FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"])
    def test_task_all(self, index_type, is_compacted,
-                      segment_status, is_string_indexed, replica_number, is_deleted, data_size):
+                      segment_status, is_scalar_indexed, replica_number, is_deleted, data_size):
        """
        before reinstall: create collection and insert data, load and search
        """
@ -90,7 +90,7 @@ class TestActionFirstDeployment(TestDeployBase):
            pytest.skip("skip test, not enough nodes")

        log.info(f"collection name: {name}, replica_number: {replica_number}, is_compacted: {is_compacted},"
-                 f"is_deleted: {is_deleted}, is_string_indexed: {is_string_indexed},"
+                 f"is_deleted: {is_deleted}, is_scalar_indexed: {is_scalar_indexed},"
                 f"segment_status: {segment_status}, index_type: {index_type}")

        is_binary = True if "BIN" in index_type else False
@ -119,12 +119,24 @@ class TestActionFirstDeployment(TestDeployBase):
        # create index for vector
        default_index_param = gen_index_param(index_type)
        collection_w.create_index(default_index_field, default_index_param)
-        # create index for string
-        if is_string_indexed == "is_string_indexed":
-            default_string_index_params = {}
-            default_string_index_name = "_default_string_idx"
-            collection_w.create_index(
-                default_string_field_name, default_string_index_params, index_name=default_string_index_name)
+        # create index for scalar
+        if is_scalar_indexed == "is_scalar_indexed":
+            int_field_name = cf.get_int64_field_name(schema=collection_w.schema)
+            # create stl sort index for int field
+            collection_w.create_index(int_field_name, {"index_type": "STL_SORT"})
+
+            varchar_field_name = cf.get_varchar_field_name(schema=collection_w.schema)
+            # 50% chance to create trie index for varchar field
+            if random.randint(0, 1) == 1:
+                collection_w.create_index(varchar_field_name, {"index_type": "TRIE"})
+            scalar_field_names = cf.get_scalar_field_name_list(schema=collection_w.schema)
+            indexes = [index.to_dict() for index in collection_w.indexes]
+            indexed_fields = [index['field'] for index in indexes]
+            # create inverted index for other scalar field
+            for f in scalar_field_names:
+                if f in indexed_fields:
+                    continue
+                collection_w.create_index(f, {"index_type": "INVERTED"},)

        # load for growing segment
        if replica_number >= 1: