test: add more bitmap test cases (#36131)

1. verify issues #36054 and #35971
2. add mixed-scene test cases for the BITMAP index

Signed-off-by: wangting0128 <ting.wang@zilliz.com>
wt 2024-09-10 10:55:07 +08:00 committed by GitHub
parent 5aedc169cd
commit 53a87825f3
4 changed files with 672 additions and 29 deletions

View File

@@ -38,6 +38,7 @@ class IndexErrorMessage(ExceptionsMessage):
VectorMetricTypeExist = "metric type not set for vector index"
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
CheckBitmapOnPK = "create bitmap index on primary key not supported"
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
class QueryErrorMessage(ExceptionsMessage):

View File

@@ -377,3 +377,8 @@ class DefaultScalarIndexParams:
@staticmethod
def list_bitmap(fields: List[str]) -> Dict[str, IndexPrams]:
return {n: IndexPrams(index_type=IndexName.BITMAP) for n in fields}
class AlterIndexParams:
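# extra params passed to `alter_index(index_name, extra_params=...)` to toggle index properties (see usage in the tests below)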
IndexOffsetCache = {'indexoffsetcache.enabled': True}
IndexMmap = {'mmap.enabled': True}

View File

@@ -14,7 +14,7 @@ from common.common_type import CaseLabel, CheckTasks
from common.code_mapping import CollectionErrorMessage as clem
from common.code_mapping import IndexErrorMessage as iem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, AlterIndexParams
)
from utils.util_pymilvus import *
@@ -2299,7 +2299,8 @@ class TestInvertedIndexValid(TestcaseBase):
def test_binary_arith_expr_on_inverted_index(self):
prefix = "test_binary_arith_expr_on_inverted_index"
nb = 5000
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True, is_all_data_type=True)
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True,
is_all_data_type=True)
index_name = "test_binary_arith_expr_on_inverted_index"
scalar_index_params = {"index_type": "INVERTED"}
collection_w.release()
@@ -2723,3 +2724,144 @@ class TestBitmapIndex(TestcaseBase):
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_enable(self, request):
"""
target:
1. alter index `{indexoffsetcache.enabled: true}` and rebuild index again
method:
1. create a collection with scalar fields
2. build BITMAP index on scalar fields
3. alter index to enable `indexoffsetcache`
4. insert some data and flush
5. rebuild indexes with the same params again
6. load collection
expected:
1. alter index not failed
2. rebuild index not failed
3. load not failed
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache
for index_name in self.get_bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
# prepare data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# rebuild `BITMAP` index
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
"""
target:
1. check that auto index rejects an invalid `bitmap_cardinality_limit` param
method:
1. create a collection with scalar fields
2. build scalar index with an invalid `bitmap_cardinality_limit`
expected:
1. build index failed
"""
# init params
collection_name = f"{request.function.__name__}_{str(bitmap_cardinality_limit).replace('-', '_')}"
primary_field, nb = "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, DataType.INT64.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build scalar index and check failed
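# NOTE: `bitmap_cardinality_limit` is presumably the cardinality threshold AUTOINDEX uses when deciding
# whether to build a BITMAP index on a scalar field; valid values are in (0, 1000) per the checked error message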
self.collection_wrap.create_index(
field_name=DataType.INT64.name, index_name=DataType.INT64.name,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit},
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000])
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit):
"""
target:
1. check building auto index with a valid `bitmap_cardinality_limit` not failed
method:
1. create a collection with scalar fields
2. insert some data and flush
3. build vector index
4. build scalar index with `bitmap_cardinality_limit`
expected:
1. build vector index not failed
2. build scalar index with `bitmap_cardinality_limit` not failed
3. load not failed
Notice:
This case does not check whether the parameter takes effect;
it only verifies that the index is built successfully.
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build vector index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.get_bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
# load collection
self.collection_wrap.load()

View File

@@ -1,13 +1,14 @@
import re
import math # do not remove `math`
import pytest
from pymilvus import DataType
from pymilvus import DataType, AnnSearchRequest, RRFRanker
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from common.code_mapping import QueryErrorMessage as qem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, Expr
FieldParams, MetricType, DefaultVectorIndexParams, DefaultScalarIndexParams, Expr, AlterIndexParams
)
from base.client_base import TestcaseBase, TestCaseClassBase
@@ -54,7 +55,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build vector indexes
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name),
@@ -67,7 +68,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, output_fields", [
(Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']),
(Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']),
@@ -86,7 +87,6 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
self.collection_wrap.query(expr=expr, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed})
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -103,13 +103,14 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
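# NOTE: Python's `%` follows the divisor's sign (-3 % 13 == 10) while math.fmod follows the dividend
# (math.fmod(-3, 13) == -3.0), which presumably matches Milvus's modulo semantics (see issue #36054);
# e.g. "(INT8 % 13) == 0" is rewritten to "math.fmod(-3 , 13) == 0" before eval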
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_no_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -130,7 +131,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -168,7 +169,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
self._connect(self)
# init params
self.primary_field, nb = "int64_pk", 3000
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
@@ -186,7 +187,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
@@ -195,7 +196,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `Hybrid index`
index_params = {
**DefaultVectorIndexParams.DISKANN(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.IVF_SQ8(DataType.BFLOAT16_VECTOR.name),
@@ -210,7 +211,6 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -227,13 +227,14 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -254,7 +255,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -276,6 +277,22 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
def test_hybrid_index_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `Hybrid index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.xdist_group("TestInvertedIndexDQLExpr")
class TestInvertedIndexDQLExpr(TestCaseClassBase):
@@ -319,7 +336,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `INVERTED index`
index_params = {
**DefaultVectorIndexParams.IVF_FLAT(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
@@ -334,7 +351,6 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -351,13 +367,14 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -378,7 +395,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -416,7 +433,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
self._connect(self)
# init params
self.primary_field, nb = "int64_pk", 3000
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
@@ -434,7 +451,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
@@ -443,7 +460,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
@@ -458,7 +475,6 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -474,13 +490,14 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -501,7 +518,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -522,3 +539,481 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_index_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("batch_size", [10, 1000])
def test_bitmap_index_search_iterator(self, batch_size):
"""
target:
1. check search iterator with BITMAP index built on scalar fields
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. search iterator and check result
expected:
1. search iterator works with BITMAP index and returns batches of the expected size
"""
search_params, vector_field = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name
self.collection_wrap.search_iterator(
cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
expr='int64_pk > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_index_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT16_VECTOR.name), anns_field=DataType.FLOAT16_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.BFLOAT16_VECTOR.name), anns_field=DataType.BFLOAT16_VECTOR.name,
param={"metric_type": MetricType.L2, "search_list": 30}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
),
AnnSearchRequest(
data=vectors.get(DataType.SPARSE_FLOAT_VECTOR.name), anns_field=DataType.SPARSE_FLOAT_VECTOR.name,
param={"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, limit=limit),
AnnSearchRequest(
data=vectors.get(DataType.BINARY_VECTOR.name), anns_field=DataType.BINARY_VECTOR.name,
param={"metric_type": MetricType.JACCARD, "nprobe": 128}, limit=limit)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
class TestBitmapIndexOffsetCache(TestCaseClassBase):
"""
Scalar fields build BITMAP index, then alter index to enable indexoffsetcache
Author: Ting.Wang
"""
def setup_class(self):
super().setup_class(self)
# connect to server before testing
self._connect(self)
# init params
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
schema=cf.set_collection_schema(
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
field_params={
self.primary_field: FieldParams(is_primary=True).to_dict
},
)
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
# build BITMAP index
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_modulo(self, expr, expr_field, limit):
"""
target:
1. check modulo expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_string(self, expr, expr_field, limit, rex):
"""
target:
1. check string expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_operation(self, expr, expr_field, limit):
"""
target:
1. check number operation
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
class TestBitmapIndexMmap(TestCaseClassBase):
"""
Scalar fields build BITMAP index, then alter index to enable mmap
Author: Ting.Wang
"""
def setup_class(self):
super().setup_class(self)
# connect to server before testing
self._connect(self)
# init params
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
schema=cf.set_collection_schema(
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
field_params={
self.primary_field: FieldParams(is_primary=True).to_dict
},
)
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
# build BITMAP index
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable mmap
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexMmap)
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_modulo(self, expr, expr_field, limit):
"""
target:
1. check modulo expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_string(self, expr, expr_field, limit, rex):
"""
target:
1. check string expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_operation(self, expr, expr_field, limit):
"""
target:
1. check number operation
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_mmap_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_mmap_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
class TestMixScenes(TestcaseBase):
"""
Testing cross-combination scenarios
Author: Ting.Wang
"""
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_upsert_and_delete(self, request):
"""
target:
1. upsert data and query returns the updated data
method:
1. create a collection with scalar fields
2. insert some data and build BITMAP index
3. query the data of the specified primary key value
4. upsert the specified primary key value
5. re-query and check data equal to the updated data
6. delete the specified primary key value
7. re-query and check result is []
expected:
1. check whether the upsert and delete data is effective
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
# scalar fields
scalar_fields, expr = [DataType.INT64.name, f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}"], 'int64_pk == 10'
# connect to server before testing
self._connect()
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *scalar_fields],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 triggering index building)
insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.collection_wrap.insert(data=list(insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
self.build_multi_index(index_params={
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(scalar_fields)
})
# load collection
self.collection_wrap.load()
# query before upsert
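# assuming gen_field_values assigns sequential primary keys starting at 0, row index 10 is the row with int64_pk == 10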
expected_res = [{k: v[10] for k, v in insert_data.items() if k != DataType.FLOAT_VECTOR.name}]
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": expected_res, "primary_field": primary_field})
# upsert int64_pk = 10
upsert_data = cf.gen_field_values(self.collection_wrap.schema, nb=1,
default_values={primary_field: [10]}, start_id=10)
self.collection_wrap.upsert(data=list(upsert_data.values()))
# re-query
expected_upsert_res = [{k: v[0] for k, v in upsert_data.items() if k != DataType.FLOAT_VECTOR.name}]
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": expected_upsert_res, "primary_field": primary_field})
# delete int64_pk = 10
self.collection_wrap.delete(expr=expr)
# re-query
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": []})