test: add bitmap index cases (#35909)

Signed-off-by: wangting0128 <ting.wang@zilliz.com>
wt 2024-09-03 16:49:03 +08:00 committed by GitHub
parent 74048ce34f
commit cb49b32358
7 changed files with 977 additions and 35 deletions


@ -1,5 +1,6 @@
import pytest
import sys
from typing import Dict, List
from pymilvus import DefaultConfig
from base.database_wrapper import ApiDatabaseWrapper
@ -15,6 +16,7 @@ from base.high_level_api_wrapper import HighLevelApiWrapper
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_params import IndexPrams
from pymilvus import ResourceGroupInfo
@ -395,3 +397,23 @@ class TestcaseBase(Base):
return tmp_user, tmp_pwd, tmp_role
def build_multi_index(self, index_params: Dict[str, IndexPrams], collection_obj: ApiCollectionWrapper = None):
collection_obj = collection_obj or self.collection_wrap
for k, v in index_params.items():
collection_obj.create_index(field_name=k, index_params=v.to_dict, index_name=k)
log.info(f"[TestcaseBase] Build all indexes done: {list(index_params.keys())}")
return collection_obj
def drop_multi_index(self, index_names: List[str], collection_obj: ApiCollectionWrapper = None,
check_task=None, check_items=None):
collection_obj = collection_obj or self.collection_wrap
for n in index_names:
collection_obj.drop_index(index_name=n, check_task=check_task, check_items=check_items)
log.info(f"[TestcaseBase] Drop all indexes done: {index_names}")
return collection_obj
def show_indexes(self, collection_obj: ApiCollectionWrapper = None):
collection_obj = collection_obj or self.collection_wrap
indexes = {n.field_name: n.params for n in collection_obj.indexes}
log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
return indexes
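Taken together, these helpers let a test build, inspect, and drop a whole set of indexes in bulk. A minimal usage sketch inside a `TestcaseBase` subclass (the field names and index choices below are illustrative, not taken from this commit):

index_params = {
    "float_vector": IndexPrams(index_type="HNSW", params={"M": 8, "efConstruction": 200}, metric_type="L2"),
    "int64_1": IndexPrams(index_type="BITMAP"),
}
self.build_multi_index(index_params=index_params)  # one create_index call per field
assert set(self.show_indexes().keys()) == set(index_params.keys())
self.drop_multi_index(index_names=["int64_1"])  # the vector index stays in place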


@ -104,6 +104,10 @@ class ResponseChecker:
# describe collection interface(high level api) response check
result = self.check_describe_collection_property(self.response, self.func_name, self.check_items)
elif self.check_task == CheckTasks.check_insert_result:
# check `insert` interface response
result = self.check_insert_response(check_items=self.check_items)
# Add check_items here if something new needs to be verified
return result
@ -602,3 +606,18 @@ class ResponseChecker:
log.error("[CheckFunc] Response of API is not an error: %s" % str(res))
assert False
return True
def check_insert_response(self, check_items):
# check request successful
self.assert_succ(self.succ, True)
# get the expected insert count
expected = check_items.get("insert_count", None) if isinstance(check_items, dict) else None
if expected is None:
expected = len(self.kwargs_dict.get("data", [[]])[0])
# check insert count
error_message = "[CheckFunc] Insert count does not meet expectations, response:{0} != expected:{1}"
assert self.response.insert_count == expected, error_message.format(self.response.insert_count, expected)
return True
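A sketch of how a test drives this checker: pass the expected count through `check_items`, or omit it and let the checker fall back to `len(data[0])`, i.e. the row count of the first column of column-based data:

# expected count passed explicitly
collection_w.insert(data=data, check_task=CheckTasks.check_insert_result,
                    check_items={"insert_count": 3000})
# expected count inferred from the first data column
collection_w.insert(data=data, check_task=CheckTasks.check_insert_result)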


@ -32,3 +32,9 @@ class PartitionErrorMessage(ExceptionsMessage):
class IndexErrorMessage(ExceptionsMessage):
WrongFieldName = "cannot create index on non-vector field: %s"
DropLoadedIndex = "index cannot be dropped, collection is loaded, please release it first"
CheckVectorIndex = "data type {0} can't build with this index {1}"
SparseFloatVectorMetricType = "only IP is the supported metric type for sparse index"
VectorMetricTypeExist = "metric type not set for vector index"
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
CheckBitmapOnPK = "create bitmap index on primary key not supported"


@ -14,7 +14,7 @@ from npy_append_array import NpyAppendArray
from faker import Faker
from pathlib import Path
from minio import Minio
from pymilvus import DataType, CollectionSchema
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
from common import common_type as ct
from utils.util_log import test_log as log
@ -24,6 +24,12 @@ fake = Faker()
"""" Methods of processing data """
try:
RNG = np.random.default_rng(seed=0)
except ValueError:
RNG = None
@singledispatch
def to_serializable(val):
"""Used by default."""
@ -1230,20 +1236,23 @@ def gen_data_by_collection_field(field, nb=None, start=None):
if data_type == DataType.BFLOAT16_VECTOR:
dim = field.params['dim']
if nb is None:
return RNG.uniform(size=dim).astype(bfloat16)
return [RNG.uniform(size=dim).astype(bfloat16) for _ in range(int(nb))]
if data_type == DataType.FLOAT16_VECTOR:
dim = field.params['dim']
if nb is None:
return np.array([random.random() for _ in range(int(dim))], dtype=np.float16)
return [np.array([random.random() for _ in range(int(dim))], dtype=np.float16) for _ in range(int(nb))]
if data_type == DataType.BINARY_VECTOR:
dim = field.params['dim']
if nb is None:
@ -1251,9 +1260,21 @@ def gen_data_by_collection_field(field, nb=None, start=None):
binary_byte = bytes(np.packbits(raw_vector, axis=-1).tolist())
return binary_byte
return [bytes(np.packbits([random.randint(0, 1) for _ in range(dim)], axis=-1).tolist()) for _ in range(nb)]
if data_type == DataType.SPARSE_FLOAT_VECTOR:
if nb is None:
return gen_sparse_vectors(nb=1)[0]
return gen_sparse_vectors(nb=nb)
if data_type == DataType.ARRAY:
max_capacity = field.params['max_capacity']
element_type = field.element_type
if element_type == DataType.INT8:
if nb is None:
return [random.randint(-128, 127) for _ in range(max_capacity)]
return [[random.randint(-128, 127) for _ in range(max_capacity)] for _ in range(nb)]
if element_type == DataType.INT16:
if nb is None:
return [random.randint(-32768, 32767) for _ in range(max_capacity)]
return [[random.randint(-32768, 32767) for _ in range(max_capacity)] for _ in range(nb)]
if element_type == DataType.INT32:
if nb is None:
return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
@ -1279,7 +1300,6 @@ def gen_data_by_collection_field(field, nb=None, start=None):
if nb is None:
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
return None
@ -1296,6 +1316,25 @@ def gen_data_by_collection_schema(schema, nb, r=0):
return data
def gen_varchar_values(nb: int, length: int = 0):
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]
def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = None):
"""
generate column-based data according to the collection fields;
the value of a specified field can be overridden via `default_values`
"""
default_values = default_values or {}
data = []
for field in schema.fields:
default_value = default_values.get(field.name, None)
if default_value is not None:
data.append(default_value)
elif field.auto_id is False:
data.append(gen_data_by_collection_field(field, nb, start_id * nb))
return data
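A minimal sketch of calling `gen_values` directly, mirroring its use in the bitmap tests below (the `int64_pk` field name is illustrative; the override only makes sense for a non-auto_id field):

nb = 3000
data = cf.gen_values(collection_w.schema, nb=nb,
                     default_values={"int64_pk": list(range(nb))})  # override only the pk column
collection_w.insert(data=data)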
def gen_json_files_for_bulk_insert(data, schema, data_dir):
for d in data:
if len(d) > 0:
@ -2288,3 +2327,71 @@ def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
vectors = gen_sparse_vectors(num, dim)
return vectors
def field_types() -> dict:
return dict(sorted(dict(DataType.__members__).items(), key=lambda item: item[0], reverse=True))
def get_array_element_type(data_type: str):
if hasattr(DataType, "ARRAY") and data_type.startswith(DataType.ARRAY.name):
element_type = data_type[len(DataType.ARRAY.name):].lstrip("_")
for _field in field_types().keys():
if str(element_type).upper().startswith(_field):
return _field, getattr(DataType, _field)
raise ValueError(f"[get_array_data_type] Can't find element type:{element_type} for array:{data_type}")
raise ValueError(f"[get_array_data_type] Data type is not start with array: {data_type}")
def set_field_schema(field: str, params: dict):
for k, v in field_types().items():
if str(field).upper().startswith(k):
_kwargs = {}
_field_element, _data_type = k, DataType.NONE
if hasattr(DataType, "ARRAY") and _field_element == DataType.ARRAY.name:
_field_element, _data_type = get_array_element_type(field)
_kwargs.update({"max_capacity": ct.default_max_capacity, "element_type": _data_type})
if _field_element in [DataType.STRING.name, DataType.VARCHAR.name]:
_kwargs.update({"max_length": ct.default_length})
elif _field_element in [DataType.BINARY_VECTOR.name, DataType.FLOAT_VECTOR.name,
DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name]:
_kwargs.update({"dim": ct.default_dim})
if isinstance(params, dict):
_kwargs.update(params)
else:
raise ValueError(
f"[set_field_schema] Field `{field}` params is not a dict, type: {type(params)}, params: {params}")
return ApiFieldSchemaWrapper().init_field_schema(name=field, dtype=v, **_kwargs)[0]
raise ValueError(f"[set_field_schema] Can't set field:`{field}` schema: {params}")
def set_collection_schema(fields: list, field_params: dict = None, **kwargs):
"""
:param fields: List[str]
:param field_params: {<field name>: dict<field params>}
int64_1:
is_primary: bool
description: str
varchar_1:
is_primary: bool
description: str
max_length: int = 65535
array_int8_1:
max_capacity: int = 100
array_varchar_1:
max_capacity: int = 100
max_length: int = 65535
float_vector:
dim: int = 128
:param kwargs: <params for collection schema>
description: str
primary_field: str
auto_id: bool
enable_dynamic_field: bool
"""
field_params = field_params or {}
field_schemas = [set_field_schema(field=field, params=field_params.get(field, {})) for field in fields]
return ApiCollectionSchemaWrapper().init_collection_schema(fields=field_schemas, **kwargs)[0]
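A short sketch of the naming convention the two helpers rely on: every field name must start with a `DataType` member name (array fields as `ARRAY_<element type>`), and per-field overrides go through `field_params` (`FieldParams` is defined in common_params.py below; the `_1` suffixes are illustrative):

schema = cf.set_collection_schema(
    fields=["int64_pk", DataType.FLOAT_VECTOR.name, "varchar_1", "ARRAY_INT8_1"],
    field_params={"int64_pk": FieldParams(is_primary=True).to_dict},
    auto_id=False,
)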


@ -0,0 +1,365 @@
from dataclasses import dataclass
from typing import List, Dict
""" Define param names"""
class IndexName:
# Vector
AUTOINDEX = "AUTOINDEX"
FLAT = "FLAT"
IVF_FLAT = "IVF_FLAT"
IVF_SQ8 = "IVF_SQ8"
IVF_PQ = "IVF_PQ"
IVF_HNSW = "IVF_HNSW"
HNSW = "HNSW"
DISKANN = "DISKANN"
SCANN = "SCANN"
# binary
BIN_FLAT = "BIN_FLAT"
BIN_IVF_FLAT = "BIN_IVF_FLAT"
# Sparse
SPARSE_WAND = "SPARSE_WAND"
SPARSE_INVERTED_INDEX = "SPARSE_INVERTED_INDEX"
# GPU
GPU_IVF_FLAT = "GPU_IVF_FLAT"
GPU_IVF_PQ = "GPU_IVF_PQ"
GPU_CAGRA = "GPU_CAGRA"
GPU_BRUTE_FORCE = "GPU_BRUTE_FORCE"
# Scalar
INVERTED = "INVERTED"
BITMAP = "BITMAP"
Trie = "Trie"
STL_SORT = "STL_SORT"
class MetricType:
L2 = "L2"
IP = "IP"
COSINE = "COSINE"
JACCARD = "JACCARD"
""" expressions """
@dataclass
class ExprBase:
expr: str
@property
def subset(self):
return f"({self.expr})"
def __repr__(self):
return self.expr
class Expr:
# BooleanConstant: 'true' | 'True' | 'TRUE' | 'false' | 'False' | 'FALSE'
@staticmethod
def LT(left, right):
return ExprBase(expr=f"{left} < {right}")
@staticmethod
def LE(left, right):
return ExprBase(expr=f"{left} <= {right}")
@staticmethod
def GT(left, right):
return ExprBase(expr=f"{left} > {right}")
@staticmethod
def GE(left, right):
return ExprBase(expr=f"{left} >= {right}")
@staticmethod
def EQ(left, right):
return ExprBase(expr=f"{left} == {right}")
@staticmethod
def NE(left, right):
return ExprBase(expr=f"{left} != {right}")
@staticmethod
def like(left, right):
return ExprBase(expr=f'{left} like "{right}"')
@staticmethod
def LIKE(left, right):
return ExprBase(expr=f'{left} LIKE "{right}"')
@staticmethod
def exists(name):
return ExprBase(expr=f'exists {name}')
@staticmethod
def EXISTS(name):
return ExprBase(expr=f'EXISTS {name}')
@staticmethod
def ADD(left, right):
return ExprBase(expr=f"{left} + {right}")
@staticmethod
def SUB(left, right):
return ExprBase(expr=f"{left} - {right}")
@staticmethod
def MUL(left, right):
return ExprBase(expr=f"{left} * {right}")
@staticmethod
def DIV(left, right):
return ExprBase(expr=f"{left} / {right}")
@staticmethod
def MOD(left, right):
return ExprBase(expr=f"{left} % {right}")
@staticmethod
def POW(left, right):
return ExprBase(expr=f"{left} ** {right}")
@staticmethod
def SHL(left, right):
# Note: not supported
return ExprBase(expr=f"{left}<<{right}")
@staticmethod
def SHR(left, right):
# Note: not supported
return ExprBase(expr=f"{left}>>{right}")
@staticmethod
def BAND(left, right):
# Note: not supported
return ExprBase(expr=f"{left} & {right}")
@staticmethod
def BOR(left, right):
# Note: not supported
return ExprBase(expr=f"{left} | {right}")
@staticmethod
def BXOR(left, right):
# Note: not supported
return ExprBase(expr=f"{left} ^ {right}")
@staticmethod
def AND(left, right):
return ExprBase(expr=f"{left} && {right}")
@staticmethod
def And(left, right):
return ExprBase(expr=f"{left} and {right}")
@staticmethod
def OR(left, right):
return ExprBase(expr=f"{left} || {right}")
@staticmethod
def Or(left, right):
return ExprBase(expr=f"{left} or {right}")
@staticmethod
def BNOT(name):
# Note: not supported
return ExprBase(expr=f"~{name}")
@staticmethod
def NOT(name):
return ExprBase(expr=f"!{name}")
@staticmethod
def Not(name):
return ExprBase(expr=f"not {name}")
@staticmethod
def In(left, right):
return ExprBase(expr=f"{left} in {right}")
@staticmethod
def Nin(left, right):
return ExprBase(expr=f"{left} not in {right}")
@staticmethod
def json_contains(left, right):
return ExprBase(expr=f"json_contains({left}, {right})")
@staticmethod
def JSON_CONTAINS(left, right):
return ExprBase(expr=f"JSON_CONTAINS({left}, {right})")
@staticmethod
def json_contains_all(left, right):
return ExprBase(expr=f"json_contains_all({left}, {right})")
@staticmethod
def JSON_CONTAINS_ALL(left, right):
return ExprBase(expr=f"JSON_CONTAINS_ALL({left}, {right})")
@staticmethod
def json_contains_any(left, right):
return ExprBase(expr=f"json_contains_any({left}, {right})")
@staticmethod
def JSON_CONTAINS_ANY(left, right):
return ExprBase(expr=f"JSON_CONTAINS_ANY({left}, {right})")
@staticmethod
def array_contains(left, right):
return ExprBase(expr=f"array_contains({left}, {right})")
@staticmethod
def ARRAY_CONTAINS(left, right):
return ExprBase(expr=f"ARRAY_CONTAINS({left}, {right})")
@staticmethod
def array_contains_all(left, right):
return ExprBase(expr=f"array_contains_all({left}, {right})")
@staticmethod
def ARRAY_CONTAINS_ALL(left, right):
return ExprBase(expr=f"ARRAY_CONTAINS_ALL({left}, {right})")
@staticmethod
def array_contains_any(left, right):
return ExprBase(expr=f"array_contains_any({left}, {right})")
@staticmethod
def ARRAY_CONTAINS_ANY(left, right):
return ExprBase(expr=f"ARRAY_CONTAINS_ANY({left}, {right})")
@staticmethod
def array_length(name):
return ExprBase(expr=f"array_length({name})")
@staticmethod
def ARRAY_LENGTH(name):
return ExprBase(expr=f"ARRAY_LENGTH({name})")
"""" Define pass in params """
@dataclass
class BasePrams:
@property
def to_dict(self):
return {k: v for k, v in vars(self).items() if v is not None}
@dataclass
class FieldParams(BasePrams):
description: str = None
# varchar
max_length: int = None
# array
max_capacity: int = None
# for vector
dim: int = None
# scalar
is_primary: bool = None
# auto_id: bool = None
is_partition_key: bool = None
is_clustering_key: bool = None
@dataclass
class IndexPrams(BasePrams):
index_type: str = None
params: dict = None
metric_type: str = None
""" Define default params """
class DefaultVectorIndexParams:
@staticmethod
def FLAT(field: str, metric_type=MetricType.L2):
return {field: IndexPrams(index_type=IndexName.FLAT, params={}, metric_type=metric_type)}
@staticmethod
def IVF_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.L2):
return {
field: IndexPrams(index_type=IndexName.IVF_FLAT, params={"nlist": nlist}, metric_type=metric_type)
}
@staticmethod
def IVF_SQ8(field: str, nlist: int = 1024, metric_type=MetricType.L2):
return {
field: IndexPrams(index_type=IndexName.IVF_SQ8, params={"nlist": nlist}, metric_type=metric_type)
}
@staticmethod
def HNSW(field: str, m: int = 8, ef: int = 200, metric_type=MetricType.L2):
return {
field: IndexPrams(index_type=IndexName.HNSW, params={"M": m, "efConstruction": ef}, metric_type=metric_type)
}
@staticmethod
def DISKANN(field: str, metric_type=MetricType.L2):
return {field: IndexPrams(index_type=IndexName.DISKANN, params={}, metric_type=metric_type)}
@staticmethod
def BIN_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.JACCARD):
return {
field: IndexPrams(index_type=IndexName.BIN_FLAT, params={"nlist": nlist}, metric_type=metric_type)
}
@staticmethod
def BIN_IVF_FLAT(field: str, nlist: int = 1024, metric_type=MetricType.JACCARD):
return {
field: IndexPrams(index_type=IndexName.BIN_IVF_FLAT, params={"nlist": nlist},
metric_type=metric_type)
}
@staticmethod
def SPARSE_WAND(field: str, drop_ratio_build: float = 0.2, metric_type=MetricType.IP):
return {
field: IndexPrams(index_type=IndexName.SPARSE_WAND, params={"drop_ratio_build": drop_ratio_build},
metric_type=metric_type)
}
@staticmethod
def SPARSE_INVERTED_INDEX(field: str, drop_ratio_build: float = 0.2, metric_type=MetricType.IP):
return {
field: IndexPrams(index_type=IndexName.SPARSE_INVERTED_INDEX, params={"drop_ratio_build": drop_ratio_build},
metric_type=metric_type)
}
class DefaultScalarIndexParams:
@staticmethod
def Default(field: str):
return {field: IndexPrams()}
@staticmethod
def Trie(field: str):
return {field: IndexPrams(index_type=IndexName.Trie)}
@staticmethod
def STL_SORT(field: str):
return {field: IndexPrams(index_type=IndexName.STL_SORT)}
@staticmethod
def INVERTED(field: str):
return {field: IndexPrams(index_type=IndexName.INVERTED)}
@staticmethod
def BITMAP(field: str):
return {field: IndexPrams(index_type=IndexName.BITMAP)}
@staticmethod
def list_bitmap(fields: List[str]) -> Dict[str, IndexPrams]:
return {n: IndexPrams(index_type=IndexName.BITMAP) for n in fields}
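Every builder returns a `Dict[str, IndexPrams]` keyed by field name, which is what lets the bitmap tests below merge one vector index with `list_bitmap` over all supported scalar fields in a single dict:

index_params = {
    **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
    **DefaultScalarIndexParams.list_bitmap(["INT64", "VARCHAR"]),
}
# keys are field names, values are IndexPrams ready for build_multi_index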


@ -286,6 +286,7 @@ class CheckTasks:
check_value_equal = "check_value_equal"
check_rg_property = "check_resource_group_property"
check_describe_collection_property = "check_describe_collection_property"
check_insert_result = "check_insert_result"
class BulkLoadStates:


@ -13,6 +13,9 @@ from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from common.code_mapping import CollectionErrorMessage as clem
from common.code_mapping import IndexErrorMessage as iem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType
)
from utils.util_pymilvus import *
from common.constants import *
@ -336,7 +339,8 @@ class TestIndexOperation(TestcaseBase):
vec_field2 = cf.gen_float_vec_field(name="vec_field2", dim=32)
str_field = cf.gen_string_field(name="str_field")
str_field2 = cf.gen_string_field(name="str_field2")
schema, _ = self.collection_schema_wrap.init_collection_schema(
[id_field, vec_field, vec_field2, str_field, str_field2])
collection_w = self.init_collection_wrap(schema=schema)
vec_index = ct.default_index
vec_index_name = "my_index"
@ -378,7 +382,7 @@ class TestIndexOperation(TestcaseBase):
cf.assert_equal_index(index, collection_w.collection.indexes[0])
self.index_wrap.drop()
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_index_drop_repeatedly(self):
"""
@ -640,7 +644,8 @@ class TestNewIndexBase(TestcaseBase):
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data()
collection_w.insert(data=data)
index_prams = [default_ivf_flat_index,
{"metric_type": "L2", "index_type": "IVF_SQ8", "params": {"nlist": 1024}}]
for index in index_prams:
index_name = cf.gen_unique_str("name")
collection_w.create_index(default_float_vec_field_name, index, index_name=index_name)
@ -1018,7 +1023,6 @@ class TestNewIndexBase(TestcaseBase):
@pytest.mark.tags(CaseLabel.GPU)
class TestNewIndexBinary(TestcaseBase):
"""
******************************************************************
The following cases are used to test `create_index` function
@ -1176,7 +1180,7 @@ class TestIndexInvalid(TestcaseBase):
Test create / describe / drop index interfaces with invalid collection names
"""
@pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"])
@pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED", IndexName.BITMAP])
def scalar_index(self, request):
yield request.param
@ -1366,7 +1370,7 @@ class TestIndexInvalid(TestcaseBase):
collection_w.alter_index("random_index_345", {'mmap.enabled': True},
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: f"index not found"})
ct.err_msg: f"index not found"})
@pytest.mark.tags(CaseLabel.L1)
def test_load_mmap_index(self):
@ -1460,8 +1464,8 @@ class TestIndexInvalid(TestcaseBase):
params = {"index_type": index, "metric_type": metric_type, "params": param}
error = {ct.err_code: 65535, ct.err_msg: "only IP is the supported metric type for sparse index"}
index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
check_task=CheckTasks.err_res,
check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("ratio", [-0.5, 1, 3])
@ -1478,7 +1482,8 @@ class TestIndexInvalid(TestcaseBase):
data = cf.gen_default_list_sparse_data()
collection_w.insert(data=data)
params = {"index_type": index, "metric_type": "IP", "params": {"drop_ratio_build": ratio}}
error = {ct.err_code: 1100,
ct.err_msg: f"invalid drop_ratio_build: {ratio}, must be in range [0, 1): invalid parameter[expected=valid index params"}
index, _ = self.index_wrap.init_index(collection_w.collection, ct.default_sparse_vec_field_name, params,
check_task=CheckTasks.err_res,
check_items=error)
@ -1605,7 +1610,8 @@ class TestIndexString(TestcaseBase):
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
cf.assert_equal_index(index, collection_w.indexes[0])
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name="vector_flat")
collection_w.load()
assert collection_w.num_entities == default_nb
@ -1621,7 +1627,8 @@ class TestIndexString(TestcaseBase):
collection_w = self.init_collection_wrap(name=c_name)
data = cf.gen_default_list_data(ct.default_nb)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index,
index_name="vector_flat")
index, _ = self.index_wrap.init_index(collection_w.collection, default_string_field_name,
default_string_index_params)
collection_w.load()
@ -1753,7 +1760,7 @@ class TestIndexString(TestcaseBase):
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
collection_w.drop_index(index_name=index_name2)
assert len(collection_w.indexes) == 0
@pytest.mark.tags(CaseLabel.L1)
def test_index_with_string_field_empty(self):
"""
@ -1767,7 +1774,7 @@ class TestIndexString(TestcaseBase):
nb = 3000
data = cf.gen_default_list_data(nb)
data[2] = [""for _ in range(nb)]
data[2] = ["" for _ in range(nb)]
collection_w.insert(data=data)
collection_w.create_index(default_string_field_name, default_string_index_params, index_name=index_name2)
@ -1783,6 +1790,7 @@ class TestIndexDiskann(TestcaseBase):
The following cases are used to test create index about diskann
******************************************************************
"""
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
@ -1804,14 +1812,15 @@ class TestIndexDiskann(TestcaseBase):
data = cf.gen_default_list_data()
collection_w.insert(data=data)
assert collection_w.num_entities == default_nb
index, _ = self.index_wrap.init_index(collection_w.collection, default_float_vec_field_name,
ct.default_diskann_index)
log.info(self.index_wrap.params)
cf.assert_equal_index(index, collection_w.indexes[0])
collection_w.load()
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
ct.default_diskann_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@ -1854,11 +1863,11 @@ class TestIndexDiskann(TestcaseBase):
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
ct.default_diskann_search_params, default_limit,
default_search_exp,
check_task=CheckTasks.check_search_results,
check_items={"nq": default_nq,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
def test_create_diskann_index_drop_with_async(self, _async):
"""
@ -1901,7 +1910,7 @@ class TestIndexDiskann(TestcaseBase):
index_name=field_name)
collection_w.load()
assert collection_w.has_index(index_name=field_name)[0] is True
assert len(collection_w.indexes) == 1
collection_w.release()
collection_w.drop_index(index_name=field_name)
assert collection_w.has_index(index_name=field_name)[0] is False
@ -1926,7 +1935,7 @@ class TestIndexDiskann(TestcaseBase):
collection_w.release()
collection_w.drop_index(index_name=index_name1)
assert collection_w.has_index(index_name=index_name1)[0] is False
@pytest.mark.tags(CaseLabel.L2)
def test_drop_diskann_index_and_create_again(self):
"""
@ -1968,7 +1977,7 @@ class TestIndexDiskann(TestcaseBase):
default_params = {}
collection_w.create_index("float", default_params, index_name="c")
assert collection_w.has_index(index_name="c")[0] == True
@pytest.mark.tags(CaseLabel.L2)
def test_drop_diskann_index_with_partition(self):
"""
@ -2045,7 +2054,8 @@ class TestIndexDiskann(TestcaseBase):
c_name = cf.gen_unique_str(prefix)
collection_w = self.init_collection_wrap(c_name, schema=default_schema)
collection_w.insert(cf.gen_default_list_data())
collection_w.create_index(default_float_vec_field_name, ct.default_diskann_index,
index_name=ct.default_index_name)
collection_w.set_properties({'mmap.enabled': True})
desc, _ = collection_w.describe()
pro = desc.get("properties")
@ -2204,7 +2214,6 @@ class TestInvertedIndexValid(TestcaseBase):
def vector_data_type(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("scalar_field_name", [ct.default_int8_field_name, ct.default_int16_field_name,
ct.default_int32_field_name, ct.default_int64_field_name,
@ -2285,3 +2294,416 @@ class TestInvertedIndexValid(TestcaseBase):
scalar_index_params = {"index_type": f"{scalar_index[i]}"}
collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
assert collection_w.has_index(index_name=index_name)[0] is True
class TestBitmapIndex(TestcaseBase):
"""
Functional `BITMAP` index
Author: Ting.Wang
"""
def setup_method(self, method):
super().setup_method(method)
# connect to server before testing
self._connect()
@property
def get_bitmap_support_dtype_names(self):
dtypes = [DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR]
dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes]
return dtype_names
@property
def get_bitmap_not_support_dtype_names(self):
dtypes = [DataType.FLOAT, DataType.DOUBLE]
dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name]
return dtype_names
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_on_primary_key_field(self, request, primary_field, auto_id):
"""
target:
1. build BITMAP index on primary key field
method:
1. create an empty collection
2. build `BITMAP` index on primary key field
expected:
1. the primary key field does not support building a BITMAP index
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{auto_id}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on primary key field
self.collection_wrap.create_index(
field_name=primary_field, index_params={"index_type": IndexName.BITMAP}, index_name=primary_field,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapOnPK})
@pytest.mark.tags(CaseLabel.L0)
def test_bitmap_on_not_supported_fields(self, request):
"""
target:
1. build `BITMAP` index on not supported fields
method:
1. create an empty collection with fields:
[`varchar_pk`, `SPARSE_FLOAT_VECTOR`, `FLOAT`, `DOUBLE`, `ARRAY_FLOAT`, `ARRAY_DOUBLE`, `JSON`]
2. build `BITMAP` indexes with different params on the unsupported fields
expected:
1. check that building the index fails, and assert the error code and message
"""
# init params
collection_name, primary_field = f"{request.function.__name__}", "varchar_pk"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.get_bitmap_not_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict}
)
)
# build `BITMAP` index on sparse vector field
for msg, index_params in {
iem.VectorMetricTypeExist: IndexPrams(index_type=IndexName.BITMAP),
iem.SparseFloatVectorMetricType: IndexPrams(index_type=IndexName.BITMAP, metric_type=MetricType.L2),
iem.CheckVectorIndex.format(DataType.SPARSE_FLOAT_VECTOR, IndexName.BITMAP): IndexPrams(
index_type=IndexName.BITMAP, metric_type=MetricType.IP)
}.items():
self.collection_wrap.create_index(
field_name=DataType.SPARSE_FLOAT_VECTOR.name, index_params=index_params.to_dict,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: msg}
)
# build `BITMAP` index on not supported scalar fields
for _field_name in self.get_bitmap_not_support_dtype_names:
self.collection_wrap.create_index(
field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex}
)
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_on_empty_collection(self, request, primary_field, auto_id):
"""
target:
1. create `BITMAP` index on all supported fields
2. build scalar index on loaded collection
method:
1. build and drop a `BITMAP` index on an empty collection
2. rebuild the `BITMAP` index on the loaded collection
3. drop the index on the loaded collection and check the expected error is raised
4. re-build the same index on the loaded collection
expected:
1. building and dropping the index succeeds on a collection that is not loaded
2. building the index succeeds on loaded, non-indexed fields
3. an index cannot be dropped while the collection is loaded
"""
# init params
collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# drop scalars' index
self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}))
assert len(self.collection_wrap.indexes) == 1
# load collection
self.collection_wrap.load()
# build scalars' index after loading collection
self.build_multi_index(index_params={k: v for k, v in index_params.items() if v.index_type == IndexName.BITMAP})
# reload collection
self.collection_wrap.load()
# re-drop scalars' index
self.drop_multi_index(index_names=list(set(index_params.keys()) - {DataType.FLOAT_VECTOR.name}),
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535, ct.err_msg: iem.DropLoadedIndex})
# re-build loaded index
self.build_multi_index(index_params=index_params)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_insert_after_loading(self, request, primary_field, auto_id):
"""
target:
1. insert data after building `BITMAP` index and loading collection
method:
1. build index on and load an empty collection
2. insert 3k rows of data
3. check there is no indexed data
4. flush collection, re-build index and refresh-load the collection
5. the row count of indexed data equals the number of inserted rows
expected:
1. insertion is successful
2. segment row number == inserted rows
"""
# init params
collection_name, nb = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# prepare 3k rows of data (> 1024 rows triggers index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# check no indexed segments
res, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
assert len(res) == 0
# flush collection, segment sealed
self.collection_wrap.flush()
# re-build vector field index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name))
# refresh the load to ensure the indexed segments are loaded
self.collection_wrap.load(_refresh=True)
# check segment row number
counts = [int(n.num_rows) for n in self.utility_wrap.get_query_segment_info(collection_name=collection_name)[0]]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
def test_bitmap_insert_before_loading(self, request, primary_field, auto_id):
"""
target:
1. insert data before building `BITMAP` index and loading collection
method:
1. insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equals shards_num
expected:
1. insertion is successful
2. the number of segments == shards_num
3. segment row number == inserted rows
"""
# init params
collection_name, nb, shards_num = f"{request.function.__name__}_{primary_field}_{auto_id}", 3000, 16
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
),
shards_num=shards_num
)
# prepare data (> 1024 rows triggers index building)
pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
default_values = {} if auto_id else {primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb, default_values=default_values),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segment counts == shards_num
assert len(segment_info) == shards_num
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
@pytest.mark.parametrize("shards_num, nb", [(2, 3791), (16, 1600), (16, 10)])
def test_bitmap_primary_field_data_repeated(self, request, primary_field, shards_num, nb):
"""
target:
1. the same pk value is inserted into the same shard
method:
1. generate the same pk value and insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equals 1
4. the row count of indexed data equals the number of inserted rows
expected:
1. insertion is successful
2. the number of segments == 1
3. segment row number == inserted rows
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
)
# prepare data (> 1024 rows triggers index building)
pk_key = str(shards_num) if primary_field.startswith(DataType.VARCHAR.name.lower()) else shards_num
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb,
default_values={primary_field: [pk_key for _ in range(nb)]}),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segments count
msg = f"`{collection_name}` Segments count:{len(segment_info)} != 1, pk field data is repeated."
assert len(segment_info) == 1, msg
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
@pytest.mark.parametrize("shards_num, nb", [(1, 1000), (2, 3791), (16, 1600), (16, 10)])
def test_bitmap_primary_field_data_not_repeated(self, request, primary_field, shards_num, nb):
"""
target:
1. different pk values are inserted into different shards
method:
1. generate different pk values and insert data into an empty collection
2. flush collection, build index and load collection
3. the number of segments equals shards_num, or is at most the number of inserted rows
4. the row count of indexed data equals the number of inserted rows
expected:
1. insertion is successful
2. the number of segments == shards_num, or <= inserted rows when shards_num > nb
3. segment row number == inserted rows
"""
# init params
collection_name = f"{request.function.__name__}_{primary_field}_{shards_num}_{nb}"
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
)
# prepare data (> 1024 rows triggers index building)
pk_type = "str" if primary_field.startswith(DataType.VARCHAR.name.lower()) else "int"
self.collection_wrap.insert(
data=cf.gen_values(self.collection_wrap.schema, nb=nb,
default_values={primary_field: [eval(f"{pk_type}({n})") for n in range(nb)]}),
check_task=CheckTasks.check_insert_result
)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
# get segment info
segment_info, _ = self.utility_wrap.get_query_segment_info(collection_name=collection_name)
# check segments count
if shards_num > nb:
msg = f"`{collection_name}` Segments count:{len(segment_info)} > insert data:{nb}"
assert len(segment_info) <= nb, msg
else:
msg = f"`{collection_name}` Segments count:{len(segment_info)} != shards_num:{shards_num}"
assert len(segment_info) == shards_num, msg
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"