test: add more bitmap test cases (#36290)

Signed-off-by: wangting0128 <ting.wang@zilliz.com>
pull/36310/head
wt 2024-09-16 11:03:09 +08:00 committed by GitHub
parent dcd904d2fa
commit 526a672bae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 302 additions and 66 deletions

View File

@ -86,8 +86,8 @@ class Base:
rgs_list = self.utility_wrap.list_resource_groups()[0]
for rg_name in self.resource_group_list:
if rg_name is not None and rg_name in rgs_list:
rg = \
self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
rg = self.utility_wrap.describe_resource_group(
name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
if isinstance(rg, ResourceGroupInfo):
if rg.num_available_node > 0:
self.utility_wrap.transfer_node(source=rg_name,
@ -443,27 +443,7 @@ class TestcaseBase(Base):
log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
return indexes
class TestCaseClassBase(TestcaseBase):
"""
Setup objects on class
"""
def setup_class(self):
log.info("[setup_class] " + " Start setup class ".center(100, "~"))
self._setup_objects(self)
def teardown_class(self):
log.info("[teardown_class]" + " Start teardown class ".center(100, "~"))
self._teardown_objects(self)
def setup_method(self, method):
log.info(" setup ".center(80, "*"))
log.info("[setup_method] Start setup test case %s." % method.__name__)
def teardown_method(self, method):
log.info(" teardown ".center(80, "*"))
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
""" Property """
@property
def all_scalar_fields(self):
@ -493,3 +473,25 @@ class TestCaseClassBase(TestcaseBase):
@property
def bitmap_not_support_dtype_names(self):
return list(set(self.all_scalar_fields) - set(self.bitmap_support_dtype_names))
class TestCaseClassBase(TestcaseBase):
    """
    Shares one set of test objects across a whole test class.

    ``setup_class``/``teardown_class`` create and release the shared wrappers
    via ``_setup_objects``/``_teardown_objects``; the per-method hooks only
    emit log separators so individual cases are easy to find in the output.
    """

    def setup_class(self):
        """Build the shared wrapper objects once for the whole class."""
        log.info("[setup_class] " + " Start setup class ".center(100, "~"))
        self._setup_objects(self)

    def teardown_class(self):
        """Release the shared wrapper objects after the last test runs."""
        log.info("[teardown_class]" + " Start teardown class ".center(100, "~"))
        self._teardown_objects(self)

    def setup_method(self, method):
        """Log a visual separator before each test case."""
        log.info(" setup ".center(80, "*"))
        log.info(f"[setup_method] Start setup test case {method.__name__}.")

    def teardown_method(self, method):
        """Log a visual separator after each test case."""
        log.info(" teardown ".center(80, "*"))
        log.info(f"[teardown_method] Start teardown test case {method.__name__}...")

View File

@ -39,6 +39,7 @@ class IndexErrorMessage(ExceptionsMessage):
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
CheckBitmapOnPK = "create bitmap index on primary key not supported"
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
NotConfigable = "{0} is not configable index param"
class QueryErrorMessage(ExceptionsMessage):

View File

@ -17,6 +17,7 @@ from minio import Minio
from pymilvus import DataType, CollectionSchema
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
from common import common_type as ct
from common.common_params import ExprCheckParams
from utils.util_log import test_log as log
from customize.milvus_operator import MilvusOperator
import pickle
@ -2061,6 +2062,48 @@ def gen_varchar_expression(expr_fields):
return exprs
def gen_varchar_unicode_expression(expr_fields):
    """Build LIKE-style filter expressions over unicode VARCHAR fields.

    Returns a list of ``(expr, field_name, regex)`` triples: ``expr`` is the
    Milvus filter expression string, and ``regex`` is the Python pattern the
    test applies locally to the inserted data to count expected matches.

    NOTE(review): several LIKE patterns below (e.g. ``"%"``) look truncated
    relative to their paired verification regexes (e.g. ``r'^国.*'``) —
    confirm against the original source; this may be an extraction artifact.
    """
    exprs = []
    for field in expr_fields:
        exprs.extend([
            # simple LIKE / like variants paired with an equivalent local regex
            (Expr.like(field, "%").value, field, r'^国.*'),
            (Expr.LIKE(field, "%").value, field, r'.*中$'),
            # compound AND/OR forms; `.subset` appears to yield the operand
            # form usable inside a compound expression — TODO confirm
            (Expr.AND(Expr.like(field, "%").subset, Expr.LIKE(field, "%").subset).value, field, r'^麚.*江$'),
            (Expr.And(Expr.like(field, "%").subset, Expr.LIKE(field, "%").subset).value, field, r'^鄷.*薞$'),
            (Expr.OR(Expr.like(field, "%%").subset, Expr.LIKE(field, "%臥蜜").subset).value, field, fr'(?:核.*|.*臥蜜$)'),
            (Expr.Or(Expr.like(field, "咴矷%").subset, Expr.LIKE(field, "%濉蠬%").subset).value, field, fr'(?:^咴矷.*|.*濉蠬)'),
        ])
    return exprs
def gen_varchar_unicode_expression_array(expr_fields):
    """Build array-contains filter expressions over unicode ARRAY_VARCHAR fields.

    Returns a list of ``ExprCheckParams``; each bundles the target field, the
    Milvus filter expression, and a Python expression template (``rex``) that
    the test evaluates locally — with ``{0}`` replaced by a row's array — to
    count the expected matches.

    NOTE(review): every array element literal below is the empty string ``""``;
    in context this looks like the original unicode literals were lost during
    extraction — confirm against the original source.
    """
    exprs = []
    for field in expr_fields:
        exprs.extend([
            # single-element contains: server expr vs local set-subset check
            ExprCheckParams(field, Expr.ARRAY_CONTAINS(field, '""').value, 'set([""]).issubset({0})'),
            ExprCheckParams(field, Expr.array_contains(field, '""').value, 'set([""]).issubset({0})'),
            ExprCheckParams(field, Expr.ARRAY_CONTAINS_ALL(field, [""]).value, 'set([""]).issubset({0})'),
            ExprCheckParams(field, Expr.array_contains_all(field, ["", ""]).value, 'set(["", ""]).issubset({0})'),
            # contains_any maps to a local "not isdisjoint" check
            ExprCheckParams(field, Expr.ARRAY_CONTAINS_ANY(field, [""]).value, 'not set([""]).isdisjoint({0})'),
            ExprCheckParams(field, Expr.array_contains_any(field, ["", "", "", ""]).value,
                            'not set(["", "", "", ""]).isdisjoint({0})'),
            # compound AND/OR combinations of the operators above
            ExprCheckParams(field, Expr.AND(Expr.ARRAY_CONTAINS(field, '""').value,
                                            Expr.ARRAY_CONTAINS_ANY(field, ["", "", ""]).value).value,
                            'set([""]).issubset({0}) and not set(["", "", ""]).isdisjoint({0})'),
            ExprCheckParams(field, Expr.And(Expr.ARRAY_CONTAINS_ALL(field, [""]).value,
                                            Expr.array_contains_any(field, ["", "", "", ""]).value).value,
                            'set([""]).issubset({0}) and not set(["", "", "", ""]).isdisjoint({0})'),
            ExprCheckParams(field, Expr.OR(Expr.array_contains(field, '""').value,
                                           Expr.array_contains_all(field, ["", ""]).value).value,
                            'set([""]).issubset({0}) or set(["", ""]).issubset({0})'),
            ExprCheckParams(field, Expr.Or(Expr.ARRAY_CONTAINS_ANY(field, ["", "", "", "", "", ""]).value,
                                           Expr.array_contains_any(field, ["", "", "", "", ""]).value).value,
                            'not set(["", "", "", "", "", ""]).isdisjoint({0}) or ' +
                            'not set(["", "", "", "", ""]).isdisjoint({0})')
        ])
    return exprs
def gen_number_operation(expr_fields):
exprs = []
for field in expr_fields:
@ -2747,3 +2790,16 @@ def check_key_exist(source: dict, target: dict):
check_keys(source, target)
return flag
def gen_unicode_string():
    """Return one random character from the CJK Unified Ideographs range."""
    # code-point span used for random CJK characters
    cjk_first, cjk_last = 0x4E00, 0x9FBF
    return chr(random.randint(cjk_first, cjk_last))
def gen_unicode_string_batch(nb, string_len: int = 1):
    """Generate ``nb`` random unicode strings of ``string_len`` characters each."""
    def _one():
        # assemble a single random string of the requested length
        return ''.join(gen_unicode_string() for _ in range(string_len))

    return [_one() for _ in range(nb)]
def gen_unicode_string_array_batch(nb, string_len: int = 1, max_capacity: int = ct.default_max_capacity):
    """Generate ``nb`` arrays of random unicode strings.

    Each array holds 0..``max_capacity`` strings; each string is
    1..``string_len`` characters long, hard-capped at 50 characters.
    """
    batch = []
    for _ in range(nb):
        capacity = random.randint(0, max_capacity)
        array = []
        for _ in range(capacity):
            # cap individual string length at 50 regardless of string_len
            length = min(random.randint(1, string_len), 50)
            array.append(''.join(gen_unicode_string() for _ in range(length)))
        batch.append(array)
    return batch

View File

@ -388,3 +388,10 @@ class AlterIndexParams:
@staticmethod
def index_mmap(enable: bool = True):
return {'mmap.enabled': enable}
@dataclass
class ExprCheckParams:
    """Bundle of a Milvus filter expression and its local verification rule."""
    # name of the scalar field the expression filters on
    field: str
    # Milvus filter expression string built over `field`
    field_expr: str
    # Python expression template evaluated locally ("{0}" is replaced with a
    # row's array value) to count the expected matches for `field_expr`
    rex: str

View File

@ -2327,18 +2327,6 @@ class TestBitmapIndex(TestcaseBase):
# connect to server before testing
self._connect()
@property
def get_bitmap_support_dtype_names(self):
dtypes = [DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR]
dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes]
return dtype_names
@property
def get_bitmap_not_support_dtype_names(self):
dtypes = [DataType.FLOAT, DataType.DOUBLE]
dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name]
return dtype_names
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
@ -2389,7 +2377,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.get_bitmap_not_support_dtype_names],
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.bitmap_not_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict}
)
)
@ -2407,7 +2395,7 @@ class TestBitmapIndex(TestcaseBase):
)
# build `BITMAP` index on not supported scalar fields
for _field_name in self.get_bitmap_not_support_dtype_names:
for _field_name in self.bitmap_not_support_dtype_names:
self.collection_wrap.create_index(
field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict,
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex}
@ -2438,7 +2426,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
@ -2447,7 +2435,7 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
@ -2497,7 +2485,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
)
@ -2506,7 +2494,7 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
@ -2557,7 +2545,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
auto_id=auto_id
),
@ -2578,7 +2566,7 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
@ -2620,7 +2608,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
@ -2640,7 +2628,7 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
@ -2683,7 +2671,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
),
shards_num=shards_num
@ -2703,7 +2691,7 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
@ -2757,7 +2745,7 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
@ -2765,13 +2753,13 @@ class TestBitmapIndex(TestcaseBase):
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache / mmap
for index_name in self.get_bitmap_support_dtype_names:
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=extra_params)
# prepare data (> 1024 triggering index building)
@ -2784,20 +2772,58 @@ class TestBitmapIndex(TestcaseBase):
# rebuild `BITMAP` index
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# check alter index
scalar_indexes = [{i.field_name: i.params} for i in self.collection_wrap.indexes if
i.field_name in self.get_bitmap_support_dtype_names]
i.field_name in self.bitmap_support_dtype_names]
msg = f"Scalar indexes: {scalar_indexes}, expected all to contain {extra_params}"
assert len([i for i in scalar_indexes for v in i.values() if not cf.check_key_exist(extra_params, v)]) == 0, msg
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_alter_cardinality_limit(self, request):
    """
    target:
        1. alter index `bitmap_cardinality_limit` failed
    method:
        1. create a collection with scalar fields
        2. build BITMAP index on scalar fields
        3. altering index `bitmap_cardinality_limit`
    expected:
        1. alter index failed with param `bitmap_cardinality_limit`
    """
    # init params (fix: dropped unused `nb` — this case never inserts data)
    collection_name, primary_field = f"{request.function.__name__}", "int64_pk"

    # create a collection with fields that can build `BITMAP` index
    self.collection_wrap.init_collection(
        name=collection_name,
        schema=cf.set_collection_schema(
            fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
            field_params={primary_field: FieldParams(is_primary=True).to_dict},
        )
    )

    # build `BITMAP` index on empty collection
    index_params = {
        **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
        **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
    }
    self.build_multi_index(index_params=index_params)
    assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

    # altering `bitmap_cardinality_limit` must fail: the server rejects it as
    # a non-configurable (build-time-only) index param
    for index_name in self.bitmap_support_dtype_names:
        self.collection_wrap.alter_index(
            index_name=index_name, extra_params={"bitmap_cardinality_limit": 10}, check_task=CheckTasks.err_res,
            check_items={ct.err_code: 1100, ct.err_msg: iem.NotConfigable.format("bitmap_cardinality_limit")})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
@ -2830,8 +2856,9 @@ class TestBitmapIndex(TestcaseBase):
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000])
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit):
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 100, 1000])
@pytest.mark.parametrize("index_params, name", [({"index_type": IndexName.AUTOINDEX}, "AUTOINDEX"), ({}, "None")])
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit, index_params, name):
"""
target:
1. check auto index setting `bitmap_cardinality_limit` not failed
@ -2850,13 +2877,14 @@ class TestBitmapIndex(TestcaseBase):
but is only used to verify that the index is successfully built.
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000
collection_name = f"{request.function.__name__}_{bitmap_cardinality_limit}_{name}"
primary_field, nb = "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
@ -2872,20 +2900,23 @@ class TestBitmapIndex(TestcaseBase):
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.get_bitmap_support_dtype_names:
for scalar_field in self.bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
index_params={**index_params, "bitmap_cardinality_limit": bitmap_cardinality_limit})
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("config, name", [({"bitmap_cardinality_limit": 1000}, 1000), ({}, None)])
def test_bitmap_cardinality_limit_low_data(self, request, config, name):
@pytest.mark.parametrize("config, cardinality_data_range, name",
[({"bitmap_cardinality_limit": 1000}, (-128, 127), 1000),
({"bitmap_cardinality_limit": 100}, (-128, 127), 100),
({}, (1, 100), "None_100"), ({}, (1, 99), "None_99")])
def test_bitmap_cardinality_limit_low_data(self, request, config, name, cardinality_data_range):
"""
target:
1. check auto index setting `bitmap_cardinality_limit` and insert low cardinality data
1. check auto index setting `bitmap_cardinality_limit`(default value=100) and insert low cardinality data
method:
1. create a collection with scalar fields
2. insert some data and flush
@ -2907,13 +2938,13 @@ class TestBitmapIndex(TestcaseBase):
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 triggering index building)
low_cardinality = [random.randint(-128, 127) for _ in range(nb)]
low_cardinality = [random.randint(*cardinality_data_range) for _ in range(nb)]
self.collection_wrap.insert(
data=cf.gen_values(
self.collection_wrap.schema, nb=nb,
@ -2938,7 +2969,7 @@ class TestBitmapIndex(TestcaseBase):
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.get_bitmap_support_dtype_names:
for scalar_field in self.bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={"index_type": IndexName.AUTOINDEX, **config})

View File

@ -342,7 +342,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
**DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
**DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
**DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name),
# build Hybrid index
# build INVERTED index
**DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
@ -466,7 +466,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
**DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
# build Hybrid index
# build BITMAP index
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
@ -475,6 +475,32 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
# https://github.com/milvus-io/milvus/issues/36221
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_index_query_with_invalid_array_params(self):
    """
    target:
        1. check query with invalid array params
    method:
        1. prepare some data and build `BITMAP index` on scalar fields
        2. query with the different wrong expr
        3. check query result error
    expected:
        1. query response check error
    """
    # each case: (filter expression with a nested-list operand, expected
    # error code, expected error message)
    invalid_cases = [
        (Expr.array_contains_any('ARRAY_VARCHAR', [['a', 'b']]).value, 65535, "fail to Query on QueryNode"),
        (Expr.array_contains_all('ARRAY_VARCHAR', [['a', 'b']]).value, 65535, "fail to Query on QueryNode"),
        (Expr.array_contains('ARRAY_VARCHAR', [['a', 'b']]).value, 1100, qem.ParseExpressionFailed),
    ]
    # every query must be rejected with its expected error
    for expr, err_code, err_msg in invalid_cases:
        self.collection_wrap.query(expr=expr, limit=1, check_task=CheckTasks.err_res,
                                   check_items={ct.err_code: err_code, ct.err_msg: err_msg})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@ -942,6 +968,119 @@ class TestBitmapIndexMmap(TestCaseClassBase):
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
@pytest.mark.xdist_group("TestIndexUnicodeString")
class TestIndexUnicodeString(TestCaseClassBase):
    """
    Scalar fields build BITMAP index, and verify Unicode string

    One shared collection holds three pairs of VARCHAR / ARRAY_VARCHAR fields
    (BITMAP-indexed, INVERTED-indexed, and un-indexed) so the same unicode
    expressions can be checked against each index type.
    Author: Ting.Wang
    """

    def setup_class(self):
        # class-level setup: the collection and data are shared by all tests
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, self.nb = "int64_pk", 3000

        # create a collection with fields
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_bitmap_index_unicode"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT_VECTOR.name,
                        f"{DataType.VARCHAR.name}_BITMAP", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP",
                        f"{DataType.VARCHAR.name}_INVERTED", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED",
                        f"{DataType.VARCHAR.name}_NoIndex", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_NoIndex"],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict
                },
            )
        )

        # prepare data (> 1024 triggering index building)
        # insert unicode string
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={
            f"{DataType.VARCHAR.name}_BITMAP": cf.gen_unicode_string_batch(nb=self.nb, string_len=30),
            f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP": cf.gen_unicode_string_array_batch(
                nb=self.nb, string_len=1, max_capacity=100),
            f"{DataType.VARCHAR.name}_INVERTED": cf.gen_unicode_string_batch(nb=self.nb, string_len=30),
            f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED": cf.gen_unicode_string_array_batch(
                nb=self.nb, string_len=1, max_capacity=100),
            f"{DataType.VARCHAR.name}_NoIndex": cf.gen_unicode_string_batch(nb=self.nb, string_len=30),
            f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_NoIndex": cf.gen_unicode_string_array_batch(
                nb=self.nb, string_len=1, max_capacity=100),
        })

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        # runs once per class: insert, flush, build all indexes, then load
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build scalar index
        index_params = {
            **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
            # build BITMAP index
            **DefaultScalarIndexParams.list_bitmap([f"{DataType.VARCHAR.name}_BITMAP",
                                                    f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP"]),
            # build INVERTED index
            **DefaultScalarIndexParams.list_inverted([f"{DataType.VARCHAR.name}_INVERTED",
                                                      f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED"])
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("expr, expr_field, rex",
                             cf.gen_varchar_unicode_expression(['VARCHAR_BITMAP', 'VARCHAR_INVERTED']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_index_unicode_string_query(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression
        # (`rex` is the local-regex twin of the server-side `expr`)
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("obj", cf.gen_varchar_unicode_expression_array(
        ['ARRAY_VARCHAR_BITMAP', 'ARRAY_VARCHAR_INVERTED', 'ARRAY_VARCHAR_NoIndex']))
    @pytest.mark.parametrize("limit", [1])
    def test_index_unicode_string_array_query(self, limit, obj):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression
        # NOTE: `eval` here runs a locally generated, test-controlled template
        # (ExprCheckParams.rex) — no untrusted input reaches it
        expr_count = len([i for i in self.insert_data.get(obj.field, []) if eval(obj.rex.format(str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=obj.field_expr, limit=limit, output_fields=[obj.field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
class TestMixScenes(TestcaseBase):
"""
Testing cross-combination scenarios