mirror of https://github.com/milvus-io/milvus.git
test: add more bitmap test cases (#36290)
Signed-off-by: wangting0128 <ting.wang@zilliz.com>
pull/36310/head
parent
dcd904d2fa
commit
526a672bae
|
|
@ -86,8 +86,8 @@ class Base:
|
|||
rgs_list = self.utility_wrap.list_resource_groups()[0]
|
||||
for rg_name in self.resource_group_list:
|
||||
if rg_name is not None and rg_name in rgs_list:
|
||||
rg = \
|
||||
self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
|
||||
rg = self.utility_wrap.describe_resource_group(
|
||||
name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
|
||||
if isinstance(rg, ResourceGroupInfo):
|
||||
if rg.num_available_node > 0:
|
||||
self.utility_wrap.transfer_node(source=rg_name,
|
||||
|
|
@ -443,27 +443,7 @@ class TestcaseBase(Base):
|
|||
log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
|
||||
return indexes
|
||||
|
||||
|
||||
class TestCaseClassBase(TestcaseBase):
|
||||
"""
|
||||
Setup objects on class
|
||||
"""
|
||||
|
||||
def setup_class(self):
|
||||
log.info("[setup_class] " + " Start setup class ".center(100, "~"))
|
||||
self._setup_objects(self)
|
||||
|
||||
def teardown_class(self):
|
||||
log.info("[teardown_class]" + " Start teardown class ".center(100, "~"))
|
||||
self._teardown_objects(self)
|
||||
|
||||
def setup_method(self, method):
|
||||
log.info(" setup ".center(80, "*"))
|
||||
log.info("[setup_method] Start setup test case %s." % method.__name__)
|
||||
|
||||
def teardown_method(self, method):
|
||||
log.info(" teardown ".center(80, "*"))
|
||||
log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
|
||||
""" Property """
|
||||
|
||||
@property
|
||||
def all_scalar_fields(self):
|
||||
|
|
@ -493,3 +473,25 @@ class TestCaseClassBase(TestcaseBase):
|
|||
@property
|
||||
def bitmap_not_support_dtype_names(self):
|
||||
return list(set(self.all_scalar_fields) - set(self.bitmap_support_dtype_names))
|
||||
|
||||
|
||||
class TestCaseClassBase(TestcaseBase):
    """
    Base class for suites that set up their objects once per class

    The class-level hooks forward `self` explicitly to `_setup_objects` /
    `_teardown_objects`; the method-level hooks only write log banners.
    """

    def setup_class(self):
        # banner first, then build the shared objects
        banner = " Start setup class ".center(100, "~")
        log.info("[setup_class] " + banner)
        self._setup_objects(self)

    def teardown_class(self):
        # banner first, then release the shared objects
        banner = " Start teardown class ".center(100, "~")
        log.info("[teardown_class]" + banner)
        self._teardown_objects(self)

    def setup_method(self, method):
        # per-test hook: logging only, no objects are created here
        log.info(" setup ".center(80, "*"))
        log.info(f"[setup_method] Start setup test case {method.__name__}.")

    def teardown_method(self, method):
        # per-test hook: logging only, no objects are released here
        log.info(" teardown ".center(80, "*"))
        log.info(f"[teardown_method] Start teardown test case {method.__name__}...")
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ class IndexErrorMessage(ExceptionsMessage):
|
|||
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
|
||||
CheckBitmapOnPK = "create bitmap index on primary key not supported"
|
||||
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
|
||||
NotConfigable = "{0} is not configable index param"
|
||||
|
||||
|
||||
class QueryErrorMessage(ExceptionsMessage):
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from minio import Minio
|
|||
from pymilvus import DataType, CollectionSchema
|
||||
from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper
|
||||
from common import common_type as ct
|
||||
from common.common_params import ExprCheckParams
|
||||
from utils.util_log import test_log as log
|
||||
from customize.milvus_operator import MilvusOperator
|
||||
import pickle
|
||||
|
|
@ -2061,6 +2062,48 @@ def gen_varchar_expression(expr_fields):
|
|||
return exprs
|
||||
|
||||
|
||||
def gen_varchar_unicode_expression(expr_fields):
    """
    Build LIKE-style filter cases over Unicode (CJK) literals for each given field.

    :param expr_fields: iterable of field names to generate cases for
    :return: list of `(expression, field_name, regex)` tuples, six per field, where
             `regex` is the pattern paired with the expression for checking values
    """
    cases = []
    for name in expr_fields:
        # single prefix / suffix match
        cases.append((Expr.like(name, "国%").value, name, r'^国.*'))
        cases.append((Expr.LIKE(name, "%中").value, name, r'.*中$'))

        # suffix AND prefix
        ends_then_starts = Expr.AND(Expr.like(name, "%江").subset, Expr.LIKE(name, "麚%").subset)
        cases.append((ends_then_starts.value, name, r'^麚.*江$'))
        starts_then_ends = Expr.And(Expr.like(name, "鄷%").subset, Expr.LIKE(name, "%薞").subset)
        cases.append((starts_then_ends.value, name, r'^鄷.*薞$'))

        # infix / suffix / prefix alternatives joined with OR
        contains_or_ends = Expr.OR(Expr.like(name, "%核%").subset, Expr.LIKE(name, "%臥蜜").subset)
        cases.append((contains_or_ends.value, name, r'(?:核.*|.*臥蜜$)'))
        starts_or_contains = Expr.Or(Expr.like(name, "咴矷%").subset, Expr.LIKE(name, "%濉蠬%").subset)
        cases.append((starts_or_contains.value, name, r'(?:^咴矷.*|.*濉蠬)'))
    return cases
|
||||
|
||||
|
||||
def gen_varchar_unicode_expression_array(expr_fields):
    """
    Build array-contains filter cases over Unicode (CJK) literals for each given field.

    :param expr_fields: iterable of array field names to generate cases for
    :return: list of `ExprCheckParams`, ten per field; `rex` is a Python expression
             template whose `{0}` placeholder stands for one row's array value
    """
    cases = []
    for name in expr_fields:
        # plain contains / contains_all / contains_any
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.ARRAY_CONTAINS(name, '"中"').value,
            rex='set(["中"]).issubset({0})'))
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.array_contains(name, '"国"').value,
            rex='set(["国"]).issubset({0})'))
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.ARRAY_CONTAINS_ALL(name, ["华"]).value,
            rex='set(["华"]).issubset({0})'))
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.array_contains_all(name, ["中", "国"]).value,
            rex='set(["中", "国"]).issubset({0})'))
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.ARRAY_CONTAINS_ANY(name, ["紅"]).value,
            rex='not set(["紅"]).isdisjoint({0})'))
        cases.append(ExprCheckParams(
            field=name, field_expr=Expr.array_contains_any(name, ["紅", "父", "环", "稵"]).value,
            rex='not set(["紅", "父", "环", "稵"]).isdisjoint({0})'))

        # two conditions joined with AND
        contains_and_any = Expr.AND(
            Expr.ARRAY_CONTAINS(name, '"噜"').value,
            Expr.ARRAY_CONTAINS_ANY(name, ["浮", "沮", "茫"]).value)
        cases.append(ExprCheckParams(
            field=name, field_expr=contains_and_any.value,
            rex='set(["噜"]).issubset({0}) and not set(["浮", "沮", "茫"]).isdisjoint({0})'))
        all_and_any = Expr.And(
            Expr.ARRAY_CONTAINS_ALL(name, ["爤"]).value,
            Expr.array_contains_any(name, ["暁", "非", "鸳", "丹"]).value)
        cases.append(ExprCheckParams(
            field=name, field_expr=all_and_any.value,
            rex='set(["爤"]).issubset({0}) and not set(["暁", "非", "鸳", "丹"]).isdisjoint({0})'))

        # two conditions joined with OR
        contains_or_all = Expr.OR(
            Expr.array_contains(name, '"草"').value,
            Expr.array_contains_all(name, ["昩", "苴"]).value)
        cases.append(ExprCheckParams(
            field=name, field_expr=contains_or_all.value,
            rex='set(["草"]).issubset({0}) or set(["昩", "苴"]).issubset({0})'))
        any_or_any = Expr.Or(
            Expr.ARRAY_CONTAINS_ANY(name, ["魡", "展", "隶", "韀", "脠", "噩"]).value,
            Expr.array_contains_any(name, ["备", "嘎", "蝐", "秦", "万"]).value)
        cases.append(ExprCheckParams(
            field=name, field_expr=any_or_any.value,
            rex=('not set(["魡", "展", "隶", "韀", "脠", "噩"]).isdisjoint({0}) or '
                 'not set(["备", "嘎", "蝐", "秦", "万"]).isdisjoint({0})')))
    return cases
|
||||
|
||||
|
||||
def gen_number_operation(expr_fields):
|
||||
exprs = []
|
||||
for field in expr_fields:
|
||||
|
|
@ -2747,3 +2790,16 @@ def check_key_exist(source: dict, target: dict):
|
|||
|
||||
check_keys(source, target)
|
||||
return flag
|
||||
|
||||
|
||||
def gen_unicode_string():
    """Return a one-character string whose code point is drawn at random from 0x4E00..0x9FBF (inclusive)."""
    code_point = random.randint(0x4e00, 0x9fbf)
    return chr(code_point)
|
||||
|
||||
|
||||
def gen_unicode_string_batch(nb, string_len: int = 1):
    """
    Generate `nb` random strings, each made of `string_len` characters from `gen_unicode_string`.

    :param nb: number of strings to produce
    :param string_len: number of characters in every string
    :return: list of `nb` strings
    """
    batch = []
    for _ in range(nb):
        chars = [gen_unicode_string() for _ in range(string_len)]
        batch.append(''.join(chars))
    return batch
|
||||
|
||||
|
||||
def gen_unicode_string_array_batch(nb, string_len: int = 1, max_capacity: int = ct.default_max_capacity):
    """
    Generate `nb` rows, each a list of random strings built from `gen_unicode_string`.

    :param nb: number of rows to produce
    :param string_len: upper bound for the random length of each string (lengths are capped at 50)
    :param max_capacity: upper bound (inclusive) for the random number of strings in a row; a row may be empty
    :return: list of `nb` lists of strings
    """
    rows = []
    for _ in range(nb):
        # the row size is drawn first, then one length per element
        capacity = random.randint(0, max_capacity)
        row = []
        for _ in range(capacity):
            length = min(random.randint(1, string_len), 50)
            row.append(''.join(gen_unicode_string() for _ in range(length)))
        rows.append(row)
    return rows
|
||||
|
|
|
|||
|
|
@ -388,3 +388,10 @@ class AlterIndexParams:
|
|||
@staticmethod
|
||||
def index_mmap(enable: bool = True):
|
||||
return {'mmap.enabled': enable}
|
||||
|
||||
|
||||
@dataclass
class ExprCheckParams:
    """
    One filter-expression test case: the field it targets, the expression text,
    and a check template used to evaluate candidate values.
    """

    # name of the field the expression is written against
    field: str

    # the filter expression string
    field_expr: str

    # check template; may hold a `{0}` placeholder to be filled with a value before evaluation
    rex: str
|
||||
|
|
|
|||
|
|
@ -2327,18 +2327,6 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# connect to server before testing
|
||||
self._connect()
|
||||
|
||||
@property
def get_bitmap_support_dtype_names(self):
    """
    Names of the scalar dtypes listed here as accepting a BITMAP index, followed by
    their `ARRAY_`-prefixed counterparts.
    """
    supported = (DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR)
    scalar_names = [f"{dtype.name}" for dtype in supported]
    array_names = [f"ARRAY_{dtype.name}" for dtype in supported]
    return scalar_names + array_names
|
||||
|
||||
@property
def get_bitmap_not_support_dtype_names(self):
    """
    Names of the dtypes listed here as rejecting a BITMAP index: FLOAT and DOUBLE,
    their `ARRAY_`-prefixed counterparts, and JSON.
    """
    unsupported = (DataType.FLOAT, DataType.DOUBLE)
    names = [f"{dtype.name}" for dtype in unsupported]
    names = names + [f"ARRAY_{dtype.name}" for dtype in unsupported]
    return names + [DataType.JSON.name]
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L0)
|
||||
@pytest.mark.parametrize("auto_id", [True, False])
|
||||
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
|
||||
|
|
@ -2389,7 +2377,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.get_bitmap_not_support_dtype_names],
|
||||
fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.bitmap_not_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict}
|
||||
)
|
||||
)
|
||||
|
|
@ -2407,7 +2395,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
)
|
||||
|
||||
# build `BITMAP` index on not supported scalar fields
|
||||
for _field_name in self.get_bitmap_not_support_dtype_names:
|
||||
for _field_name in self.bitmap_not_support_dtype_names:
|
||||
self.collection_wrap.create_index(
|
||||
field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict,
|
||||
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex}
|
||||
|
|
@ -2438,7 +2426,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
auto_id=auto_id
|
||||
)
|
||||
|
|
@ -2447,7 +2435,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index on empty collection
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
|
@ -2497,7 +2485,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
auto_id=auto_id
|
||||
)
|
||||
|
|
@ -2506,7 +2494,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index on empty collection
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
|
@ -2557,7 +2545,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
auto_id=auto_id
|
||||
),
|
||||
|
|
@ -2578,7 +2566,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
|
@ -2620,7 +2608,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
),
|
||||
shards_num=shards_num
|
||||
|
|
@ -2640,7 +2628,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
|
@ -2683,7 +2671,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
),
|
||||
shards_num=shards_num
|
||||
|
|
@ -2703,7 +2691,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index on empty collection
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
|
@ -2757,7 +2745,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
|
@ -2765,13 +2753,13 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# build `BITMAP` index on empty collection
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
||||
# enable offset cache / mmap
|
||||
for index_name in self.get_bitmap_support_dtype_names:
|
||||
for index_name in self.bitmap_support_dtype_names:
|
||||
self.collection_wrap.alter_index(index_name=index_name, extra_params=extra_params)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
|
|
@ -2784,20 +2772,58 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# rebuild `BITMAP` index
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
||||
# check alter index
|
||||
scalar_indexes = [{i.field_name: i.params} for i in self.collection_wrap.indexes if
|
||||
i.field_name in self.get_bitmap_support_dtype_names]
|
||||
i.field_name in self.bitmap_support_dtype_names]
|
||||
msg = f"Scalar indexes: {scalar_indexes}, expected all to contain {extra_params}"
|
||||
assert len([i for i in scalar_indexes for v in i.values() if not cf.check_key_exist(extra_params, v)]) == 0, msg
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_alter_cardinality_limit(self, request):
    """
    target:
        1. alter index `bitmap_cardinality_limit` failed
    method:
        1. create a collection with scalar fields
        2. build BITMAP index on scalar fields
        3. altering index `bitmap_cardinality_limit`
    expected:
        1. alter index failed with param `bitmap_cardinality_limit`
    """
    # init params (this case inserts no rows, so no batch size is defined)
    collection_name, primary_field = f"{request.function.__name__}", "int64_pk"

    # create a collection with fields that can build `BITMAP` index
    self.collection_wrap.init_collection(
        name=collection_name,
        schema=cf.set_collection_schema(
            fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
            field_params={primary_field: FieldParams(is_primary=True).to_dict},
        )
    )

    # build `BITMAP` index on empty collection
    index_params = {
        **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
        **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
    }
    self.build_multi_index(index_params=index_params)
    # every requested field must now carry an index
    assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

    # alter `bitmap_cardinality_limit` failed
    # the scalar field names are passed as the index names here
    for index_name in self.bitmap_support_dtype_names:
        self.collection_wrap.alter_index(
            index_name=index_name, extra_params={"bitmap_cardinality_limit": 10}, check_task=CheckTasks.err_res,
            check_items={ct.err_code: 1100, ct.err_msg: iem.NotConfigable.format("bitmap_cardinality_limit")})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
|
||||
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
|
||||
|
|
@ -2830,8 +2856,9 @@ class TestBitmapIndex(TestcaseBase):
|
|||
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000])
|
||||
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit):
|
||||
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 100, 1000])
|
||||
@pytest.mark.parametrize("index_params, name", [({"index_type": IndexName.AUTOINDEX}, "AUTOINDEX"), ({}, "None")])
|
||||
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit, index_params, name):
|
||||
"""
|
||||
target:
|
||||
1. check auto index setting `bitmap_cardinality_limit` not failed
|
||||
|
|
@ -2850,13 +2877,14 @@ class TestBitmapIndex(TestcaseBase):
|
|||
but is only used to verify that the index is successfully built.
|
||||
"""
|
||||
# init params
|
||||
collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000
|
||||
collection_name = f"{request.function.__name__}_{bitmap_cardinality_limit}_{name}"
|
||||
primary_field, nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields that can build `BITMAP` index
|
||||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
|
@ -2872,20 +2900,23 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
|
||||
|
||||
# build scalar index
|
||||
for scalar_field in self.get_bitmap_support_dtype_names:
|
||||
for scalar_field in self.bitmap_support_dtype_names:
|
||||
self.collection_wrap.create_index(
|
||||
field_name=scalar_field, index_name=scalar_field,
|
||||
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
|
||||
index_params={**index_params, "bitmap_cardinality_limit": bitmap_cardinality_limit})
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("config, name", [({"bitmap_cardinality_limit": 1000}, 1000), ({}, None)])
|
||||
def test_bitmap_cardinality_limit_low_data(self, request, config, name):
|
||||
@pytest.mark.parametrize("config, cardinality_data_range, name",
|
||||
[({"bitmap_cardinality_limit": 1000}, (-128, 127), 1000),
|
||||
({"bitmap_cardinality_limit": 100}, (-128, 127), 100),
|
||||
({}, (1, 100), "None_100"), ({}, (1, 99), "None_99")])
|
||||
def test_bitmap_cardinality_limit_low_data(self, request, config, name, cardinality_data_range):
|
||||
"""
|
||||
target:
|
||||
1. check auto index setting `bitmap_cardinality_limit` and insert low cardinality data
|
||||
1. check auto index setting `bitmap_cardinality_limit`(default value=100) and insert low cardinality data
|
||||
method:
|
||||
1. create a collection with scalar fields
|
||||
2. insert some data and flush
|
||||
|
|
@ -2907,13 +2938,13 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
low_cardinality = [random.randint(-128, 127) for _ in range(nb)]
|
||||
low_cardinality = [random.randint(*cardinality_data_range) for _ in range(nb)]
|
||||
self.collection_wrap.insert(
|
||||
data=cf.gen_values(
|
||||
self.collection_wrap.schema, nb=nb,
|
||||
|
|
@ -2938,7 +2969,7 @@ class TestBitmapIndex(TestcaseBase):
|
|||
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
|
||||
|
||||
# build scalar index
|
||||
for scalar_field in self.get_bitmap_support_dtype_names:
|
||||
for scalar_field in self.bitmap_support_dtype_names:
|
||||
self.collection_wrap.create_index(
|
||||
field_name=scalar_field, index_name=scalar_field,
|
||||
index_params={"index_type": IndexName.AUTOINDEX, **config})
|
||||
|
|
|
|||
|
|
@ -342,7 +342,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
**DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
|
||||
**DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name),
|
||||
# build Hybrid index
|
||||
# build INVERTED index
|
||||
**DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
|
|
@ -466,7 +466,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
|
||||
**DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
|
||||
# build Hybrid index
|
||||
# build BITMAP index
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
|
|
@ -475,6 +475,32 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
# https://github.com/milvus-io/milvus/issues/36221
|
||||
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_index_query_with_invalid_array_params(self):
    """
    target:
        1. check query with invalid array params
    method:
        1. prepare some data and build `BITMAP index` on scalar fields
        2. query with the different wrong expr (a nested list passed as the array value)
        3. check query result error
    expected:
        1. query response check error
    """
    # (expression builder, expected error code, expected error message), queried in this order
    invalid_cases = (
        (Expr.array_contains_any, 65535, "fail to Query on QueryNode"),
        (Expr.array_contains_all, 65535, "fail to Query on QueryNode"),
        (Expr.array_contains, 1100, qem.ParseExpressionFailed),
    )

    # query
    for build_expr, err_code, err_msg in invalid_cases:
        self.collection_wrap.query(
            expr=build_expr('ARRAY_VARCHAR', [['a', 'b']]).value, limit=1, check_task=CheckTasks.err_res,
            check_items={ct.err_code: err_code, ct.err_msg: err_msg})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
|
@ -942,6 +968,119 @@ class TestBitmapIndexMmap(TestCaseClassBase):
|
|||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestIndexUnicodeString")
class TestIndexUnicodeString(TestCaseClassBase):
    """
    Unicode string checks on VARCHAR / ARRAY_VARCHAR fields that carry a BITMAP index,
    an INVERTED index, or no scalar index

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field = "int64_pk"
        self.nb = 3000

        # one VARCHAR field and one ARRAY_VARCHAR field per index flavour
        varchar, array = DataType.VARCHAR.name, DataType.ARRAY.name
        flavours = ("BITMAP", "INVERTED", "NoIndex")
        scalar_fields = []
        for flavour in flavours:
            scalar_fields.append(f"{varchar}_{flavour}")
            scalar_fields.append(f"{array}_{varchar}_{flavour}")

        # create a collection with fields
        collection_name = cf.gen_unique_str("test_bitmap_index_unicode")
        schema = cf.set_collection_schema(
            fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *scalar_fields],
            field_params={
                self.primary_field: FieldParams(is_primary=True).to_dict
            },
        )
        self.collection_wrap.init_collection(name=collection_name, schema=schema)

        # prepare data (> 1024 triggering index building)
        # every scalar field is filled with generated unicode strings
        collection_schema = self.collection_wrap.schema
        unicode_values = {}
        for flavour in flavours:
            unicode_values[f"{varchar}_{flavour}"] = cf.gen_unicode_string_batch(nb=self.nb, string_len=30)
            unicode_values[f"{array}_{varchar}_{flavour}"] = cf.gen_unicode_string_array_batch(
                nb=self.nb, string_len=1, max_capacity=100)
        self.insert_data = cf.gen_field_values(collection_schema, nb=self.nb, default_values=unicode_values)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        rows = list(self.insert_data.values())
        self.collection_wrap.insert(data=rows, check_task=CheckTasks.check_insert_result)

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build vector + scalar indexes; the `*_NoIndex` fields are deliberately left out
        varchar, array = DataType.VARCHAR.name, DataType.ARRAY.name
        vector_index = DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name)
        bitmap_index = DefaultScalarIndexParams.list_bitmap(
            [f"{varchar}_BITMAP", f"{array}_{varchar}_BITMAP"])
        inverted_index = DefaultScalarIndexParams.list_inverted(
            [f"{varchar}_INVERTED", f"{array}_{varchar}_INVERTED"])
        index_params = {**vector_index, **bitmap_index, **inverted_index}
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("expr, expr_field, rex",
                             cf.gen_varchar_unicode_expression(['VARCHAR_BITMAP', 'VARCHAR_INVERTED']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_index_unicode_string_query(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression
        inserted = self.insert_data.get(expr_field, [])
        expr_count = sum(1 for value in inserted if re.search(rex, value) is not None)
        expected = min(expr_count, limit)

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == expected, f"actual: {len(res)} == expect: {expected}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("obj", cf.gen_varchar_unicode_expression_array(
        ['ARRAY_VARCHAR_BITMAP', 'ARRAY_VARCHAR_INVERTED', 'ARRAY_VARCHAR_NoIndex']))
    @pytest.mark.parametrize("limit", [1])
    def test_index_unicode_string_array_query(self, limit, obj):
        """
        target:
            1. check array string expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression;
        # `obj.rex` is a Python expression template evaluated against each inserted row
        expr_count = 0
        for row in self.insert_data.get(obj.field, []):
            if eval(obj.rex.format(str(row))):
                expr_count += 1
        expected = min(expr_count, limit)

        # query
        res, _ = self.collection_wrap.query(expr=obj.field_expr, limit=limit, output_fields=[obj.field])
        assert len(res) == expected, f"actual: {len(res)} == expect: {expected}"
|
||||
|
||||
|
||||
class TestMixScenes(TestcaseBase):
|
||||
"""
|
||||
Testing cross-combination scenarios
|
||||
|
|
|
|||
Loading…
Reference in New Issue