mirror of https://github.com/milvus-io/milvus.git
test: add more scalar filter expressions (#36545)
Signed-off-by: wangting0128 <ting.wang@zilliz.com>
pull/36564/head
parent
9444329da1
commit
ff4c62e44f
|
@ -40,6 +40,7 @@ class IndexErrorMessage(ExceptionsMessage):
|
|||
CheckBitmapOnPK = "create bitmap index on primary key not supported"
|
||||
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
|
||||
NotConfigable = "{0} is not configable index param"
|
||||
InvalidOffsetCache = "invalid offset cache index params"
|
||||
|
||||
|
||||
class QueryErrorMessage(ExceptionsMessage):
|
||||
|
|
|
@ -1688,6 +1688,10 @@ def get_dim_by_schema(schema=None):
|
|||
return None
|
||||
|
||||
|
||||
def gen_varchar_data(length: int, nb: int):
    """Build a list of `nb` random strings, each made of `length` lowercase ASCII letters.

    :param length: number of characters in every generated string
    :param nb: number of strings to generate
    :return: list of `nb` strings
    """
    def _random_word():
        # code points 97..122 are the letters 'a'..'z'
        return "".join(chr(random.randint(97, 122)) for _ in range(length))

    return [_random_word() for _ in range(nb)]
|
||||
|
||||
|
||||
def gen_data_by_collection_field(field, nb=None, start=None):
|
||||
# if nb is None, return one data, else return a list of data
|
||||
data_type = field.dtype
|
||||
|
@ -1726,8 +1730,8 @@ def gen_data_by_collection_field(field, nb=None, start=None):
|
|||
max_length = min(20, max_length-1)
|
||||
length = random.randint(0, max_length)
|
||||
if nb is None:
|
||||
return "".join([chr(random.randint(97, 122)) for _ in range(length)])
|
||||
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(nb)]
|
||||
return gen_varchar_data(length=length, nb=1)[0]
|
||||
return gen_varchar_data(length=length, nb=nb)
|
||||
if data_type == DataType.JSON:
|
||||
if nb is None:
|
||||
return {"name": fake.name(), "address": fake.address()}
|
||||
|
|
|
@ -2461,6 +2461,81 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# re-build loaded index
|
||||
self.build_multi_index(index_params=index_params)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("index_obj, filed_name", [(DefaultScalarIndexParams.Default, 'INT64_hybrid_index'),
                                                   (DefaultScalarIndexParams.INVERTED, 'INT64_inverted'),
                                                   (DefaultScalarIndexParams.STL_SORT, 'INT64_stl_sort'),
                                                   (DefaultScalarIndexParams.Trie, 'VARCHAR_trie')])
def test_bitmap_offset_cache_on_not_bitmap_fields(self, request, index_obj, filed_name):
    """
    target:
        1. altering the offset cache on a scalar field whose index is not `BITMAP` is rejected
    method:
        1. create a collection with a primary key, a float vector and the parametrized scalar field
        2. build an HNSW index on the vector field and the parametrized scalar index on the scalar field
        3. alter the scalar index with the offset cache params
    expected:
        1. alter index fails with error code 1100 and the `InvalidOffsetCache` message
    """
    # names used throughout the case
    pk_name = 'INT64_pk'
    vector_name = DataType.FLOAT_VECTOR.name
    coll_name = f"{request.function.__name__}_{filed_name}"

    # collection: primary key + float vector + the scalar field under test
    schema = cf.set_collection_schema(
        fields=[pk_name, vector_name, filed_name],
        field_params={pk_name: FieldParams(is_primary=True).to_dict},
    )
    self.collection_wrap.init_collection(name=coll_name, schema=schema)

    # build the vector index and the scalar index while the collection is still empty
    vector_index = DefaultVectorIndexParams.HNSW(vector_name)
    scalar_index = index_obj(filed_name)
    index_params = {**vector_index, **scalar_index}
    self.build_multi_index(index_params=index_params)

    # every requested field must now own an index
    indexed_fields = sorted(idx.field_name for idx in self.collection_wrap.indexes)
    assert indexed_fields == sorted(index_params)

    # enabling the offset cache on this index must raise the expected error
    expected_error = {ct.err_code: 1100, ct.err_msg: iem.InvalidOffsetCache}
    self.collection_wrap.alter_index(
        index_name=filed_name,
        extra_params=AlterIndexParams.index_offset_cache(),
        check_task=CheckTasks.err_res,
        check_items=expected_error,
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_offset_cache_on_vector_field(self, request):
    """
    target:
        1. altering the offset cache on a vector field index is rejected
    method:
        1. create a collection with a primary key and a float vector field
        2. build an HNSW index on the vector field of the empty collection
        3. alter the vector index with the offset cache params
    expected:
        1. alter index fails with error code 1100 and the `InvalidOffsetCache` message
    """
    # names used throughout the case
    pk_name = 'INT64_pk'
    vector_name = DataType.FLOAT_VECTOR.name
    coll_name = f"{request.function.__name__}"

    # collection: primary key + float vector only
    schema = cf.set_collection_schema(
        fields=[pk_name, vector_name],
        field_params={pk_name: FieldParams(is_primary=True).to_dict},
    )
    self.collection_wrap.init_collection(name=coll_name, schema=schema)

    # build the vector index while the collection is still empty
    index_params = DefaultVectorIndexParams.HNSW(vector_name)
    self.build_multi_index(index_params=index_params)

    # every requested field must now own an index
    indexed_fields = sorted(idx.field_name for idx in self.collection_wrap.indexes)
    assert indexed_fields == sorted(index_params)

    # enabling the offset cache on the vector index must raise the expected error
    expected_error = {ct.err_code: 1100, ct.err_msg: iem.InvalidOffsetCache}
    self.collection_wrap.alter_index(
        index_name=DataType.FLOAT_VECTOR.name,
        extra_params=AlterIndexParams.index_offset_cache(),
        check_task=CheckTasks.err_res,
        check_items=expected_error,
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("auto_id", [True, False])
|
||||
@pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"])
|
||||
|
|
|
@ -31,7 +31,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
self._connect(self)
|
||||
|
||||
# init params
|
||||
self.primary_field, nb = "int64_pk", 3000
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
|
@ -49,7 +49,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -156,6 +156,48 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_no_index_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. check integer `in` / `not in` filters against counts computed from the inserted data
    method:
        1. draw `counts` random integers from the closed interval `range_num`
        2. for `in` and then `not in`: run a limited query and a count(*) query
        3. compare each response with the locally computed number of matching rows
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate values for the filter list (duplicates are possible)
    lower, upper = range_num
    candidates = []
    for _ in range(counts):
        candidates.append(random.randint(lower, upper))

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestHybridIndexDQLExpr")
|
||||
class TestHybridIndexDQLExpr(TestCaseClassBase):
|
||||
|
@ -173,13 +215,14 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
|
||||
# init params
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields]
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_hybrid_index_dql_expr"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
|
||||
fields=self.all_fields,
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict,
|
||||
DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
|
||||
|
@ -190,7 +233,10 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={
|
||||
'VARCHAR': cf.gen_varchar_data(3, self.nb),
|
||||
'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)]
|
||||
})
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -280,6 +326,114 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_hybrid_index_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. check integer `in` / `not in` filters in the `Hybrid index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. draw `counts` random integers from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate values for the filter list (duplicates are possible)
    lower, upper = range_num
    candidates = []
    for _ in range(counts):
        candidates.append(random.randint(lower, upper))

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)])
@pytest.mark.parametrize("limit", [1, 10, 3000])
@pytest.mark.parametrize("expr_field", ['VARCHAR'])
def test_hybrid_index_query_with_varchar_in(self, range_num, counts, limit, expr_field):
    """
    target:
        1. check varchar `in` / `not in` filters in the `Hybrid index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. generate `counts` random strings whose length is drawn from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate strings for the filter list (duplicates are possible)
    shortest, longest = range_num
    candidates = []
    for _ in range(counts):
        size = random.randint(shortest, longest)
        candidates.append(cf.gen_varchar_data(size, 1)[0])

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("length", [0, 5, 11])
@pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH])
@pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR'])
def test_hybrid_index_query_array_length_count(self, length, expr_obj, expr_field):
    """
    target:
        1. check count(*) filtered by an `array length` equality expression in the `Hybrid index` suite
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. build the expression `<array length of expr_field> == length`
        3. query count(*) with that expression
    expected:
        1. count(*) equals the number of inserted rows whose array holds exactly `length` items
    """
    # filter expression: array length of the field compared with the parametrized length
    length_call = expr_obj(expr_field).value
    expr = Expr.EQ(length_call, length).value

    # number of inserted arrays with exactly `length` items
    arrays = self.insert_data.get(expr_field, [])
    expr_count = sum(1 for items in arrays if len(items) == length)

    # count(*) must agree with the local computation
    self.collection_wrap.query(
        expr=expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_hybrid_index_query_count(self):
|
||||
"""
|
||||
|
@ -296,6 +450,25 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
def test_hybrid_index_search_output_fields(self):
    """
    target:
        1. check that a search in the `Hybrid index` suite can return all fields
    method:
        1. reuse the collection and data prepared by the class-level setup
        2. search the FLOAT16 vector field with `output_fields=['*']`
    expected:
        1. search result passes `check_search_results` with `self.all_fields` as output fields
    """
    # search settings
    nq, limit = 1, 3
    vector_field = DataType.FLOAT16_VECTOR.name
    search_params = {"metric_type": "L2", "ef": 32}

    # query vectors of dim 3 for the vector field
    query_vectors = cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field)

    # what the result checker should verify
    expected = {
        "nq": nq,
        "ids": self.insert_data.get(self.primary_field),
        "limit": limit,
        "output_fields": self.all_fields,
    }

    self.collection_wrap.search(
        query_vectors, vector_field, search_params, limit,
        output_fields=['*'],
        check_task=CheckTasks.check_search_results,
        check_items=expected,
    )
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestInvertedIndexDQLExpr")
|
||||
class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
||||
|
@ -312,14 +485,15 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
self._connect(self)
|
||||
|
||||
# init params
|
||||
self.primary_field, nb = "int64_pk", 3000
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields]
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_inverted_index_dql_expr"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
|
||||
fields=self.all_fields,
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict,
|
||||
DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
|
||||
|
@ -330,7 +504,10 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={
|
||||
'VARCHAR': cf.gen_varchar_data(3, self.nb),
|
||||
'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)]
|
||||
})
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -420,6 +597,114 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_inverted_index_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. check integer `in` / `not in` filters in the `INVERTED index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. draw `counts` random integers from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate values for the filter list (duplicates are possible)
    lower, upper = range_num
    candidates = []
    for _ in range(counts):
        candidates.append(random.randint(lower, upper))

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)])
@pytest.mark.parametrize("limit", [1, 10, 3000])
@pytest.mark.parametrize("expr_field", ['VARCHAR'])
def test_inverted_index_query_with_varchar_in(self, range_num, counts, limit, expr_field):
    """
    target:
        1. check varchar `in` / `not in` filters in the `INVERTED index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. generate `counts` random strings whose length is drawn from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate strings for the filter list (duplicates are possible)
    shortest, longest = range_num
    candidates = []
    for _ in range(counts):
        size = random.randint(shortest, longest)
        candidates.append(cf.gen_varchar_data(size, 1)[0])

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("length", [0, 5, 11])
@pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH])
@pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR'])
def test_inverted_index_query_array_length_count(self, length, expr_obj, expr_field):
    """
    target:
        1. check count(*) filtered by an `array length` equality expression in the `INVERTED index` suite
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. build the expression `<array length of expr_field> == length`
        3. query count(*) with that expression
    expected:
        1. count(*) equals the number of inserted rows whose array holds exactly `length` items
    """
    # filter expression: array length of the field compared with the parametrized length
    length_call = expr_obj(expr_field).value
    expr = Expr.EQ(length_call, length).value

    # number of inserted arrays with exactly `length` items
    arrays = self.insert_data.get(expr_field, [])
    expr_count = sum(1 for items in arrays if len(items) == length)

    # count(*) must agree with the local computation
    self.collection_wrap.query(
        expr=expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestBitmapIndexDQLExpr")
|
||||
class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
||||
|
@ -437,13 +722,14 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
|
||||
# init params
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
self.all_fields = [self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields]
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
|
||||
DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
|
||||
fields=self.all_fields,
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict,
|
||||
DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
|
||||
|
@ -454,38 +740,10 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [
    ([-100, 200], 10),
    ([2000, 5000], 10),
    ([3000, 4000], 5),
    ([0, 0], 1)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_index_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. check the integer `in` filter in the `BITMAP index` suite against the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. draw `counts` random integers from the closed interval `range_num`
        3. query with `<expr_field> in <list>` and the parametrized limit
    expected:
        1. query returns min(matching rows, limit) entities
    """
    # candidate values for the filter list (duplicates are possible)
    lower, upper = range_num
    range_numbers = []
    for _ in range(counts):
        range_numbers.append(random.randint(lower, upper))

    # inserted values that appear in the candidate list; kept for the failure message
    inserted_values = self.insert_data.get(expr_field, [])
    expr_data = [value for value in inserted_values if value in range_numbers]
    expr_count = len(expr_data)

    # limited query with the `in` expression
    res, _ = self.collection_wrap.query(
        expr=f"{expr_field} in {range_numbers}",
        limit=limit,
        output_fields=[expr_field],
    )
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}, {expr_data}"
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={
|
||||
'VARCHAR': cf.gen_varchar_data(3, self.nb),
|
||||
'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)]
|
||||
})
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -600,6 +858,114 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_index_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. check integer `in` / `not in` filters in the `BITMAP index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. draw `counts` random integers from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate values for the filter list (duplicates are possible)
    lower, upper = range_num
    candidates = []
    for _ in range(counts):
        candidates.append(random.randint(lower, upper))

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)])
@pytest.mark.parametrize("limit", [1, 10, 3000])
@pytest.mark.parametrize("expr_field", ['VARCHAR'])
def test_bitmap_index_query_with_varchar_in(self, range_num, counts, limit, expr_field):
    """
    target:
        1. check varchar `in` / `not in` filters in the `BITMAP index` suite against counts
           computed from the inserted data
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. generate `counts` random strings whose length is drawn from the closed interval `range_num`
        3. for `in` and then `not in`: run a limited query and a count(*) query
    expected:
        1. limited queries return min(matching rows, limit) entities
        2. count(*) queries return exactly the number of matching rows
    """
    # candidate strings for the filter list (duplicates are possible)
    shortest, longest = range_num
    candidates = []
    for _ in range(counts):
        size = random.randint(shortest, longest)
        candidates.append(cf.gen_varchar_data(size, 1)[0])

    # number of inserted rows whose value appears in the candidate list
    expr_count = sum(1 for value in self.insert_data.get(expr_field, []) if value in candidates)

    # `in`: limited query
    in_expr = Expr.In(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=in_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    # `in`: exact count
    self.collection_wrap.query(
        expr=in_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )

    # `not in`: expected size is the inserted row count (self.nb) minus the `in` matches
    not_in_count = self.nb - expr_count
    nin_expr = Expr.Nin(expr_field, candidates).value
    res, _ = self.collection_wrap.query(expr=nin_expr, limit=limit, output_fields=[expr_field])
    assert len(res) == min(not_in_count, limit), f"actual: {len(res)} == expect: {min(not_in_count, limit)}"

    # `not in`: exact count
    self.collection_wrap.query(
        expr=nin_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": not_in_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("length", [0, 5, 11])
@pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH])
@pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR'])
def test_bitmap_index_query_array_length_count(self, length, expr_obj, expr_field):
    """
    target:
        1. check count(*) filtered by an `array length` equality expression in the `BITMAP index` suite
    method:
        1. reuse the data recorded in `self.insert_data` by the class-level setup
        2. build the expression `<array length of expr_field> == length`
        3. query count(*) with that expression
    expected:
        1. count(*) equals the number of inserted rows whose array holds exactly `length` items
    """
    # filter expression: array length of the field compared with the parametrized length
    length_call = expr_obj(expr_field).value
    expr = Expr.EQ(length_call, length).value

    # number of inserted arrays with exactly `length` items
    arrays = self.insert_data.get(expr_field, [])
    expr_count = sum(1 for items in arrays if len(items) == length)

    # count(*) must agree with the local computation
    self.collection_wrap.query(
        expr=expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": [{"count(*)": expr_count}]},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_bitmap_index_query_count(self):
|
||||
"""
|
||||
|
@ -663,6 +1029,25 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
|
||||
expr='INT16 > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_index_search_output_fields(self):
    """
    target:
        1. verify that a vector search asking for every field (`*`) returns the full field list
    method:
        1. generate `nq` query vectors (dim 3) for the FLOAT16 vector field
        2. search with L2 metric and output_fields=['*']
    expected:
        1. the search-result checker accepts nq, limit, the inserted primary keys and self.all_fields
    """
    search_params = {"metric_type": "L2", "ef": 32}
    vector_field = DataType.FLOAT16_VECTOR.name
    limit = 3
    nq = 1

    query_vectors = cf.gen_vectors(nb=nq, dim=3, vector_data_type=vector_field)

    # Everything the checker needs to validate the hits and the returned fields.
    expectations = {
        "nq": nq,
        "ids": self.insert_data.get(self.primary_field),
        "limit": limit,
        "output_fields": self.all_fields,
    }

    self.collection_wrap.search(
        query_vectors, vector_field, search_params, limit,
        output_fields=['*'],
        check_task=CheckTasks.check_search_results,
        check_items=expectations,
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_index_hybrid_search(self):
|
||||
"""
|
||||
|
@ -697,7 +1082,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
]
|
||||
self.collection_wrap.hybrid_search(
|
||||
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
|
||||
|
@ -716,12 +1101,13 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
|||
|
||||
# init params
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
self.all_fields = [self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields]
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_bitmap_index_offset_cache"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
|
||||
fields=self.all_fields,
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict
|
||||
},
|
||||
|
@ -729,7 +1115,10 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={
|
||||
'VARCHAR': cf.gen_varchar_data(3, self.nb),
|
||||
'ARRAY_VARCHAR': [cf.gen_varchar_data(length=2, nb=random.randint(0, 10)) for _ in range(self.nb)]
|
||||
})
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -819,6 +1208,114 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_offset_cache_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. verify `in` / `not in` filters on an integer scalar field, for row queries and count(*)
    method:
        1. draw `counts` random integers from the inclusive range `range_num`
        2. count locally how many values in self.insert_data[expr_field] appear in that list
        3. for `in` and then `not in`: run a limited query, then a count(*) query
    expected:
        1. the limited query returns min(matching rows, limit) rows
        2. count(*) equals the locally computed total (`not in` total is self.nb minus the `in` total)
    """
    # Random candidate values used on the right-hand side of the filter.
    candidates = [random.randint(*range_num) for _ in range(counts)]

    # Reference totals computed from the data that was handed to insert.
    stored_values = self.insert_data.get(expr_field, [])
    hit_total = sum(1 for value in stored_values if value in candidates)
    # NOTE(review): assumes each of the self.nb rows carries a value for this field.
    miss_total = self.nb - hit_total

    # Same two checks for each operator, `in` first and `not in` second.
    for build_expr, expected_total in ((Expr.In, hit_total), (Expr.Nin, miss_total)):
        filter_expr = build_expr(expr_field, candidates).value
        expected_len = min(expected_total, limit)

        # limited row query
        rows, _ = self.collection_wrap.query(expr=filter_expr, limit=limit, output_fields=[expr_field])
        assert len(rows) == expected_len, f"actual: {len(rows)} == expect: {expected_len}"

        # count(*) query
        self.collection_wrap.query(
            expr=filter_expr,
            output_fields=['count(*)'],
            check_task=CheckTasks.check_query_results,
            check_items={"exp_res": [{"count(*)": expected_total}]},
        )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([1, 3], 50), ([2, 5], 50), ([3, 3], 100)])
@pytest.mark.parametrize("limit", [1, 10, 3000])
@pytest.mark.parametrize("expr_field", ['VARCHAR'])
def test_bitmap_offset_cache_query_with_varchar_in(self, range_num, counts, limit, expr_field):
    """
    target:
        1. verify `in` / `not in` filters on a varchar scalar field, for row queries and count(*)
    method:
        1. generate `counts` random strings, each with a length drawn from the inclusive range `range_num`
        2. count locally how many values in self.insert_data[expr_field] appear in that list
        3. for `in` and then `not in`: run a limited query, then a count(*) query
    expected:
        1. the limited query returns min(matching rows, limit) rows
        2. count(*) equals the locally computed total (`not in` total is self.nb minus the `in` total)
    """
    # One generated string per candidate; the length is re-drawn for every candidate.
    candidates = [cf.gen_varchar_data(random.randint(*range_num), 1)[0] for _ in range(counts)]

    # Reference totals computed from the data that was handed to insert.
    stored_values = self.insert_data.get(expr_field, [])
    hit_total = sum(1 for value in stored_values if value in candidates)
    # NOTE(review): assumes each of the self.nb rows carries a value for this field.
    miss_total = self.nb - hit_total

    # Same two checks for each operator, `in` first and `not in` second.
    for build_expr, expected_total in ((Expr.In, hit_total), (Expr.Nin, miss_total)):
        filter_expr = build_expr(expr_field, candidates).value
        expected_len = min(expected_total, limit)

        # limited row query
        rows, _ = self.collection_wrap.query(expr=filter_expr, limit=limit, output_fields=[expr_field])
        assert len(rows) == expected_len, f"actual: {len(rows)} == expect: {expected_len}"

        # count(*) query
        self.collection_wrap.query(
            expr=filter_expr,
            output_fields=['count(*)'],
            check_task=CheckTasks.check_query_results,
            check_items={"exp_res": [{"count(*)": expected_total}]},
        )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("length", [0, 5, 11])
@pytest.mark.parametrize("expr_obj", [Expr.array_length, Expr.ARRAY_LENGTH])
@pytest.mark.parametrize("expr_field", ['ARRAY_VARCHAR'])
def test_bitmap_offset_cache_query_array_length_count(self, length, expr_obj, expr_field):
    """
    target:
        1. verify count(*) for a filter of the form `<array length function>(field) == length`
    method:
        1. build the filter from `expr_obj` (lower- or upper-case array length helper) and `length`
        2. count locally how many arrays stored in self.insert_data[expr_field] have exactly `length` items
        3. run a count(*) query with that filter
    expected:
        1. count(*) equals the locally computed number of matching rows
    """
    # Evaluate the length expression first, then wrap it in the equality comparison.
    length_expr = expr_obj(expr_field).value
    filter_expr = Expr.EQ(length_expr, length).value

    # Reference answer computed from the data that was handed to insert.
    stored_arrays = self.insert_data.get(expr_field, [])
    matched = sum(1 for arr in stored_arrays if len(arr) == length)

    # The result checker compares the server-side count with the reference answer.
    expected_rows = [{"count(*)": matched}]
    self.collection_wrap.query(
        expr=filter_expr,
        output_fields=['count(*)'],
        check_task=CheckTasks.check_query_results,
        check_items={"exp_res": expected_rows},
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_offset_cache_query_count(self):
|
||||
"""
|
||||
|
@ -835,6 +1332,25 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
|||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_search_output_fields(self):
    """
    target:
        1. verify that a vector search asking for every field (`*`) returns the full field list
    method:
        1. generate `nq` query vectors (dim ct.default_dim) for the FLOAT vector field
        2. search with L2 metric and output_fields=['*']
    expected:
        1. the search-result checker accepts nq, limit, the inserted primary keys and self.all_fields
    """
    search_params = {"metric_type": "L2", "ef": 32}
    vector_field = DataType.FLOAT_VECTOR.name
    limit = 3
    nq = 1

    query_vectors = cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field)

    # Everything the checker needs to validate the hits and the returned fields.
    expectations = {
        "nq": nq,
        "ids": self.insert_data.get(self.primary_field),
        "limit": limit,
        "output_fields": self.all_fields,
    }

    self.collection_wrap.search(
        query_vectors, vector_field, search_params, limit,
        output_fields=['*'],
        check_task=CheckTasks.check_search_results,
        check_items=expectations,
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_offset_cache_hybrid_search(self):
|
||||
"""
|
||||
|
@ -863,7 +1379,7 @@ class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
|||
]
|
||||
self.collection_wrap.hybrid_search(
|
||||
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
|
||||
|
@ -882,12 +1398,13 @@ class TestBitmapIndexMmap(TestCaseClassBase):
|
|||
|
||||
# init params
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
self.all_fields = [self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields]
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_bitmap_index_bitmap"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
|
||||
fields=self.all_fields,
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict
|
||||
},
|
||||
|
@ -985,6 +1502,48 @@ class TestBitmapIndexMmap(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("range_num, counts", [([-100, 200], 10), ([2000, 5000], 10), ([3000, 4000], 5)])
@pytest.mark.parametrize("expr_field", ['INT8', 'INT16', 'INT32', 'INT64'])
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_mmap_query_with_int_in(self, range_num, counts, expr_field, limit):
    """
    target:
        1. verify `in` / `not in` filters on an integer scalar field, for row queries and count(*)
    method:
        1. draw `counts` random integers from the inclusive range `range_num`
        2. count locally how many values in self.insert_data[expr_field] appear in that list
        3. for `in` and then `not in`: run a limited query, then a count(*) query
    expected:
        1. the limited query returns min(matching rows, limit) rows
        2. count(*) equals the locally computed total (`not in` total is self.nb minus the `in` total)
    """
    # Random candidate values used on the right-hand side of the filter.
    candidates = [random.randint(*range_num) for _ in range(counts)]

    # Reference totals computed from the data that was handed to insert.
    stored_values = self.insert_data.get(expr_field, [])
    hit_total = sum(1 for value in stored_values if value in candidates)
    # NOTE(review): assumes each of the self.nb rows carries a value for this field.
    miss_total = self.nb - hit_total

    # Same two checks for each operator, `in` first and `not in` second.
    for build_expr, expected_total in ((Expr.In, hit_total), (Expr.Nin, miss_total)):
        filter_expr = build_expr(expr_field, candidates).value
        expected_len = min(expected_total, limit)

        # limited row query
        rows, _ = self.collection_wrap.query(expr=filter_expr, limit=limit, output_fields=[expr_field])
        assert len(rows) == expected_len, f"actual: {len(rows)} == expect: {expected_len}"

        # count(*) query
        self.collection_wrap.query(
            expr=filter_expr,
            output_fields=['count(*)'],
            check_task=CheckTasks.check_query_results,
            check_items={"exp_res": [{"count(*)": expected_total}]},
        )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_mmap_query_count(self):
|
||||
"""
|
||||
|
@ -1001,6 +1560,25 @@ class TestBitmapIndexMmap(TestCaseClassBase):
|
|||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_mmap_search_output_fields(self):
    """
    target:
        1. verify that a vector search asking for every field (`*`) returns the full field list
    method:
        1. generate `nq` query vectors (dim ct.default_dim) for the FLOAT vector field
        2. search with L2 metric and output_fields=['*']
    expected:
        1. the search-result checker accepts nq, limit, the inserted primary keys and self.all_fields
    """
    search_params = {"metric_type": "L2", "ef": 32}
    vector_field = DataType.FLOAT_VECTOR.name
    limit = 3
    nq = 1

    query_vectors = cf.gen_vectors(nb=nq, dim=ct.default_dim, vector_data_type=vector_field)

    # Everything the checker needs to validate the hits and the returned fields.
    expectations = {
        "nq": nq,
        "ids": self.insert_data.get(self.primary_field),
        "limit": limit,
        "output_fields": self.all_fields,
    }

    self.collection_wrap.search(
        query_vectors, vector_field, search_params, limit,
        output_fields=['*'],
        check_task=CheckTasks.check_search_results,
        check_items=expectations,
    )
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_mmap_hybrid_search(self):
|
||||
"""
|
||||
|
@ -1029,7 +1607,7 @@ class TestBitmapIndexMmap(TestCaseClassBase):
|
|||
]
|
||||
self.collection_wrap.hybrid_search(
|
||||
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
check_items={"nq": nq, "ids": self.insert_data.get(self.primary_field), "limit": limit})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestIndexUnicodeString")
|
||||
|
|
Loading…
Reference in New Issue