test: add more bitmap test cases (#36131)

1. verify issues #36054 and #35971
2. add mixed-scene test cases for the BITMAP index

Signed-off-by: wangting0128 <ting.wang@zilliz.com>
wt 2024-09-10 10:55:07 +08:00 committed by GitHub
parent 5aedc169cd
commit 53a87825f3
4 changed files with 672 additions and 29 deletions

View File

@@ -38,6 +38,7 @@ class IndexErrorMessage(ExceptionsMessage):
VectorMetricTypeExist = "metric type not set for vector index"
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
CheckBitmapOnPK = "create bitmap index on primary key not supported"
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
class QueryErrorMessage(ExceptionsMessage):

View File

@@ -377,3 +377,8 @@ class DefaultScalarIndexParams:
@staticmethod
def list_bitmap(fields: List[str]) -> Dict[str, IndexPrams]:
return {n: IndexPrams(index_type=IndexName.BITMAP) for n in fields}
class AlterIndexParams:
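# extra params passed to `alter_index(index_name, extra_params=...)` to toggle index properties (see usage in the tests below)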
IndexOffsetCache = {'indexoffsetcache.enabled': True}
IndexMmap = {'mmap.enabled': True}

View File

@@ -14,7 +14,7 @@ from common.common_type import CaseLabel, CheckTasks
from common.code_mapping import CollectionErrorMessage as clem
from common.code_mapping import IndexErrorMessage as iem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, AlterIndexParams
)
from utils.util_pymilvus import *
@@ -2299,7 +2299,8 @@ class TestInvertedIndexValid(TestcaseBase):
def test_binary_arith_expr_on_inverted_index(self):
prefix = "test_binary_arith_expr_on_inverted_index"
nb = 5000
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True, is_all_data_type=True)
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True,
is_all_data_type=True)
index_name = "test_binary_arith_expr_on_inverted_index"
scalar_index_params = {"index_type": "INVERTED"}
collection_w.release()
@@ -2723,3 +2724,144 @@ class TestBitmapIndex(TestcaseBase):
# check segment row number
counts = [int(n.num_rows) for n in segment_info]
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_enable(self, request):
"""
target:
1. alter index `{indexoffsetcache.enabled: true}` and rebuild index again
method:
1. create a collection with scalar fields
2. build BITMAP index on scalar fields
3. alter index to enable `indexoffsetcache`
4. insert some data and flush
5. rebuild indexes with the same params again
6. load collection
expected:
1. alter index not failed
2. rebuild index not failed
3. load not failed
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build `BITMAP` index on empty collection
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache
for index_name in self.get_bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
# prepare data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# rebuild `BITMAP` index
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
"""
target:
1. check that auto index rejects an invalid `bitmap_cardinality_limit` param
method:
1. create a collection with scalar fields
2. build scalar index with an invalid `bitmap_cardinality_limit`
expected:
1. build index failed
"""
# init params
collection_name = f"{request.function.__name__}_{str(bitmap_cardinality_limit).replace('-', '_')}"
primary_field, nb = "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, DataType.INT64.name],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# build scalar index and check failed
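# NOTE: `bitmap_cardinality_limit` is presumably the cardinality threshold AUTOINDEX uses when deciding
# whether to build a BITMAP index on a scalar field; valid values are in (0, 1000) per the checked error message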
self.collection_wrap.create_index(
field_name=DataType.INT64.name, index_name=DataType.INT64.name,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit},
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000])
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit):
"""
target:
1. check building auto index with a valid `bitmap_cardinality_limit` not failed
method:
1. create a collection with scalar fields
2. insert some data and flush
3. build vector index
4. build scalar index with `bitmap_cardinality_limit`
expected:
1. build vector index not failed
2. build scalar index with `bitmap_cardinality_limit` not failed
3. load not failed
Notice:
This case does not check whether the parameter takes effect;
it only verifies that the index is built successfully.
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 triggering index building)
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build vector index
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
# build scalar index
for scalar_field in self.get_bitmap_support_dtype_names:
self.collection_wrap.create_index(
field_name=scalar_field, index_name=scalar_field,
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
# load collection
self.collection_wrap.load()

View File

@@ -1,13 +1,14 @@
import re
import math # do not remove `math`
import pytest
from pymilvus import DataType
from pymilvus import DataType, AnnSearchRequest, RRFRanker
from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from common.code_mapping import QueryErrorMessage as qem
from common.common_params import (
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, Expr
FieldParams, MetricType, DefaultVectorIndexParams, DefaultScalarIndexParams, Expr, AlterIndexParams
)
from base.client_base import TestcaseBase, TestCaseClassBase
@@ -54,7 +55,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build vector indexes
index_params = {
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name),
@@ -67,7 +68,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, output_fields", [
(Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']),
(Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']),
@@ -86,7 +87,6 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
self.collection_wrap.query(expr=expr, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed})
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -103,13 +103,14 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
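# NOTE: Python's `%` follows the divisor's sign (-3 % 13 == 10) while math.fmod follows the dividend
# (math.fmod(-3, 13) == -3.0), which presumably matches Milvus's modulo semantics (see issue #36054);
# e.g. "(INT8 % 13) == 0" is rewritten to "math.fmod(-3 , 13) == 0" before eval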
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_no_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -130,7 +131,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -168,7 +169,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
self._connect(self)
# init params
self.primary_field, nb = "int64_pk", 3000
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
@@ -186,7 +187,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
@@ -195,7 +196,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `Hybrid index`
index_params = {
**DefaultVectorIndexParams.DISKANN(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.IVF_SQ8(DataType.BFLOAT16_VECTOR.name),
@@ -210,7 +211,6 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -227,13 +227,14 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -254,7 +255,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -276,6 +277,22 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
def test_hybrid_index_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `Hybrid index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.xdist_group("TestInvertedIndexDQLExpr")
class TestInvertedIndexDQLExpr(TestCaseClassBase):
@@ -319,7 +336,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `INVERTED index`
index_params = {
**DefaultVectorIndexParams.IVF_FLAT(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
@@ -334,7 +351,6 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
@@ -351,13 +367,14 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -378,7 +395,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -416,7 +433,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
self._connect(self)
# init params
self.primary_field, nb = "int64_pk", 3000
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
@@ -434,7 +451,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
@@ -443,7 +460,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# flush collection, segment sealed
self.collection_wrap.flush()
# build `Hybrid index` on empty collection
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT16_VECTOR.name),
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
@@ -458,7 +475,6 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# load collection
self.collection_wrap.load()
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -474,13 +490,14 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex):
@@ -501,7 +518,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10, 3000])
@@ -522,3 +539,481 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L1)
def test_bitmap_index_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("batch_size", [10, 1000])
def test_bitmap_index_search_iterator(self, batch_size):
"""
target:
1. check search iterator with BITMAP index built on scalar fields
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. search iterator and check result
expected:
1. search iterator works with BITMAP index and returns batches of the expected size
"""
search_params, vector_field = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name
self.collection_wrap.search_iterator(
cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
expr='int64_pk > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_index_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT16_VECTOR.name), anns_field=DataType.FLOAT16_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.BFLOAT16_VECTOR.name), anns_field=DataType.BFLOAT16_VECTOR.name,
param={"metric_type": MetricType.L2, "search_list": 30}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
),
AnnSearchRequest(
data=vectors.get(DataType.SPARSE_FLOAT_VECTOR.name), anns_field=DataType.SPARSE_FLOAT_VECTOR.name,
param={"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, limit=limit),
AnnSearchRequest(
data=vectors.get(DataType.BINARY_VECTOR.name), anns_field=DataType.BINARY_VECTOR.name,
param={"metric_type": MetricType.JACCARD, "nprobe": 128}, limit=limit)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
class TestBitmapIndexOffsetCache(TestCaseClassBase):
"""
Scalar fields build BITMAP index, then alter index to enable indexoffsetcache
Author: Ting.Wang
"""
def setup_class(self):
super().setup_class(self)
# connect to server before testing
self._connect(self)
# init params
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
schema=cf.set_collection_schema(
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
field_params={
self.primary_field: FieldParams(is_primary=True).to_dict
},
)
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
# build BITMAP index
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable offset cache
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_modulo(self, expr, expr_field, limit):
"""
target:
1. check modulo expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_string(self, expr, expr_field, limit, rex):
"""
target:
1. check string expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_offset_cache_query_with_operation(self, expr, expr_field, limit):
"""
target:
1. check number operation
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_offset_cache_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
class TestBitmapIndexMmap(TestCaseClassBase):
"""
Scalar fields build BITMAP index, then alter index to enable mmap
Author: Ting.Wang
"""
def setup_class(self):
super().setup_class(self)
# connect to server before testing
self._connect(self)
# init params
self.primary_field, self.nb = "int64_pk", 3000
# create a collection with fields
self.collection_wrap.init_collection(
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
schema=cf.set_collection_schema(
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
field_params={
self.primary_field: FieldParams(is_primary=True).to_dict
},
)
)
# prepare data (> 1024 triggering index building)
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
@pytest.fixture(scope="class", autouse=True)
def prepare_data(self):
self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP index`
index_params = {
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
# build BITMAP index
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
}
self.build_multi_index(index_params=index_params)
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
# enable mmap
for index_name in self.bitmap_support_dtype_names:
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexMmap)
# load collection
self.collection_wrap.load()
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_modulo(self, expr, expr_field, limit):
"""
target:
1. check modulo expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_string(self, expr, expr_field, limit, rex):
"""
target:
1. check string expression
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize(
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
@pytest.mark.parametrize("limit", [1, 10])
def test_bitmap_mmap_query_with_operation(self, expr, expr_field, limit):
"""
target:
1. check number operation
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with the different expr and limit
3. check query result
expected:
1. query response equal to min(insert data, limit)
"""
# the total number of inserted data that matches the expression
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
# query
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_mmap_query_count(self):
"""
target:
1. check query with count(*)
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. query with count(*)
3. check query result
expected:
1. query response equal to insert nb
"""
# query count(*)
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
check_items={"exp_res": [{"count(*)": self.nb}]})
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_mmap_hybrid_search(self):
"""
target:
1. check hybrid search with expr
method:
1. prepare some data and build `BITMAP index` on scalar fields
2. hybrid search with expr
expected:
1. hybrid search with expr returns the expected results
"""
nq, limit = 10, 10
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
req_list = [
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
),
AnnSearchRequest(
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
)
]
self.collection_wrap.hybrid_search(
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
class TestMixScenes(TestcaseBase):
"""
Testing cross-combination scenarios
Author: Ting.Wang
"""
@pytest.mark.tags(CaseLabel.L2)
def test_bitmap_upsert_and_delete(self, request):
"""
target:
1. upsert data and query returns the updated data
method:
1. create a collection with scalar fields
2. insert some data and build BITMAP index
3. query the data of the specified primary key value
4. upsert the specified primary key value
5. re-query and check data equal to the updated data
6. delete the specified primary key value
7. re-query and check result is []
expected:
1. check whether the upsert and delete data is effective
"""
# init params
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
# scalar fields
scalar_fields, expr = [DataType.INT64.name, f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}"], 'int64_pk == 10'
# connect to server before testing
self._connect()
# create a collection with fields that can build `BITMAP` index
self.collection_wrap.init_collection(
name=collection_name,
schema=cf.set_collection_schema(
fields=[primary_field, DataType.FLOAT_VECTOR.name, *scalar_fields],
field_params={primary_field: FieldParams(is_primary=True).to_dict},
)
)
# prepare data (> 1024 triggering index building)
insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
self.collection_wrap.insert(data=list(insert_data.values()), check_task=CheckTasks.check_insert_result)
# flush collection, segment sealed
self.collection_wrap.flush()
# build `BITMAP` index
self.build_multi_index(index_params={
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
**DefaultScalarIndexParams.list_bitmap(scalar_fields)
})
# load collection
self.collection_wrap.load()
# query before upsert
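# assuming gen_field_values assigns sequential primary keys starting at 0, row index 10 is the row with int64_pk == 10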
expected_res = [{k: v[10] for k, v in insert_data.items() if k != DataType.FLOAT_VECTOR.name}]
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": expected_res, "primary_field": primary_field})
# upsert int64_pk = 10
upsert_data = cf.gen_field_values(self.collection_wrap.schema, nb=1,
default_values={primary_field: [10]}, start_id=10)
self.collection_wrap.upsert(data=list(upsert_data.values()))
# re-query
expected_upsert_res = [{k: v[0] for k, v in upsert_data.items() if k != DataType.FLOAT_VECTOR.name}]
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": expected_upsert_res, "primary_field": primary_field})
# delete int64_pk = 10
self.collection_wrap.delete(expr=expr)
# re-query
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
check_items={"exp_res": []})