mirror of https://github.com/milvus-io/milvus.git
test: add more bitmap test cases (#36131)
1. verified issues #36054 and #35971 2. add mix scenes test cases for BITMAP index Signed-off-by: wangting0128 <ting.wang@zilliz.com>pull/36139/head
parent
5aedc169cd
commit
53a87825f3
|
@ -38,6 +38,7 @@ class IndexErrorMessage(ExceptionsMessage):
|
|||
VectorMetricTypeExist = "metric type not set for vector index"
|
||||
CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
|
||||
CheckBitmapOnPK = "create bitmap index on primary key not supported"
|
||||
CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000"
|
||||
|
||||
|
||||
class QueryErrorMessage(ExceptionsMessage):
|
||||
|
|
|
@ -377,3 +377,8 @@ class DefaultScalarIndexParams:
|
|||
@staticmethod
|
||||
def list_bitmap(fields: List[str]) -> Dict[str, IndexPrams]:
|
||||
return {n: IndexPrams(index_type=IndexName.BITMAP) for n in fields}
|
||||
|
||||
|
||||
class AlterIndexParams:
|
||||
IndexOffsetCache = {'indexoffsetcache.enabled': True}
|
||||
IndexMmap = {'mmap.enabled': True}
|
||||
|
|
|
@ -14,7 +14,7 @@ from common.common_type import CaseLabel, CheckTasks
|
|||
from common.code_mapping import CollectionErrorMessage as clem
|
||||
from common.code_mapping import IndexErrorMessage as iem
|
||||
from common.common_params import (
|
||||
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType
|
||||
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, AlterIndexParams
|
||||
)
|
||||
|
||||
from utils.util_pymilvus import *
|
||||
|
@ -2299,7 +2299,8 @@ class TestInvertedIndexValid(TestcaseBase):
|
|||
def test_binary_arith_expr_on_inverted_index(self):
|
||||
prefix = "test_binary_arith_expr_on_inverted_index"
|
||||
nb = 5000
|
||||
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True, is_all_data_type=True)
|
||||
collection_w, _, _, insert_ids, _ = self.init_collection_general(prefix, insert_data=True, is_index=True,
|
||||
is_all_data_type=True)
|
||||
index_name = "test_binary_arith_expr_on_inverted_index"
|
||||
scalar_index_params = {"index_type": "INVERTED"}
|
||||
collection_w.release()
|
||||
|
@ -2723,3 +2724,144 @@ class TestBitmapIndex(TestcaseBase):
|
|||
# check segment row number
|
||||
counts = [int(n.num_rows) for n in segment_info]
|
||||
assert sum(counts) == nb, f"`{collection_name}` Segment row count:{sum(counts)} != insert:{nb}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_offset_cache_enable(self, request):
|
||||
"""
|
||||
target:
|
||||
1. alter index `{indexoffsetcache.enabled: true}` and rebuild index again
|
||||
method:
|
||||
1. create a collection with scalar fields
|
||||
2. build BITMAP index on scalar fields
|
||||
3. altering index `indexoffsetcache` enable
|
||||
4. insert some data and flush
|
||||
5. rebuild indexes with the same params again
|
||||
6. load collection
|
||||
expected:
|
||||
1. alter index not failed
|
||||
2. rebuild index not failed
|
||||
3. load not failed
|
||||
"""
|
||||
# init params
|
||||
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
|
||||
|
||||
# create a collection with fields that can build `BITMAP` index
|
||||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
||||
# build `BITMAP` index on empty collection
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
||||
# enable offset cache
|
||||
for index_name in self.get_bitmap_support_dtype_names:
|
||||
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
|
||||
check_task=CheckTasks.check_insert_result)
|
||||
|
||||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# rebuild `BITMAP` index
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001])
|
||||
def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit):
|
||||
"""
|
||||
target:
|
||||
1. check auto index setting `bitmap_cardinality_limit` param
|
||||
method:
|
||||
1. create a collection with scalar fields
|
||||
4. build scalar index with `bitmap_cardinality_limit`
|
||||
expected:
|
||||
1. build index failed
|
||||
"""
|
||||
# init params
|
||||
collection_name = f"{request.function.__name__}_{str(bitmap_cardinality_limit).replace('-', '_')}"
|
||||
primary_field, nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields that can build `BITMAP` index
|
||||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, DataType.INT64.name],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
||||
# build scalar index and check failed
|
||||
self.collection_wrap.create_index(
|
||||
field_name=DataType.INT64.name, index_name=DataType.INT64.name,
|
||||
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit},
|
||||
check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000])
|
||||
def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit):
|
||||
"""
|
||||
target:
|
||||
1. check auto index setting `bitmap_cardinality_limit` not failed
|
||||
method:
|
||||
1. create a collection with scalar fields
|
||||
2. insert some data and flush
|
||||
3. build vector index
|
||||
4. build scalar index with `bitmap_cardinality_limit`
|
||||
expected:
|
||||
1. alter index not failed
|
||||
2. rebuild index not failed
|
||||
3. load not failed
|
||||
|
||||
Notice:
|
||||
This parameter setting does not automatically check whether the result meets expectations,
|
||||
but is only used to verify that the index is successfully built.
|
||||
"""
|
||||
# init params
|
||||
collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000
|
||||
|
||||
# create a collection with fields that can build `BITMAP` index
|
||||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.collection_wrap.insert(data=cf.gen_values(self.collection_wrap.schema, nb=nb),
|
||||
check_task=CheckTasks.check_insert_result)
|
||||
|
||||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build vector index
|
||||
self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name))
|
||||
|
||||
# build scalar index
|
||||
for scalar_field in self.get_bitmap_support_dtype_names:
|
||||
self.collection_wrap.create_index(
|
||||
field_name=scalar_field, index_name=scalar_field,
|
||||
index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit})
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
import re
|
||||
import math # do not remove `math`
|
||||
import pytest
|
||||
from pymilvus import DataType
|
||||
from pymilvus import DataType, AnnSearchRequest, RRFRanker
|
||||
|
||||
from common.common_type import CaseLabel, CheckTasks
|
||||
from common import common_type as ct
|
||||
from common import common_func as cf
|
||||
from common.code_mapping import QueryErrorMessage as qem
|
||||
from common.common_params import (
|
||||
IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, Expr
|
||||
FieldParams, MetricType, DefaultVectorIndexParams, DefaultScalarIndexParams, Expr, AlterIndexParams
|
||||
)
|
||||
from base.client_base import TestcaseBase, TestCaseClassBase
|
||||
|
||||
|
@ -54,7 +55,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build `Hybrid index` on empty collection
|
||||
# build vectors index
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name),
|
||||
|
@ -67,7 +68,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, output_fields", [
|
||||
(Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']),
|
||||
(Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']),
|
||||
|
@ -86,7 +87,6 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
self.collection_wrap.query(expr=expr, check_task=CheckTasks.err_res,
|
||||
check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed})
|
||||
|
||||
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
|
||||
|
@ -103,13 +103,14 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
|
||||
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
def test_no_index_query_with_string(self, expr, expr_field, limit, rex):
|
||||
|
@ -130,7 +131,7 @@ class TestNoIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
@ -168,7 +169,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
self._connect(self)
|
||||
|
||||
# init params
|
||||
self.primary_field, nb = "int64_pk", 3000
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
|
@ -186,7 +187,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -195,7 +196,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build `Hybrid index` on empty collection
|
||||
# build `Hybrid index`
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.DISKANN(DataType.FLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.IVF_SQ8(DataType.BFLOAT16_VECTOR.name),
|
||||
|
@ -210,7 +211,6 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
|
||||
|
@ -227,13 +227,14 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
|
||||
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex):
|
||||
|
@ -254,7 +255,7 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
@ -276,6 +277,22 @@ class TestHybridIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_hybrid_index_query_count(self):
|
||||
"""
|
||||
target:
|
||||
1. check query with count(*)
|
||||
method:
|
||||
1. prepare some data and build `Hybrid index` on scalar fields
|
||||
2. query with count(*)
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to insert nb
|
||||
"""
|
||||
# query count(*)
|
||||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestInvertedIndexDQLExpr")
|
||||
class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
||||
|
@ -319,7 +336,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build `Hybrid index` on empty collection
|
||||
# build `INVERTED index`
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.IVF_FLAT(DataType.FLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
|
||||
|
@ -334,7 +351,6 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
|
||||
|
@ -351,13 +367,14 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
|
||||
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex):
|
||||
|
@ -378,7 +395,7 @@ class TestInvertedIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
@ -416,7 +433,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
self._connect(self)
|
||||
|
||||
# init params
|
||||
self.primary_field, nb = "int64_pk", 3000
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
|
@ -434,7 +451,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
|
@ -443,7 +460,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build `Hybrid index` on empty collection
|
||||
# build `BITMAP index`
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.HNSW(DataType.FLOAT16_VECTOR.name),
|
||||
**DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
|
||||
|
@ -458,7 +475,6 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
@ -474,13 +490,14 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
|
||||
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex):
|
||||
|
@ -501,7 +518,7 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
|
||||
@pytest.mark.parametrize("limit", [1, 10, 3000])
|
||||
|
@ -522,3 +539,481 @@ class TestBitmapIndexDQLExpr(TestCaseClassBase):
|
|||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L1)
|
||||
def test_bitmap_index_query_count(self):
|
||||
"""
|
||||
target:
|
||||
1. check query with count(*)
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. query with count(*)
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to insert nb
|
||||
"""
|
||||
# query count(*)
|
||||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("batch_size", [10, 1000])
|
||||
def test_bitmap_index_search_iterator(self, batch_size):
|
||||
"""
|
||||
target:
|
||||
1. check search iterator with BITMAP index built on scalar fields
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. search iterator and check result
|
||||
expected:
|
||||
1. search iterator with BITMAP index
|
||||
"""
|
||||
search_params, vector_field = {"metric_type": "L2", "ef": 32}, DataType.FLOAT16_VECTOR.name
|
||||
self.collection_wrap.search_iterator(
|
||||
cf.gen_vectors(nb=1, dim=3, vector_data_type=vector_field), vector_field, search_params, batch_size,
|
||||
expr='int64_pk > 15', check_task=CheckTasks.check_search_iterator, check_items={"batch_size": batch_size})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_index_hybrid_search(self):
|
||||
"""
|
||||
target:
|
||||
1. check hybrid search with expr
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. hybrid search with expr
|
||||
expected:
|
||||
1. hybrid search with expr
|
||||
"""
|
||||
nq, limit = 10, 10
|
||||
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
|
||||
|
||||
req_list = [
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.FLOAT16_VECTOR.name), anns_field=DataType.FLOAT16_VECTOR.name,
|
||||
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
|
||||
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
|
||||
),
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.BFLOAT16_VECTOR.name), anns_field=DataType.BFLOAT16_VECTOR.name,
|
||||
param={"metric_type": MetricType.L2, "search_list": 30}, limit=limit,
|
||||
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
|
||||
),
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.SPARSE_FLOAT_VECTOR.name), anns_field=DataType.SPARSE_FLOAT_VECTOR.name,
|
||||
param={"metric_type": MetricType.IP, "drop_ratio_search": 0.2}, limit=limit),
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.BINARY_VECTOR.name), anns_field=DataType.BINARY_VECTOR.name,
|
||||
param={"metric_type": MetricType.JACCARD, "nprobe": 128}, limit=limit)
|
||||
]
|
||||
self.collection_wrap.hybrid_search(
|
||||
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
|
||||
|
||||
@pytest.mark.xdist_group("TestBitmapIndexOffsetCacheDQL")
|
||||
class TestBitmapIndexOffsetCache(TestCaseClassBase):
|
||||
"""
|
||||
Scalar fields build BITMAP index, and altering index indexoffsetcache
|
||||
|
||||
Author: Ting.Wang
|
||||
"""
|
||||
|
||||
def setup_class(self):
|
||||
super().setup_class(self)
|
||||
|
||||
# connect to server before testing
|
||||
self._connect(self)
|
||||
|
||||
# init params
|
||||
self.primary_field, self.nb = "int64_pk", 3000
|
||||
|
||||
# create a collection with fields
|
||||
self.collection_wrap.init_collection(
|
||||
name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
|
||||
field_params={
|
||||
self.primary_field: FieldParams(is_primary=True).to_dict
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)
|
||||
|
||||
@pytest.fixture(scope="class", autouse=True)
|
||||
def prepare_data(self):
|
||||
self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)
|
||||
|
||||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# build `BITMAP index`
|
||||
index_params = {
|
||||
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
|
||||
# build BITMAP index
|
||||
**DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
|
||||
}
|
||||
self.build_multi_index(index_params=index_params)
|
||||
assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())
|
||||
|
||||
# enable offset cache
|
||||
for index_name in self.bitmap_support_dtype_names:
|
||||
self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexOffsetCache)
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
|
||||
@pytest.mark.parametrize("limit", [1, 10])
|
||||
def test_bitmap_offset_cache_query_with_modulo(self, expr, expr_field, limit):
|
||||
"""
|
||||
target:
|
||||
1. check modulo expression
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. query with the different expr and limit
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if
|
||||
eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
|
||||
@pytest.mark.parametrize("limit", [1, 10])
|
||||
def test_bitmap_offset_cache_query_with_string(self, expr, expr_field, limit, rex):
|
||||
"""
|
||||
target:
|
||||
1. check string expression
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. query with the different expr and limit
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
|
||||
@pytest.mark.parametrize("limit", [1, 10])
|
||||
def test_bitmap_offset_cache_query_with_operation(self, expr, expr_field, limit):
|
||||
"""
|
||||
target:
|
||||
1. check number operation
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. query with the different expr and limit
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to min(insert data, limit)
|
||||
"""
|
||||
# the total number of inserted data that matches the expression
|
||||
expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])
|
||||
|
||||
# query
|
||||
res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=['*'])
|
||||
assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_offset_cache_query_count(self):
|
||||
"""
|
||||
target:
|
||||
1. check query with count(*)
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. query with count(*)
|
||||
3. check query result
|
||||
expected:
|
||||
1. query response equal to insert nb
|
||||
"""
|
||||
# query count(*)
|
||||
self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": [{"count(*)": self.nb}]})
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_offset_cache_hybrid_search(self):
|
||||
"""
|
||||
target:
|
||||
1. check hybrid search with expr
|
||||
method:
|
||||
1. prepare some data and build `BITMAP index` on scalar fields
|
||||
2. hybrid search with expr
|
||||
expected:
|
||||
1. hybrid search with expr
|
||||
"""
|
||||
nq, limit = 10, 10
|
||||
vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)
|
||||
|
||||
req_list = [
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
|
||||
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
|
||||
expr=Expr.In('INT64', [i for i in range(10, 30)]).value
|
||||
),
|
||||
AnnSearchRequest(
|
||||
data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
|
||||
param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
|
||||
expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
|
||||
)
|
||||
]
|
||||
self.collection_wrap.hybrid_search(
|
||||
req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
|
||||
check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
|
||||
|
||||
# NOTE(review): the group name was "TestBitmapIndexOffsetCacheDQL" — a copy-paste
# from the offset-cache class. Each class-scoped fixture set should be pinned to
# its own xdist group so unrelated classes are not forced onto one worker.
@pytest.mark.xdist_group("TestBitmapIndexMmap")
class TestBitmapIndexMmap(TestCaseClassBase):
    """
    Scalar fields build BITMAP index, and altering index Mmap.

    Shares one collection across all tests in the class: data is inserted and
    indexed once (class-scoped fixture), then every test only runs DQL.

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, self.nb = "int64_pk", 3000

        # create a collection with an int64 pk, a float vector, and every
        # scalar field type supported by the test base
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT_VECTOR.name, *self().all_scalar_fields],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict
                },
            )
        )

        # prepare data (> 1024 triggering index building)
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        """Insert, flush, build BITMAP indexes, switch them to mmap, and load — once per class."""
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection, segment sealed
        self.collection_wrap.flush()

        # build `BITMAP index`
        index_params = {
            **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
            # build BITMAP index
            **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # enable mmap on every BITMAP index (index name defaults to the field name);
        # this is the behavior under test for the whole class
        for index_name in self.bitmap_support_dtype_names:
            self.collection_wrap.alter_index(index_name=index_name, extra_params=AlterIndexParams.IndexMmap)

        # load collection
        self.collection_wrap.load()

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
    @pytest.mark.parametrize("limit", [1, 10])
    def test_bitmap_mmap_query_with_modulo(self, expr, expr_field, limit):
        """
        target:
            1. check modulo expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression;
        # the eval rewrites the Milvus '%' expr into math.fmod(...) per value
        # (assumes gen_modulo_expression emits "(<field> % m) <op> r" — confirm against common_func)
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if
                          eval('math.fmod' + expr.replace(expr_field, str(i)).replace('%', ','))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
    @pytest.mark.parametrize("limit", [1, 10])
    def test_bitmap_mmap_query_with_string(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression,
        # recomputed client-side with the equivalent regex `rex`
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
    @pytest.mark.parametrize("limit", [1, 10])
    def test_bitmap_mmap_query_with_operation(self, expr, expr_field, limit):
        """
        target:
            1. check number operation
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with the different expr and limit
            3. check query result
        expected:
            1. query response equal to min(insert data, limit)
        """
        # the total number of inserted data that matches the expression;
        # the arithmetic expr is valid Python once the field name is substituted
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L2)
    def test_bitmap_mmap_query_count(self):
        """
        target:
            1. check query with count(*)
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. query with count(*)
            3. check query result
        expected:
            1. query response equal to insert nb
        """
        # query count(*)
        self.collection_wrap.query(expr='', output_fields=['count(*)'], check_task=CheckTasks.check_query_results,
                                   check_items={"exp_res": [{"count(*)": self.nb}]})

    @pytest.mark.tags(CaseLabel.L2)
    def test_bitmap_mmap_hybrid_search(self):
        """
        target:
            1. check hybrid search with expr
        method:
            1. prepare some data and build `BITMAP index` on scalar fields
            2. hybrid search with expr
        expected:
            1. hybrid search with expr
        """
        nq, limit = 10, 10
        vectors = cf.gen_field_values(self.collection_wrap.schema, nb=nq)

        # two ANN requests that differ only in their scalar filter expression
        req_list = [
            AnnSearchRequest(
                data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
                param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
                expr=Expr.In('INT64', [i for i in range(10, 30)]).value
            ),
            AnnSearchRequest(
                data=vectors.get(DataType.FLOAT_VECTOR.name), anns_field=DataType.FLOAT_VECTOR.name,
                param={"metric_type": MetricType.L2, "ef": 32}, limit=limit,
                expr=Expr.OR(Expr.GT(Expr.SUB('INT8', 30).subset, 10), Expr.LIKE('VARCHAR', 'a%')).value
            )
        ]
        self.collection_wrap.hybrid_search(
            req_list, RRFRanker(), limit, check_task=CheckTasks.check_search_results,
            check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit})
|
||||
|
||||
|
||||
class TestMixScenes(TestcaseBase):
|
||||
"""
|
||||
Testing cross-combination scenarios
|
||||
|
||||
Author: Ting.Wang
|
||||
"""
|
||||
|
||||
@pytest.mark.tags(CaseLabel.L2)
|
||||
def test_bitmap_upsert_and_delete(self, request):
|
||||
"""
|
||||
target:
|
||||
1. upsert data and query returns the updated data
|
||||
method:
|
||||
1. create a collection with scalar fields
|
||||
2. insert some data and build BITMAP index
|
||||
3. query the data of the specified primary key value
|
||||
4. upsert the specified primary key value
|
||||
5. re-query and check data equal to the updated data
|
||||
6. delete the specified primary key value
|
||||
7. re-query and check result is []
|
||||
expected:
|
||||
1. check whether the upsert and delete data is effective
|
||||
"""
|
||||
# init params
|
||||
collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000
|
||||
# scalar fields
|
||||
scalar_fields, expr = [DataType.INT64.name, f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}"], 'int64_pk == 10'
|
||||
|
||||
# connect to server before testing
|
||||
self._connect()
|
||||
|
||||
# create a collection with fields that can build `BITMAP` index
|
||||
self.collection_wrap.init_collection(
|
||||
name=collection_name,
|
||||
schema=cf.set_collection_schema(
|
||||
fields=[primary_field, DataType.FLOAT_VECTOR.name, *scalar_fields],
|
||||
field_params={primary_field: FieldParams(is_primary=True).to_dict},
|
||||
)
|
||||
)
|
||||
|
||||
# prepare data (> 1024 triggering index building)
|
||||
insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)
|
||||
self.collection_wrap.insert(data=list(insert_data.values()), check_task=CheckTasks.check_insert_result)
|
||||
|
||||
# flush collection, segment sealed
|
||||
self.collection_wrap.flush()
|
||||
|
||||
# rebuild `BITMAP` index
|
||||
self.build_multi_index(index_params={
|
||||
**DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name),
|
||||
**DefaultScalarIndexParams.list_bitmap(scalar_fields)
|
||||
})
|
||||
|
||||
# load collection
|
||||
self.collection_wrap.load()
|
||||
|
||||
# query before upsert
|
||||
expected_res = [{k: v[10] for k, v in insert_data.items() if k != DataType.FLOAT_VECTOR.name}]
|
||||
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": expected_res, "primary_field": primary_field})
|
||||
|
||||
# upsert int64_pk = 10
|
||||
upsert_data = cf.gen_field_values(self.collection_wrap.schema, nb=1,
|
||||
default_values={primary_field: [10]}, start_id=10)
|
||||
self.collection_wrap.upsert(data=list(upsert_data.values()))
|
||||
# re-query
|
||||
expected_upsert_res = [{k: v[0] for k, v in upsert_data.items() if k != DataType.FLOAT_VECTOR.name}]
|
||||
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": expected_upsert_res, "primary_field": primary_field})
|
||||
|
||||
# delete int64_pk = 10
|
||||
self.collection_wrap.delete(expr=expr)
|
||||
# re-query
|
||||
self.collection_wrap.query(expr=expr, output_fields=scalar_fields, check_task=CheckTasks.check_query_results,
|
||||
check_items={"exp_res": []})
|
||||
|
|
Loading…
Reference in New Issue