mirror of https://github.com/milvus-io/milvus.git

test: add query expr test cases (#36073)

1. query with expr under different scalar index types
2. test framework supports preparing one piece of data and multiple parameter queries

Signed-off-by: wangting0128 <ting.wang@zilliz.com>
Branch: pull/36128/head
parent 3a381bc247
commit c916407f37
@@ -18,7 +18,7 @@ from common import common_func as cf
from common import common_type as ct
from common.common_params import IndexPrams

-from pymilvus import ResourceGroupInfo
+from pymilvus import ResourceGroupInfo, DataType


class Base:

@@ -35,6 +35,7 @@ class Base:
    resource_group_list = []
    high_level_api_wrap = None
    skip_connection = False

    def setup_class(self):
        log.info("[setup_class] Start setup class...")

@@ -44,6 +45,9 @@ class Base:
    def setup_method(self, method):
        log.info(("*" * 35) + " setup " + ("*" * 35))
        log.info("[setup_method] Start setup test case %s." % method.__name__)
        self._setup_objects()

    def _setup_objects(self):
        self.connection_wrap = ApiConnectionsWrapper()
        self.utility_wrap = ApiUtilityWrapper()
        self.collection_wrap = ApiCollectionWrapper()

@@ -57,7 +61,9 @@ class Base:
    def teardown_method(self, method):
        log.info(("*" * 35) + " teardown " + ("*" * 35))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)
        self._teardown_objects()

    def _teardown_objects(self):
        try:
            """ Drop collection before disconnect """
            if not self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)[0]:

@@ -80,7 +86,8 @@ class Base:
            rgs_list = self.utility_wrap.list_resource_groups()[0]
            for rg_name in self.resource_group_list:
                if rg_name is not None and rg_name in rgs_list:
-                   rg = self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
+                   rg = \
+                       self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0]
                    if isinstance(rg, ResourceGroupInfo):
                        if rg.num_available_node > 0:
                            self.utility_wrap.transfer_node(source=rg_name,
@@ -266,9 +273,9 @@ class TestcaseBase(Base):
                                                       primary_field=primary_field)
        if vector_data_type == ct.sparse_vector:
            default_schema = cf.gen_default_sparse_schema(auto_id=auto_id, primary_field=primary_field,
-                                                     enable_dynamic_field=enable_dynamic_field,
-                                                     with_json=with_json,
-                                                     multiple_dim_array=multiple_dim_array)
+                                                          enable_dynamic_field=enable_dynamic_field,
+                                                          with_json=with_json,
+                                                          multiple_dim_array=multiple_dim_array)
        if is_all_data_type:
            default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
                                                                   primary_field=primary_field,

@@ -390,7 +397,8 @@ class TestcaseBase(Base):
        self.utility_wrap.create_role()

        # grant privilege to the role
-       self.utility_wrap.role_grant(object=privilege_object, object_name=object_name, privilege=privilege, db_name=db_name)
+       self.utility_wrap.role_grant(object=privilege_object, object_name=object_name, privilege=privilege,
+                                    db_name=db_name)

        # bind the role to the user
        self.utility_wrap.role_add_user(tmp_user)
@@ -417,3 +425,54 @@ class TestcaseBase(Base):
        indexes = {n.field_name: n.params for n in self.collection_wrap.indexes}
        log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes))
        return indexes


class TestCaseClassBase(TestcaseBase):
    """
    Set up and tear down shared objects at class scope
    """

    def setup_class(self):
        log.info("[setup_class] " + " Start setup class ".center(100, "~"))
        self._setup_objects(self)

    def teardown_class(self):
        log.info("[teardown_class]" + " Start teardown class ".center(100, "~"))
        self._teardown_objects(self)

    def setup_method(self, method):
        log.info(" setup ".center(80, "*"))
        log.info("[setup_method] Start setup test case %s." % method.__name__)

    def teardown_method(self, method):
        log.info(" teardown ".center(80, "*"))
        log.info("[teardown_method] Start teardown test case %s..." % method.__name__)

    @property
    def all_scalar_fields(self):
        dtypes = [DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR, DataType.BOOL,
                  DataType.FLOAT, DataType.DOUBLE]
        dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name]
        return dtype_names

    @property
    def all_index_scalar_fields(self):
        return list(set(self.all_scalar_fields) - {DataType.JSON.name})

    @property
    def inverted_support_dtype_names(self):
        return self.all_index_scalar_fields

    @property
    def inverted_not_support_dtype_names(self):
        return [DataType.JSON.name]

    @property
    def bitmap_support_dtype_names(self):
        dtypes = [DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.BOOL, DataType.VARCHAR]
        dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes]
        return dtype_names

    @property
    def bitmap_not_support_dtype_names(self):
        return list(set(self.all_scalar_fields) - set(self.bitmap_support_dtype_names))
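As a quick orientation to the naming convention the new tests rely on: scalar fields are named after their `DataType` member, with an `ARRAY_` prefix for array fields. A minimal standalone sketch of the same derivation, assuming only `pymilvus` is available:

    from pymilvus import DataType

    scalar_dtypes = [DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64,
                     DataType.VARCHAR, DataType.BOOL, DataType.FLOAT, DataType.DOUBLE]

    # Field names mirror the DataType names; array fields get an "ARRAY_" prefix.
    all_scalar_fields = ([d.name for d in scalar_dtypes]
                         + [f"ARRAY_{d.name}" for d in scalar_dtypes]
                         + [DataType.JSON.name])
    print(all_scalar_fields[0], all_scalar_fields[8], all_scalar_fields[-1])
    # -> INT8 ARRAY_INT8 JSON

Note that `all_index_scalar_fields` and `bitmap_not_support_dtype_names` are built with `set` arithmetic, so their ordering is not deterministic.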
@@ -38,3 +38,7 @@ class IndexErrorMessage(ExceptionsMessage):
    VectorMetricTypeExist = "metric type not set for vector index"
    CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field"
    CheckBitmapOnPK = "create bitmap index on primary key not supported"


class QueryErrorMessage(ExceptionsMessage):
    ParseExpressionFailed = "failed to create query plan: cannot parse expression: "
@@ -21,6 +21,8 @@ from utils.util_log import test_log as log
from customize.milvus_operator import MilvusOperator
import pickle
fake = Faker()

from common.common_params import Expr
""" Methods of processing data """


@@ -1293,6 +1295,11 @@ def gen_data_by_collection_field(field, nb=None, start=None):
        if nb is None:
            return [np.float32(random.random()) for _ in range(max_capacity)]
        return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
    if element_type == DataType.DOUBLE:
        if nb is None:
            return [np.float64(random.random()) for _ in range(max_capacity)]
        return [[np.float64(random.random()) for _ in range(max_capacity)] for _ in range(nb)]

    if element_type == DataType.VARCHAR:
        max_length = field.params['max_length']
        max_length = min(20, max_length - 1)
@@ -1335,6 +1342,24 @@ def gen_values(schema: CollectionSchema, nb, start_id=0, default_values: dict =
    return data


def gen_field_values(schema: CollectionSchema, nb, start_id=0, default_values: dict = {}) -> dict:
    """
    generate a value list for each field in the collection schema;
    values for specified fields can be overridden via `default_values`

    return: <dict>
        <field name>: <value list>
    """
    data = {}
    for field in schema.fields:
        default_value = default_values.get(field.name, None)
        if default_value is not None:
            data[field.name] = default_value
        elif field.auto_id is False:
            data[field.name] = gen_data_by_collection_field(field, nb, start_id * nb)
    return data
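Since the test classes below pass `list(self.insert_data.values())` straight to `insert()`, the shape of this mapping matters. A hypothetical usage sketch (the schema and field names here are made up for illustration):

    # `schema` is assumed to be a CollectionSchema with an auto-id PK "id",
    # a scalar field "INT8", and a vector field "FLOAT16_VECTOR".
    data = gen_field_values(schema, nb=10)
    # data == {"INT8": [...10 values...], "FLOAT16_VECTOR": [...10 vectors...]}
    # (the auto-id field "id" is skipped, so it is absent from the dict)
    collection_wrap.insert(data=list(data.values()))

Keeping the result keyed by field name is what lets a single prepared dataset be re-checked field by field in the parametrized queries below.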

def gen_json_files_for_bulk_insert(data, schema, data_dir):
    for d in data:
        if len(d) > 0:
@@ -1746,6 +1771,48 @@ def gen_integer_overflow_expressions():
    return expressions


def gen_modulo_expression(expr_fields):
    exprs = []
    for field in expr_fields:
        exprs.extend([
            (Expr.EQ(Expr.MOD(field, 10).subset, 1).value, field),
            (Expr.LT(Expr.MOD(field, 17).subset, 9).value, field),
            (Expr.LE(Expr.MOD(field, 100).subset, 50).value, field),
            (Expr.GT(Expr.MOD(field, 50).subset, 40).value, field),
            (Expr.GE(Expr.MOD(field, 29).subset, 15).value, field),
            (Expr.NE(Expr.MOD(field, 29).subset, 10).value, field),
        ])
    return exprs
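Each tuple pairs a rendered Milvus expression string with the field it filters on, so a test knows which column to re-check client-side. Assuming the `Expr` builder in common_params.py renders `MOD` as `%` and `.subset` as a parenthesized sub-expression, the first entry likely looks like this (exact spacing of the rendered string is an assumption):

    expr, field = gen_modulo_expression(['INT64'])[0]
    # expr  -> something like "(INT64 % 10) == 1"
    # field -> 'INT64'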

def gen_varchar_expression(expr_fields):
    exprs = []
    for field in expr_fields:
        exprs.extend([
            (Expr.like(field, "a%").value, field, r'^a.*'),
            (Expr.LIKE(field, "%b").value, field, r'.*b$'),
            (Expr.AND(Expr.like(field, "%b").subset, Expr.LIKE(field, "z%").subset).value, field, r'^z.*b$'),
            (Expr.And(Expr.like(field, "i%").subset, Expr.LIKE(field, "%j").subset).value, field, r'^i.*j$'),
            (Expr.OR(Expr.like(field, "%h%").subset, Expr.LIKE(field, "%jo").subset).value, field, fr'(?:h.*|.*jo$)'),
            (Expr.Or(Expr.like(field, "ip%").subset, Expr.LIKE(field, "%yu%").subset).value, field, fr'(?:^ip.*|.*yu)'),
        ])
    return exprs
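The varchar tuples additionally carry a Python regex mirroring the LIKE pattern, which is what lets the tests count expected matches client-side with `re.search`. A hedged usage sketch (the rendered expr string is an assumption):

    import re

    expr, field, rex = gen_varchar_expression(['VARCHAR'])[0]
    # expr -> something like: VARCHAR like "a%"
    # rex  -> r'^a.*', the client-side mirror of that LIKE pattern
    matched = [s for s in ["apple", "banana"] if re.search(rex, s)]
    # -> ["apple"]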

def gen_number_operation(expr_fields):
    exprs = []
    for field in expr_fields:
        exprs.extend([
            (Expr.LT(Expr.ADD(field, 23), 100).value, field),
            (Expr.LT(Expr.ADD(-23, field), 121).value, field),
            (Expr.LE(Expr.SUB(field, 123), 99).value, field),
            (Expr.GT(Expr.MUL(field, 2), 88).value, field),
            (Expr.GT(Expr.MUL(3, field), 137).value, field),
            (Expr.GE(Expr.DIV(field, 30), 20).value, field),
        ])
    return exprs


def l2(x, y):
    return np.linalg.norm(np.array(x) - np.array(y))
@@ -1,6 +1,8 @@
from dataclasses import dataclass
from typing import List, Dict

from pymilvus import DataType

""" Define param names """


@@ -55,6 +57,10 @@ class ExprBase:
    def __repr__(self):
        return self.expr

    @property
    def value(self):
        return self.expr


class Expr:
    # BooleanConstant: 'true' | 'True' | 'TRUE' | 'false' | 'False' | 'FALSE'
@@ -344,6 +350,10 @@ class DefaultScalarIndexParams:
    def Default(field: str):
        return {field: IndexPrams()}

    @staticmethod
    def list_default(fields: List[str]) -> Dict[str, IndexPrams]:
        return {n: IndexPrams() for n in fields}

    @staticmethod
    def Trie(field: str):
        return {field: IndexPrams(index_type=IndexName.Trie)}

@@ -356,6 +366,10 @@ class DefaultScalarIndexParams:
    def INVERTED(field: str):
        return {field: IndexPrams(index_type=IndexName.INVERTED)}

    @staticmethod
    def list_inverted(fields: List[str]) -> Dict[str, IndexPrams]:
        return {n: IndexPrams(index_type=IndexName.INVERTED) for n in fields}

    @staticmethod
    def BITMAP(field: str):
        return {field: IndexPrams(index_type=IndexName.BITMAP)}
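These helpers all return single-entry dicts so callers can splat several of them into one mapping; that is the composition pattern the new test classes use. A sketch with assumed field names:

    index_params = {
        **DefaultVectorIndexParams.HNSW("FLOAT16_VECTOR"),               # one vector index
        **DefaultScalarIndexParams.list_inverted(["INT64", "VARCHAR"]),  # many scalar indexes at once
    }
    # index_params maps each field name to an IndexPrams; build_multi_index()
    # (defined on TestcaseBase) then presumably creates one index per entry.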
@@ -0,0 +1,524 @@
import re
import pytest
from pymilvus import DataType

from common.common_type import CaseLabel, CheckTasks
from common import common_type as ct
from common import common_func as cf
from common.code_mapping import QueryErrorMessage as qem
from common.common_params import (
    IndexName, FieldParams, IndexPrams, DefaultVectorIndexParams, DefaultScalarIndexParams, MetricType, Expr
)
from base.client_base import TestcaseBase, TestCaseClassBase


@pytest.mark.xdist_group("TestNoIndexDQLExpr")
class TestNoIndexDQLExpr(TestCaseClassBase):
    """
    Scalar fields are not indexed; verify DQL requests

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, nb = "int64_pk", 3000

        # create a collection with fields
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_no_index_dql_expr"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
                        DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict,
                    DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict,
                    DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict
                },
            )
        )

        # prepare data (nb > 1024 so that index building is triggered)
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection to seal the segment
        self.collection_wrap.flush()

        # build vector indexes only; scalar fields stay unindexed in this class
        index_params = {
            **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.IVF_FLAT(DataType.BFLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
            **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()
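This class-scoped, autouse fixture is the mechanism behind the commit's second goal: the data is inserted, flushed, indexed, and loaded once per class, and every parametrized query below reuses it. The `xdist_group` mark on the class (together with the `--dist loadgroup` flag added to the CI script at the end of this diff) pins the whole class to a single pytest-xdist worker so all of its tests see the same collection. A minimal sketch of the pattern, with hypothetical names:

    import pytest

    @pytest.mark.xdist_group("TestExampleGroup")   # keep the whole class on one xdist worker
    class TestExampleGroup:

        @pytest.fixture(scope="class", autouse=True)
        def prepare_data(self):
            # runs once per class: insert, flush, build indexes, load
            ...

        @pytest.mark.parametrize("limit", [1, 10, 3000])
        def test_query(self, limit):
            # every parametrization reuses the collection prepared above
            ...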

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, output_fields", [
        (Expr.In(Expr.MOD('INT8', 13).subset, [0, 1, 2]).value, ['INT8']),
        (Expr.Nin(Expr.MOD('INT16', 100).subset, [10, 20, 30, 40]).value, ['INT16']),
    ])
    def test_no_index_query_with_invalid_expr(self, expr, output_fields):
        """
        target:
            1. check invalid expr
        method:
            1. prepare some data
            2. query with the invalid expr
        expected:
            1. raises expected error
        """
        # query
        self.collection_wrap.query(expr=expr, check_task=CheckTasks.err_res,
                                   check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed})

    @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_no_index_query_with_modulo(self, expr, expr_field, limit):
        """
        target:
            1. check modulo expression
        method:
            1. prepare some data
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"
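The expected count here is computed by textually substituting each stored value into the expression and letting Python evaluate it, which works because `%`, the comparison operators, and `in` mean the same thing in Python as in Milvus expressions. An illustration (the rendered expr string is an assumption):

    expr, expr_field = "(INT8 % 13) in [0, 1, 2]", "INT8"
    i = 7
    eval(expr.replace(expr_field, str(i)))   # evaluates "(7 % 13) in [0, 1, 2]" -> False

One caveat this inherits: the substitution is plain string replacement, so it relies on the field name not occurring elsewhere in the expression text.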

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_no_index_query_with_string(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_no_index_query_with_operation(self, expr, expr_field, limit):
        """
        target:
            1. check number operation
        method:
            1. prepare some data
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"


@pytest.mark.xdist_group("TestHybridIndexDQLExpr")
class TestHybridIndexDQLExpr(TestCaseClassBase):
    """
    Scalar fields build `Hybrid index`; verify DQL requests

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, nb = "int64_pk", 3000

        # create a collection with fields
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_hybrid_index_dql_expr"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
                        DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict,
                    DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict,
                    DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict
                },
            )
        )

        # prepare data (nb > 1024 so that index building is triggered)
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection to seal the segment
        self.collection_wrap.flush()

        # build vector indexes, plus `Hybrid index` on the scalar fields
        index_params = {
            **DefaultVectorIndexParams.DISKANN(DataType.FLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.IVF_SQ8(DataType.BFLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.SPARSE_INVERTED_INDEX(DataType.SPARSE_FLOAT_VECTOR.name),
            **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
            # `Hybrid index` (default scalar index) on all indexable scalar fields
            **DefaultScalarIndexParams.list_default([self.primary_field] + self.all_index_scalar_fields)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

    @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_hybrid_index_query_with_modulo(self, expr, expr_field, limit):
        """
        target:
            1. check modulo expression
        method:
            1. prepare some data and build `Hybrid index` on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_hybrid_index_query_with_string(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build `Hybrid index` on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_hybrid_index_query_with_operation(self, expr, expr_field, limit):
        """
        target:
            1. check number operation
        method:
            1. prepare some data and build `Hybrid index` on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"


@pytest.mark.xdist_group("TestInvertedIndexDQLExpr")
class TestInvertedIndexDQLExpr(TestCaseClassBase):
    """
    Scalar fields build INVERTED index; verify DQL requests

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, nb = "int64_pk", 3000

        # create a collection with fields
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_inverted_index_dql_expr"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
                        DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict,
                    DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict,
                    DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict
                },
            )
        )

        # prepare data (nb > 1024 so that index building is triggered)
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection to seal the segment
        self.collection_wrap.flush()

        # build vector indexes, plus INVERTED index on the scalar fields
        index_params = {
            **DefaultVectorIndexParams.IVF_FLAT(DataType.FLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
            **DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name),
            # INVERTED index on all supported scalar fields
            **DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

    @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_modulo_expression(['int64_pk', 'INT8', 'INT16', 'INT32', 'INT64']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_inverted_index_query_with_modulo(self, expr, expr_field, limit):
        """
        target:
            1. check modulo expression
        method:
            1. prepare some data and build INVERTED index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_inverted_index_query_with_string(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build INVERTED index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_inverted_index_query_with_operation(self, expr, expr_field, limit):
        """
        target:
            1. check number operation
        method:
            1. prepare some data and build INVERTED index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"


@pytest.mark.xdist_group("TestBitmapIndexDQLExpr")
class TestBitmapIndexDQLExpr(TestCaseClassBase):
    """
    Scalar fields build BITMAP index; verify DQL requests

    Author: Ting.Wang
    """

    def setup_class(self):
        super().setup_class(self)

        # connect to server before testing
        self._connect(self)

        # init params
        self.primary_field, nb = "int64_pk", 3000

        # create a collection with fields
        self.collection_wrap.init_collection(
            name=cf.gen_unique_str("test_bitmap_index_dql_expr"),
            schema=cf.set_collection_schema(
                fields=[self.primary_field, DataType.FLOAT16_VECTOR.name, DataType.BFLOAT16_VECTOR.name,
                        DataType.SPARSE_FLOAT_VECTOR.name, DataType.BINARY_VECTOR.name, *self().all_scalar_fields],
                field_params={
                    self.primary_field: FieldParams(is_primary=True).to_dict,
                    DataType.FLOAT16_VECTOR.name: FieldParams(dim=3).to_dict,
                    DataType.BFLOAT16_VECTOR.name: FieldParams(dim=6).to_dict,
                    DataType.BINARY_VECTOR.name: FieldParams(dim=16).to_dict
                },
            )
        )

        # prepare data (nb > 1024 so that index building is triggered)
        self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=nb)

    @pytest.fixture(scope="class", autouse=True)
    def prepare_data(self):
        self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result)

        # flush collection to seal the segment
        self.collection_wrap.flush()

        # build vector indexes, plus BITMAP index on the scalar fields
        index_params = {
            **DefaultVectorIndexParams.HNSW(DataType.FLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name),
            **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name),
            **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name),
            # BITMAP index on the supported scalar fields (not on the primary key)
            **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names)
        }
        self.build_multi_index(index_params=index_params)
        assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys())

        # load collection
        self.collection_wrap.load()

    @pytest.mark.skip("https://github.com/milvus-io/milvus/issues/36054")
    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_bitmap_index_query_with_modulo(self, expr, expr_field, limit):
        """
        target:
            1. check modulo expression
        method:
            1. prepare some data and build BITMAP index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("expr, expr_field, rex", cf.gen_varchar_expression(['VARCHAR']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_bitmap_index_query_with_string(self, expr, expr_field, limit, rex):
        """
        target:
            1. check string expression
        method:
            1. prepare some data and build BITMAP index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize(
        "expr, expr_field", cf.gen_number_operation(['INT8', 'INT16', 'INT32', 'INT64', 'FLOAT', 'DOUBLE']))
    @pytest.mark.parametrize("limit", [1, 10, 3000])
    def test_bitmap_index_query_with_operation(self, expr, expr_field, limit):
        """
        target:
            1. check number operation
        method:
            1. prepare some data and build BITMAP index on scalar fields
            2. query with different exprs and limits
            3. check query result
        expected:
            1. query returns min(matched rows, limit) results
        """
        # count the inserted rows that match the expression
        expr_count = len([i for i in self.insert_data.get(expr_field, []) if eval(expr.replace(expr_field, str(i)))])

        # query
        res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field])
        assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}"

@@ -132,10 +132,10 @@ cd ${ROOT}/tests/python_client
if [[ -n "${TEST_TIMEOUT:-}" ]]; then

    timeout "${TEST_TIMEOUT}" pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \
-        --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-}
+        --html=${CI_LOG_PATH}/report.html --self-contained-html --dist loadgroup ${@:-}
else
    pytest --host ${MILVUS_SERVICE_NAME} --port ${MILVUS_SERVICE_PORT} \
-        --html=${CI_LOG_PATH}/report.html --self-contained-html ${@:-}
+        --html=${CI_LOG_PATH}/report.html --self-contained-html --dist loadgroup ${@:-}
fi

# # Run concurrent test with 5 processes
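The only change here is the added `--dist loadgroup` flag. pytest-xdist's loadgroup distribution schedules tests by their `xdist_group` mark instead of one test at a time, so each of the new `@pytest.mark.xdist_group(...)` classes above runs entirely on a single worker; without this flag, a class-scoped collection prepared on one worker would not be visible to the same class's tests scheduled on another.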