mirror of https://github.com/milvus-io/milvus.git

test: add hybrid search cases (#29830)

issue: #29799

Signed-off-by: binbin lv <binbin.lv@zilliz.com>

branch: pull/30684/head
parent 43e8cd531d
commit a556671119
@@ -229,7 +229,9 @@ class TestcaseBase(Base):
                                 partition_num=0, is_binary=False, is_all_data_type=False,
                                 auto_id=False, dim=ct.default_dim, is_index=True,
                                 primary_field=ct.default_int64_field_name, is_flush=True, name=None,
-                                enable_dynamic_field=False, with_json=True, random_primary_key=False, **kwargs):
+                                enable_dynamic_field=False, with_json=True, random_primary_key=False,
+                                multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                **kwargs):
         """
         target: create specified collections
         method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -251,7 +253,9 @@ class TestcaseBase(Base):
         # 1 create collection
         default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field,
                                                           enable_dynamic_field=enable_dynamic_field,
-                                                          with_json=with_json)
+                                                          with_json=with_json, multiple_dim_array=multiple_dim_array,
+                                                          is_partition_key=is_partition_key,
+                                                          vector_data_type=vector_data_type)
         if is_binary:
             default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim,
                                                                      primary_field=primary_field)
@@ -262,6 +266,7 @@ class TestcaseBase(Base):
                                                                      with_json=with_json)
         log.info("init_collection_general: collection creation")
         collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
         # 2 add extra partitions if specified (default is 1 partition named "_default")
         if partition_num > 0:
             cf.gen_partitions(collection_w, partition_num)
@@ -270,22 +275,22 @@ class TestcaseBase(Base):
             collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
                 cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
                                dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
-                               random_primary_key=random_primary_key)
+                               random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array,
+                               primary_field=primary_field, vector_data_type=vector_data_type)
             if is_flush:
                 assert collection_w.is_empty is False
                 assert collection_w.num_entities == nb
+        # 4 create default index if specified
+        if is_index:
             # This condition will be removed after auto index feature
-            if is_index:
-                if is_binary:
-                    collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
-                else:
-                    collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
-                collection_w.load()
-        elif is_index:
             if is_binary:
                 collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
             else:
                 collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
+                if len(multiple_dim_array) != 0 or is_all_data_type:
+                    for vector_name in vector_name_list:
+                        collection_w.create_index(vector_name, ct.default_flat_index)
+            collection_w.load()

         return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp
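With these hunks in place, one init_collection_general call can stand up a multi-vector collection, insert matching data, and index every vector field. A minimal usage sketch inside a TestcaseBase test (the prefix string and dims are illustrative, not from the commit):

    collection_w, vectors, _, insert_ids, time_stamp = \
        self.init_collection_general("hybrid_prefix", insert_data=True,
                                     multiple_dim_array=[64, 128],
                                     vector_data_type="FLOAT16_VECTOR")
    # The two extra fields are auto-named via gen_unique_str("multiple_vector")
    # and rediscovered through cf.extract_vector_field_name_list(collection_w).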
@@ -176,6 +176,22 @@ class ApiCollectionWrapper:
                                        timeout=timeout, **kwargs).run()
         return res, check_result

+    @trace()
+    def hybrid_search(self, reqs, rerank, limit, partition_names=None,
+                      output_fields=None, timeout=None, round_decimal=-1,
+                      check_task=None, check_items=None, **kwargs):
+        timeout = TIMEOUT if timeout is None else timeout
+
+        func_name = sys._getframe().f_code.co_name
+        res, check = api_request([self.collection.hybrid_search, reqs, rerank, limit, partition_names,
+                                  output_fields, timeout, round_decimal], **kwargs)
+        check_result = ResponseChecker(res, func_name, check_task, check_items, check,
+                                       reqs=reqs, rerank=rerank, limit=limit,
+                                       partition_names=partition_names,
+                                       output_fields=output_fields,
+                                       timeout=timeout, **kwargs).run()
+        return res, check_result
+
     @trace()
     def search_iterator(self, data, anns_field, param, batch_size, limit=-1, expr=None,
                         partition_names=None, output_fields=None, timeout=None, round_decimal=-1,
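The wrapper above forwards to pymilvus Collection.hybrid_search, which takes a list of AnnSearchRequest objects plus a reranker. A minimal sketch of the call the wrapper drives, assuming pymilvus >= 2.4, an already-loaded Collection bound to `collection`, and two FLOAT_VECTOR fields indexed with L2 (all names here are illustrative):

    import random
    from pymilvus import AnnSearchRequest, WeightedRanker

    dim = 128
    reqs = [AnnSearchRequest(data=[[random.random() for _ in range(dim)]],
                             anns_field="float_vector",
                             param={"metric_type": "L2"}, limit=10),
            AnnSearchRequest(data=[[random.random() for _ in range(dim)]],
                             anns_field="float_vector1",
                             param={"metric_type": "L2"}, limit=10)]
    # WeightedRanker fuses the per-request result sets with the given weights.
    res = collection.hybrid_search(reqs, WeightedRanker(0.6, 0.4), limit=10)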
@@ -293,8 +293,8 @@ class ResponseChecker:
         expected: check the search is ok
         """
         log.info("search_results_check: checking the searching results")
-        if func_name != 'search':
-            log.warning("The function name is {} rather than {}".format(func_name, "search"))
+        if func_name != 'search' and func_name != 'hybrid_search':
+            log.warning("The function name is {} rather than {} or {}".format(func_name, "search", "hybrid_search"))
         if len(check_items) == 0:
             raise Exception("No expect values found in the check task")
         if check_items.get("_async", None):
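Note on the guard: the two comparisons must be combined with `and`, as above. With `or` the condition holds for every func_name, since any string differs from at least one of 'search' and 'hybrid_search', so the warning would fire on every check. An equivalent and arguably clearer form:

    if func_name not in ('search', 'hybrid_search'):
        log.warning("The function name is {} rather than search or hybrid_search".format(func_name))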
@@ -18,6 +18,7 @@ from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrappe
 from common import common_type as ct
 from utils.util_log import test_log as log
 from customize.milvus_operator import MilvusOperator
+import tensorflow as tf

 fake = Faker()
 """" Methods of processing data """
@@ -142,8 +143,14 @@ def gen_double_field(name=ct.default_double_field_name, is_primary=False, descri


 def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
-                        description=ct.default_desc, **kwargs):
-    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.FLOAT_VECTOR,
+                        description=ct.default_desc, vector_data_type="FLOAT_VECTOR", **kwargs):
+    if vector_data_type == "FLOAT_VECTOR":
+        dtype = DataType.FLOAT_VECTOR
+    elif vector_data_type == "FLOAT16_VECTOR":
+        dtype = DataType.FLOAT16_VECTOR
+    elif vector_data_type == "BFLOAT16_VECTOR":
+        dtype = DataType.BFLOAT16_VECTOR
+    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=dtype,
                                                                    description=description, dim=dim,
                                                                    is_primary=is_primary, **kwargs)
     return float_vec_field
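The if/elif ladder maps the string type names onto pymilvus DataType members. A lookup-table variant (a sketch, not part of the commit) would also fail loudly on an unsupported name instead of leaving dtype unbound:

    _VECTOR_DTYPES = {"FLOAT_VECTOR": DataType.FLOAT_VECTOR,
                      "FLOAT16_VECTOR": DataType.FLOAT16_VECTOR,
                      "BFLOAT16_VECTOR": DataType.BFLOAT16_VECTOR}
    dtype = _VECTOR_DTYPES[vector_data_type]  # KeyError on an unknown type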
@@ -157,28 +164,60 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False
     return binary_vec_field


+def gen_float16_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
+                          description=ct.default_desc, **kwargs):
+    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.FLOAT16_VECTOR,
+                                                                   description=description, dim=dim,
+                                                                   is_primary=is_primary, **kwargs)
+    return float_vec_field
+
+
+def gen_bfloat16_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
+                           description=ct.default_desc, **kwargs):
+    float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.BFLOAT16_VECTOR,
+                                                                   description=description, dim=dim,
+                                                                   is_primary=is_primary, **kwargs)
+    return float_vec_field
+
+
 def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
                                   auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True,
-                                  multiple_dim_array=[], **kwargs):
+                                  multiple_dim_array=[], is_partition_key=None, vector_data_type="FLOAT_VECTOR",
+                                  **kwargs):
     if enable_dynamic_field:
         if primary_field is ct.default_int64_field_name:
-            fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+            if is_partition_key is None:
+                fields = [gen_int64_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
+            else:
+                fields = [gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name)),
+                          gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
         elif primary_field is ct.default_string_field_name:
-            fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
+            if is_partition_key is None:
+                fields = [gen_string_field(), gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
+            else:
+                fields = [gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name)),
+                          gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
         else:
             log.error("Primary key only support int or varchar")
             assert False
-        if len(multiple_dim_array) != 0:
-            for other_dim in multiple_dim_array:
-                fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim))
     else:
-        fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(),
-                  gen_float_vec_field(dim=dim)]
+        if is_partition_key is None:
+            int64_field = gen_int64_field()
+            vchar_field = gen_string_field()
+        else:
+            int64_field = gen_int64_field(is_partition_key=(is_partition_key == ct.default_int64_field_name))
+            vchar_field = gen_string_field(is_partition_key=(is_partition_key == ct.default_string_field_name))
+        fields = [int64_field, gen_float_field(), vchar_field, gen_json_field(),
+                  gen_float_vec_field(dim=dim, vector_data_type=vector_data_type)]
     if with_json is False:
         fields.remove(gen_json_field())
-    if len(multiple_dim_array) != 0:
-        for other_dim in multiple_dim_array:
-            fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim))
+    if len(multiple_dim_array) != 0:
+        for other_dim in multiple_dim_array:
+            fields.append(gen_float_vec_field(gen_unique_str("multiple_vector"), dim=other_dim,
+                                              vector_data_type=vector_data_type))

     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id,
@@ -278,11 +317,15 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
                                        auto_id=False, dim=ct.default_dim,
                                        enable_dynamic_field=False, with_json=True, **kwargs):
     if enable_dynamic_field:
-        fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+        fields = [gen_int64_field(), gen_float_vec_field(dim=dim),
+                  gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"),
+                  gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")]
     else:
         fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
                   gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
-                  gen_json_field(), gen_float_vec_field(dim=dim)]
+                  gen_json_field(), gen_float_vec_field(dim=dim),
+                  gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"),
+                  gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")]
     if with_json is False:
         fields.remove(gen_json_field())
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
@@ -324,11 +367,18 @@ def gen_schema_multi_string_fields(string_fields):
     return schema


-def gen_vectors(nb, dim):
-    vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
+def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"):
+    if vector_data_type == "FLOAT_VECTOR":
+        vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
+    elif vector_data_type == "FLOAT16_VECTOR":
+        vectors = gen_fp16_vectors(nb, dim)[1]
+    elif vector_data_type == "BFLOAT16_VECTOR":
+        vectors = gen_bf16_vectors(nb, dim)[1]
+
     if dim > 1:
-        vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-        vectors = vectors.tolist()
+        if vector_data_type == "FLOAT_VECTOR":
+            vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
+            vectors = vectors.tolist()
     return vectors
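L2 normalization is applied only to plain float vectors: the fp16/bf16 helpers added further down in common_func.py return packed bytes per vector, which preprocessing.normalize cannot consume. A quick sketch of the three call shapes (dims illustrative):

    float_vecs = gen_vectors(10, 128)                    # normalized lists of floats
    fp16_vecs = gen_vectors(10, 128, "FLOAT16_VECTOR")   # list of bytes objects
    bf16_vecs = gen_vectors(10, 128, "BFLOAT16_VECTOR")  # list of bytes objects
    assert len(fp16_vecs[0]) == 128 * 2                  # 2 bytes per element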
@@ -349,7 +399,8 @@ def gen_binary_vectors(num, dim):


 def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
-                               random_primary_key=False):
+                               random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
+                               vector_data_type="FLOAT_VECTOR"):
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -357,7 +408,7 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)]
-    float_vec_values = gen_vectors(nb, dim)
+    float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type)
     df = pd.DataFrame({
         ct.default_int64_field_name: int_values,
         ct.default_float_field_name: float_values,
@@ -365,24 +416,38 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
         ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })

     if with_json is False:
         df.drop(ct.default_json_field_name, axis=1, inplace=True)
+    if len(multiple_dim_array) != 0:
+        if len(multiple_vector_field_name) != len(multiple_dim_array):
+            log.error("multiple vector feature is enabled, please input the vector field name list "
+                      "not including the default vector field")
+            assert len(multiple_vector_field_name) == len(multiple_dim_array)
+        for i in range(len(multiple_dim_array)):
+            new_float_vec_values = gen_vectors(nb, multiple_dim_array[i], vector_data_type=vector_data_type)
+            df[multiple_vector_field_name[i]] = new_float_vec_values
+
     return df


-def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True, multiple_dim_array=[],
+                          multiple_vector_field_name=[], vector_data_type="FLOAT_VECTOR"):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
                 ct.default_float_field_name: i*1.0,
                 ct.default_string_field_name: str(i),
                 ct.default_json_field_name: {"number": i, "float": i*1.0},
-                ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+                ct.default_float_vec_field_name: gen_vectors(1, dim, vector_data_type=vector_data_type)[0]
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
         array.append(dict)
+        if len(multiple_dim_array) != 0:
+            for i in range(len(multiple_dim_array)):
+                dict[multiple_vector_field_name[i]] = gen_vectors(1, multiple_dim_array[i],
+                                                                  vector_data_type=vector_data_type)[0]

     return array
@@ -497,6 +562,8 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
     json_values = [{"number": i, "string": str(i), "bool": bool(i),
                     "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
+    float16_vec_values = gen_vectors(nb, dim, "FLOAT16_VECTOR")
+    bfloat16_vec_values = gen_vectors(nb, dim, "BFLOAT16_VECTOR")
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
         ct.default_int32_field_name: int32_values,
@@ -507,8 +574,9 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
         ct.default_double_field_name: double_values,
         ct.default_string_field_name: string_values,
         ct.default_json_field_name: json_values,
-        ct.default_float_vec_field_name: float_vec_values
+        ct.default_float_vec_field_name: float_vec_values,
+        ct.default_float16_vec_field_name: float16_vec_values,
+        ct.default_bfloat16_vec_field_name: bfloat16_vec_values
     })
     if with_json is False:
         df.drop(ct.default_json_field_name, axis=1, inplace=True)
@@ -531,7 +599,9 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
                 ct.default_string_field_name: str(i),
                 ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
                                              "list": [j for j in range(i, i + ct.default_json_list_length)]},
-                ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+                ct.default_float_vec_field_name: gen_vectors(1, dim)[0],
+                ct.default_float16_vec_field_name: gen_vectors(1, dim, "FLOAT16_VECTOR")[0],
+                ct.default_bfloat16_vec_field_name: gen_vectors(1, dim, "BFLOAT16_VECTOR")[0]
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
@@ -1384,7 +1454,8 @@ def gen_partitions(collection_w, partition_num=1):

 def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_type=False,
                 auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True,
-                random_primary_key=False):
+                random_primary_key=False, multiple_dim_array=[], primary_field=ct.default_int64_field_name,
+                vector_data_type="FLOAT_VECTOR"):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any
@@ -1396,13 +1467,23 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
     binary_raw_vectors = []
     insert_ids = []
     start = insert_offset
-    log.info(f"inserted {nb} data into collection {collection_w.name}")
+    log.info(f"inserting {nb} data into collection {collection_w.name}")
+    # extract the vector field name list
+    vector_name_list = extract_vector_field_name_list(collection_w)
+    # prepare data
     for i in range(num):
         log.debug("Dynamic field is enabled: %s" % enable_dynamic_field)
-        default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
-                                                  random_primary_key=random_primary_key)
-        if enable_dynamic_field:
-            default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json)
+        if not enable_dynamic_field:
+            default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
+                                                      random_primary_key=random_primary_key,
+                                                      multiple_dim_array=multiple_dim_array,
+                                                      multiple_vector_field_name=vector_name_list,
+                                                      vector_data_type=vector_data_type)
+        else:
+            default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json,
+                                                 multiple_dim_array=multiple_dim_array,
+                                                 multiple_vector_field_name=vector_name_list,
+                                                 vector_data_type=vector_data_type)
         if is_binary:
             default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start)
             binary_raw_vectors.extend(binary_raw_data)
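The branch keeps the two data shapes apart: static schemas take a pandas DataFrame, dynamic-field collections take a list of row dicts, and both now fill any extra vector fields. A hedged sketch of direct calls (the extra field name is illustrative; inside insert_data it comes from extract_vector_field_name_list):

    df = gen_default_dataframe_data(nb=10, dim=128, multiple_dim_array=[64],
                                    multiple_vector_field_name=["multiple_vector_x"],
                                    vector_data_type="FLOAT_VECTOR")
    rows = gen_default_rows_data(nb=10, dim=128, multiple_dim_array=[64],
                                 multiple_vector_field_name=["multiple_vector_x"])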
@@ -1414,10 +1495,18 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
         if auto_id:
             if enable_dynamic_field:
                 for data in default_data:
-                    data.pop(ct.default_int64_field_name, None)
+                    if primary_field == ct.default_int64_field_name:
+                        data.pop(ct.default_int64_field_name, None)
+                    elif primary_field == ct.default_string_field_name:
+                        data.pop(ct.default_string_field_name, None)
             else:
-                default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
+                if primary_field == ct.default_int64_field_name:
+                    default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
+                elif primary_field == ct.default_string_field_name:
+                    default_data.drop(ct.default_string_field_name, axis=1, inplace=True)
+        # insert
         insert_res = collection_w.insert(default_data, par[i].name)[0]
+        log.info(f"inserted {nb} data into collection {collection_w.name}")
         time_stamp = insert_res.timestamp
         insert_ids.extend(insert_res.primary_keys)
         vectors.append(default_data)
@@ -1559,3 +1648,104 @@ def get_wildcard_output_field_names(collection_w, output_fields):
         output_fields.remove("*")
         output_fields.extend(all_fields)
     return output_fields
+
+
+def extract_vector_field_name_list(collection_w):
+    """
+    extract the vector field name list
+    collection_w : the collection from which to extract the names of all the vector fields
+    return: the vector field name list, excluding the default float vector field name
+    """
+    schema_dict = collection_w.schema.to_dict()
+    fields = schema_dict.get('fields')
+    vector_name_list = []
+    for field in fields:
+        if str(field['type']) == 'DataType.FLOAT_VECTOR' \
+                or str(field['type']) == 'DataType.FLOAT16_VECTOR' \
+                or str(field['type']) == 'DataType.BFLOAT16_VECTOR':
+            if field['name'] != ct.default_float_vec_field_name:
+                vector_name_list.append(field['name'])
+
+    return vector_name_list
+
+
+def get_hybrid_search_base_results(search_res_dict_array):
+    """
+    merge the elements in the dict array
+    search_res_dict_array : the dict array whose elements are to be merged
+    return: the sorted ids and scores expected of the hybrid search
+    """
+    # calculate hybrid search base line
+    search_res_dict_merge = {}
+    ids_answer = []
+    score_answer = []
+    for i in range(len(search_res_dict_array) - 1):
+        for key in search_res_dict_array[i]:
+            if search_res_dict_array[i + 1].get(key):
+                search_res_dict_merge[key] = search_res_dict_array[i][key] + search_res_dict_array[i + 1][key]
+            else:
+                search_res_dict_merge[key] = search_res_dict_array[i][key]
+        for key in search_res_dict_array[i + 1]:
+            if not search_res_dict_array[i].get(key):
+                search_res_dict_merge[key] = search_res_dict_array[i + 1][key]
+    sorted_list = sorted(search_res_dict_merge.items(), key=lambda x: x[1], reverse=True)
+
+    for sort in sorted_list:
+        ids_answer.append(int(sort[0]))
+        score_answer.append(float(sort[1]))
+
+    return ids_answer, score_answer
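get_hybrid_search_base_results builds the expected baseline by summing, per id, the scores of the individual sub-requests; with the two sub-requests the tests use, the pairwise loop is exactly a key-wise sum. A compact equivalent for any number of result dicts (a sketch, not part of the commit) relies on collections.Counter, whose update() adds values for shared keys:

    from collections import Counter

    def merge_search_scores(search_res_dict_array):
        merged = Counter()
        for res_dict in search_res_dict_array:
            merged.update(res_dict)  # key-wise sum of {id: score}
        ranked = sorted(merged.items(), key=lambda kv: kv[1], reverse=True)
        return [int(k) for k, _ in ranked], [float(v) for _, v in ranked]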
+def gen_bf16_vectors(num, dim):
+    """
+    generate brain float16 vector data
+    raw_vectors : the vectors
+    bf16_vectors: the bytes used for insert
+    return: raw_vectors and bf16_vectors
+    """
+    raw_vectors = []
+    bf16_vectors = []
+    for _ in range(num):
+        raw_vector = [random.random() for _ in range(dim)]
+        raw_vectors.append(raw_vector)
+        # bf16_vector = np.array(raw_vector, dtype=tf.bfloat16).view(np.uint8).tolist()
+        bf16_vector = tf.cast(raw_vector, dtype=tf.bfloat16).numpy().view(np.uint8).tolist()
+        bf16_vectors.append(bytes(bf16_vector))
+
+    return raw_vectors, bf16_vectors
+
+
+def gen_fp16_vectors(num, dim):
+    """
+    generate float16 vector data
+    raw_vectors : the vectors
+    fp16_vectors: the bytes used for insert
+    return: raw_vectors and fp16_vectors
+    """
+    raw_vectors = []
+    fp16_vectors = []
+    for _ in range(num):
+        raw_vector = [random.random() for _ in range(dim)]
+        raw_vectors.append(raw_vector)
+        fp16_vector = np.array(raw_vector, dtype=np.float16).view(np.uint8).tolist()
+        fp16_vectors.append(bytes(fp16_vector))
+
+    return raw_vectors, fp16_vectors
+
+
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
+    """
+    generate vectors of the given vector data type
+    num: the number of vectors to generate
+    dim: the dimension of each vector
+    return: the generated vectors (float lists, or bytes for fp16/bf16)
+    """
+    if vector_data_type == "FLOAT_VECTOR":
+        vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
+    elif vector_data_type == "FLOAT16_VECTOR":
+        vectors = gen_fp16_vectors(num, dim)[1]
+    elif vector_data_type == "BFLOAT16_VECTOR":
+        vectors = gen_bf16_vectors(num, dim)[1]
+
+    return vectors
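Both generators pack each element into two bytes. numpy covers float16 natively, while bfloat16 has no numpy dtype, which is why the helper goes through tensorflow (and why the dependency is added to requirements below). For round-trip checks, the packed bytes can be decoded back to float32 with plain numpy on a little-endian host; a sketch, not part of the commit:

    import numpy as np

    def decode_fp16(buf):
        # Reinterpret packed bytes as float16, then widen to float32.
        return np.frombuffer(buf, dtype=np.float16).astype(np.float32)

    def decode_bf16(buf):
        # bfloat16 holds the top 16 bits of a float32: shift each 2-byte
        # value into the high half of a 32-bit word and reinterpret.
        return (np.frombuffer(buf, dtype='<u2').astype(np.uint32) << 16).view(np.float32)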
@@ -44,6 +44,8 @@ default_int32_array_field_name = "int32_array"
 default_float_array_field_name = "float_array"
 default_string_array_field_name = "string_array"
 default_float_vec_field_name = "float_vector"
+default_float16_vec_field_name = "float16_vector"
+default_bfloat16_vec_field_name = "bfloat16_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
 default_partition_name = "_default"
@@ -81,6 +83,7 @@ default_db = "default"
 max_database_num = 64
 max_collections_per_db = 65536
 max_collection_num = 65536
+max_hybrid_search_req_num = 1024


 IMAGE_REPOSITORY_MILVUS = "harbor.milvus.io/dockerhub/milvusdb/milvus"
@@ -53,3 +53,6 @@ deepdiff==6.7.1
 prettytable==3.8.0
 pyarrow==14.0.1
 fastparquet==2023.7.0
+
+# for generating bfloat16 data
+tensorflow==2.13.1
@@ -1252,6 +1252,14 @@ class TestIndexInvalid(TestcaseBase):
     Test create / describe / drop index interfaces with invalid collection names
     """

+    @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"])
+    def scalar_index(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    def vector_data_type(self, request):
+        yield request.param
+
     @pytest.fixture(
         scope="function",
         params=gen_invalid_strs()
@@ -1346,6 +1354,107 @@ class TestIndexInvalid(TestcaseBase):
                                   check_items={ct.err_code: 1100,
                                                ct.err_msg: "create index on JSON field is not supported"})

+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_scalar_index_on_vector_field(self, scalar_index, vector_data_type):
+        """
+        target: test create scalar index on vector field
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
+                                                                      dim=ct.default_dim, is_index=False,
+                                                                      vector_data_type=vector_data_type)[0:4]
+        scalar_index_params = {"index_type": scalar_index}
+        collection_w.create_index(ct.default_float_vec_field_name, index_params=scalar_index_params,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 65535,
+                                               ct.err_msg: f"invalid index type: {scalar_index}"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_scalar_index_on_binary_vector_field(self, scalar_index):
+        """
+        target: test create scalar index on binary vector field
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        collection_w = self.init_collection_general(prefix, is_binary=True, is_index=False)[0]
+        scalar_index_params = {"index_type": scalar_index}
+        collection_w.create_index(ct.default_binary_vec_field_name, index_params=scalar_index_params,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 65535,
+                                               ct.err_msg: f"invalid index type: {scalar_index}"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_inverted_index_on_json_field(self, vector_data_type):
+        """
+        target: test create scalar index on json field
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        collection_w = self.init_collection_general(prefix, is_index=False, vector_data_type=vector_data_type)[0]
+        scalar_index_params = {"index_type": "INVERTED"}
+        collection_w.create_index(ct.default_json_field_name, index_params=scalar_index_params,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 1100,
+                                               ct.err_msg: "create index on JSON field is not supported"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_inverted_index_on_array_field(self):
+        """
+        target: test create scalar index on array field
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        # 1. create a collection
+        schema = cf.gen_array_collection_schema()
+        collection_w = self.init_collection_wrap(schema=schema)
+        # 2. create index
+        scalar_index_params = {"index_type": "INVERTED"}
+        collection_w.create_index(ct.default_int32_array_field_name, index_params=scalar_index_params,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 1100,
+                                               ct.err_msg: "create index on Array field is not supported"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_inverted_index_no_vector_index(self):
+        """
+        target: test load a collection with a scalar index but no vector index
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        # 1. create a collection
+        collection_w = self.init_collection_general(prefix, is_index=False)[0]
+        # 2. create index
+        scalar_index_params = {"index_type": "INVERTED"}
+        collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params)
+        collection_w.load(check_task=CheckTasks.err_res,
+                          check_items={ct.err_code: 65535,
+                                       ct.err_msg: "there is no vector index on field: [float_vector], "
                                                    "please create index firstly"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("scalar_index", ["STL_SORT", "INVERTED"])
+    def test_create_inverted_index_no_all_vector_index(self, scalar_index):
+        """
+        target: test load a collection when not all vector fields are indexed
+        method: 1.create collection, and create index
+        expected: Raise exception
+        """
+        # 1. create a collection
+        multiple_dim_array = [ct.default_dim, ct.default_dim]
+        collection_w = self.init_collection_general(prefix, is_index=False, multiple_dim_array=multiple_dim_array)[0]
+        # 2. create index
+        scalar_index_params = {"index_type": scalar_index}
+        collection_w.create_index(ct.default_float_field_name, index_params=scalar_index_params)
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"}
+        collection_w.create_index(ct.default_float_vec_field_name, flat_index)
+        collection_w.load(check_task=CheckTasks.err_res,
+                          check_items={ct.err_code: 65535,
+                                       ct.err_msg: f"there is no vector index on field: "
+                                                   f"[{vector_name_list[0]} {vector_name_list[1]}], "
+                                                   f"please create index firstly"})
+

 @pytest.mark.tags(CaseLabel.GPU)
 class TestNewIndexAsync(TestcaseBase):
@@ -2024,3 +2133,100 @@ class TestScaNNIndex(TestcaseBase):
                  ct.err_msg: f"dimension must be able to be divided by 2, dimension: {dim}"}
         collection_w.create_index(default_field_name, index_params,
                                   check_task=CheckTasks.err_res, check_items=error)
+
+
+@pytest.mark.tags(CaseLabel.GPU)
+class TestInvertedIndexValid(TestcaseBase):
+    """
+    Test create / describe / drop index interfaces with inverted index
+    """
+
+    @pytest.fixture(scope="function", params=["Trie", "STL_SORT", "INVERTED"])
+    def scalar_index(self, request):
+        yield request.param
+
+    @pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
+    def vector_data_type(self, request):
+        yield request.param
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("scalar_field_name", [ct.default_int8_field_name, ct.default_int16_field_name,
+                                                   ct.default_int32_field_name, ct.default_int64_field_name,
+                                                   ct.default_float_field_name, ct.default_double_field_name,
+                                                   ct.default_string_field_name, ct.default_bool_field_name])
+    def test_create_inverted_index_on_all_supported_scalar_field(self, scalar_field_name):
+        """
+        target: test create scalar index on all supported scalar fields
+        method: 1.create collection, and create index
+        expected: create index successfully
+        """
+        collection_w = self.init_collection_general(prefix, insert_data=True, is_index=False, is_all_data_type=True)[0]
+        scalar_index_params = {"index_type": "INVERTED"}
+        index_name = "scalar_index_name"
+        collection_w.create_index(scalar_field_name, index_params=scalar_index_params, index_name=index_name)
+        assert collection_w.has_index(index_name=index_name)[0] is True
+        index_list = self.utility_wrap.list_indexes(collection_w.name)[0]
+        assert index_name in index_list
+        collection_w.flush()
+        result = self.utility_wrap.index_building_progress(collection_w.name, index_name)[0]
+        # assert False
+        start = time.time()
+        while True:
+            time.sleep(1)
+            res, _ = self.utility_wrap.index_building_progress(collection_w.name, index_name)
+            if 0 < res['indexed_rows'] <= default_nb:
+                break
+            if time.time() - start > 5:
+                raise MilvusException(1, "Index building did not complete within 5s")
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_create_multiple_inverted_index(self):
+        """
+        target: test create multiple scalar indexes
+        method: 1.create collection, and create index
+        expected: create index successfully
+        """
+        collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
+        scalar_index_params = {"index_type": "INVERTED"}
+        index_name = "scalar_index_name_0"
+        collection_w.create_index(ct.default_int8_field_name, index_params=scalar_index_params, index_name=index_name)
+        assert collection_w.has_index(index_name=index_name)[0] is True
+        index_name = "scalar_index_name_1"
+        collection_w.create_index(ct.default_int32_field_name, index_params=scalar_index_params, index_name=index_name)
+        assert collection_w.has_index(index_name=index_name)[0] is True
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_create_all_inverted_index(self):
+        """
+        target: test create multiple scalar indexes
+        method: 1.create collection, and create index
+        expected: create index successfully
+        """
+        collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
+        scalar_index_params = {"index_type": "INVERTED"}
+        scalar_fields = [ct.default_int8_field_name, ct.default_int16_field_name,
+                         ct.default_int32_field_name, ct.default_int64_field_name,
+                         ct.default_float_field_name, ct.default_double_field_name,
+                         ct.default_string_field_name, ct.default_bool_field_name]
+        for i in range(len(scalar_fields)):
+            index_name = f"scalar_index_name_{i}"
+            collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
+            assert collection_w.has_index(index_name=index_name)[0] is True
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_create_all_scalar_index(self):
+        """
+        target: test create multiple scalar indexes
+        method: 1.create collection, and create index
+        expected: create index successfully
+        """
+        collection_w = self.init_collection_general(prefix, is_index=False, is_all_data_type=True)[0]
+        scalar_index = ["Trie", "STL_SORT", "INVERTED"]
+        scalar_fields = [ct.default_string_field_name, ct.default_int16_field_name,
+                         ct.default_int32_field_name]
+        for i in range(len(scalar_fields)):
+            index_name = f"scalar_index_name_{i}"
+            scalar_index_params = {"index_type": f"{scalar_index[i]}"}
+            collection_w.create_index(scalar_fields[i], index_params=scalar_index_params, index_name=index_name)
+            assert collection_w.has_index(index_name=index_name)[0] is True
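The busy-wait in test_create_inverted_index_on_all_supported_scalar_field polls index_building_progress with a 5 s cap. A reusable variant of that pattern (a sketch; the helper name is illustrative, utility_wrap matching the wrapper used above):

    import time

    def wait_for_index_built(utility_wrap, collection_name, index_name,
                             expected_rows, timeout=5.0, interval=1.0):
        # Poll until indexed_rows reaches the target or the timeout elapses.
        deadline = time.time() + timeout
        while time.time() < deadline:
            res, _ = utility_wrap.index_building_progress(collection_name, index_name)
            if 0 < res['indexed_rows'] <= expected_rows:
                return True
            time.sleep(interval)
        return False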
@@ -1333,12 +1333,14 @@ class TestQueryParams(TestcaseBase):
         assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_float_field_name}

     @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue 30437")
     def test_query_output_all_fields(self, enable_dynamic_field, random_primary_key):
         """
         target: test query with none output field
         method: query with output field=None
         expected: return all fields
         """
+        enable_dynamic_field = False
         # 1. initialize with data
         collection_w, df, _, insert_ids = \
             self.init_collection_general(prefix, True, nb=10, is_all_data_type=True,
@@ -1347,7 +1349,8 @@ class TestQueryParams(TestcaseBase):
         all_fields = [ct.default_int64_field_name, ct.default_int32_field_name, ct.default_int16_field_name,
                       ct.default_int8_field_name, ct.default_bool_field_name, ct.default_float_field_name,
                       ct.default_double_field_name, ct.default_string_field_name, ct.default_json_field_name,
-                      ct.default_float_vec_field_name]
+                      ct.default_float_vec_field_name, ct.default_float16_vec_field_name,
+                      ct.default_bfloat16_vec_field_name]
         if enable_dynamic_field:
             res = df[0][:2]
         else: