mirror of https://github.com/milvus-io/milvus.git
[test]Update common func with string (#16803)
Signed-off-by: jingkl <jingjing.jia@zilliz.com>
pull/16817/head
parent
b60998038f
commit
a0110998e8
|
@ -48,6 +48,10 @@ def gen_bool_field(name=ct.default_bool_field_name, description=ct.default_desc,
|
|||
is_primary=is_primary, **kwargs)
|
||||
return bool_field
|
||||
|
||||
def gen_string_field(name=ct.default_string_field_name, description=ct.default_desc, is_primary=False,
                     max_length_per_row=ct.default_length, **kwargs):
    """
    gen a VARCHAR field schema through ApiFieldSchemaWrapper
    :param name: field name, default default_string_field_name
    :param description: field description, default default_desc
    :param is_primary: forwarded to init_field_schema as is_primary
    :param max_length_per_row: forwarded to init_field_schema, default default_length
    :param kwargs: extra keyword arguments forwarded unchanged to init_field_schema
    :return: the first element of the pair returned by init_field_schema
    """
    schema_wrapper = ApiFieldSchemaWrapper()
    varchar_field, _ = schema_wrapper.init_field_schema(
        name=name,
        dtype=DataType.VARCHAR,
        description=description,
        max_length_per_row=max_length_per_row,
        is_primary=is_primary,
        **kwargs
    )
    return varchar_field
|
||||
|
||||
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
|
||||
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
|
||||
|
@ -103,7 +107,16 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False
|
|||
|
||||
def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
                                  auto_id=False, dim=ct.default_dim):
    """
    gen the default collection schema with fields: int64, float, string and float_vec
    :param description: collection description, default default_desc
    :param primary_field: name of the primary field, default default_int64_field_name
    :param auto_id: forwarded to init_collection_schema as auto_id
    :param dim: dimension of the float vector field, default default_dim
    :return: the first element of the pair returned by init_collection_schema
    """
    # Only the field list that includes the string field is kept; the earlier
    # three-field assignment was overwritten before use.
    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                    primary_field=primary_field, auto_id=auto_id)
    return schema
|
||||
|
||||
|
||||
|
||||
def gen_string_pk_default_collection_schema(description=ct.default_desc, primary_field=ct.default_string_field_name,
                                            auto_id=False, dim=ct.default_dim):
    """
    gen a collection schema with fields int64, float, string and float_vec,
    where primary_field defaults to the string field name
    :param description: collection description, default default_desc
    :param primary_field: name of the primary field, default default_string_field_name
    :param auto_id: forwarded to init_collection_schema as auto_id
    :param dim: dimension of the float vector field, default default_dim
    :return: the first element of the pair returned by init_collection_schema
    """
    schema_fields = [
        gen_int64_field(),
        gen_float_field(),
        gen_string_field(),
        gen_float_vec_field(dim=dim),
    ]
    schema_wrapper = ApiCollectionSchemaWrapper()
    string_pk_schema, _ = schema_wrapper.init_collection_schema(
        fields=schema_fields,
        description=description,
        primary_field=primary_field,
        auto_id=auto_id,
    )
    return string_pk_schema
|
||||
|
@ -113,7 +126,7 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
|
|||
primary_field=ct.default_int64_field_name,
|
||||
auto_id=False, dim=ct.default_dim):
|
||||
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
|
||||
gen_bool_field(), gen_float_field(), gen_double_field(), gen_float_vec_field(dim=dim)]
|
||||
gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
|
||||
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
|
||||
primary_field=primary_field, auto_id=auto_id)
|
||||
return schema
|
||||
|
@ -127,7 +140,7 @@ def gen_collection_schema(fields, primary_field=None, description=ct.default_des
|
|||
|
||||
def gen_default_binary_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
|
||||
auto_id=False, dim=ct.default_dim):
|
||||
fields = [gen_int64_field(), gen_float_field(), gen_binary_vec_field(dim=dim)]
|
||||
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_binary_vec_field(dim=dim)]
|
||||
binary_schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
|
||||
primary_field=primary_field,
|
||||
auto_id=auto_id)
|
||||
|
@ -135,19 +148,32 @@ def gen_default_binary_collection_schema(description=ct.default_desc, primary_fi
|
|||
|
||||
|
||||
def gen_schema_multi_vector_fields(vec_fields):
    """
    gen a collection schema with fields int64, float, string, float_vec plus the given vec_fields
    :param vec_fields: iterable of field schemas appended after the default fields
    :return: the first element of the pair returned by init_collection_schema
    """
    # Only the field list that includes the string field is kept; the earlier
    # three-field assignment was overwritten before use.
    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field()]
    fields.extend(vec_fields)
    primary_field = ct.default_int64_field_name
    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=ct.default_desc,
                                                                    primary_field=primary_field, auto_id=False)
    return schema
|
||||
|
||||
def gen_schema_multi_string_fields(string_fields):
    """
    gen a collection schema with fields int64, float, string, float_vec plus the given string_fields
    :param string_fields: iterable of field schemas appended after the default fields
    :return: the first element of the pair returned by init_collection_schema
    """
    all_fields = [
        gen_int64_field(),
        gen_float_field(),
        gen_string_field(),
        gen_float_vec_field(),
        *string_fields,
    ]
    schema_wrapper = ApiCollectionSchemaWrapper()
    multi_string_schema, _ = schema_wrapper.init_collection_schema(
        fields=all_fields,
        description=ct.default_desc,
        primary_field=ct.default_int64_field_name,
        auto_id=False,
    )
    return multi_string_schema
|
||||
|
||||
|
||||
|
||||
def gen_vectors(nb, dim):
    """
    gen nb random vectors of length dim, L2-normalized row by row
    :param nb: num of vectors
    :param dim: num of components per vector
    :return: list of lists (result of preprocessing.normalize converted with tolist)
    """
    raw_rows = []
    for _ in range(nb):
        raw_rows.append([random.random() for _ in range(dim)])
    normalized = preprocessing.normalize(raw_rows, axis=1, norm='l2')
    return normalized.tolist()
|
||||
|
||||
def gen_string(nb):
    """
    gen nb random strings, each the str() of one random.random() draw
    :param nb: num of strings
    :return: list of str
    """
    values = []
    for _ in range(nb):
        values.append(str(random.random()))
    return values
|
||||
|
||||
|
||||
def gen_binary_vectors(num, dim):
|
||||
raw_vectors = []
|
||||
|
@ -162,11 +188,13 @@ def gen_binary_vectors(num, dim):
|
|||
|
||||
def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
    """
    gen dataframe data for the default fields: int64, float, string and float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :param start: first value of the int64/float/string columns, default 0
    :return: dataframe
    """
    int_values = pd.Series(data=[i for i in range(start, start + nb)])
    # Only the np.float32-based series is kept; the earlier float()-based
    # assignment was overwritten before use.
    float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
    string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
    float_vec_values = gen_vectors(nb, dim)
    df = pd.DataFrame({
        ct.default_int64_field_name: int_values,
        ct.default_float_field_name: float_values,
        ct.default_string_field_name: string_values,
        ct.default_float_vec_field_name: float_vec_values
    })
    return df
|
||||
|
@ -181,9 +209,11 @@ def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
|
|||
"""
|
||||
int_values = pd.Series(data=[i for i in range(0, nb)])
|
||||
float_values = pd.Series(data=[float(i) for i in range(nb)], dtype="float32")
|
||||
string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
|
||||
df = pd.DataFrame({
|
||||
ct.default_int64_field_name: int_values,
|
||||
ct.default_float_field_name: float_values,
|
||||
ct.default_string_field_name: string_values,
|
||||
ct.default_float_vec_field_name: gen_vectors(nb, ct.default_dim)
|
||||
})
|
||||
for field in vec_fields:
|
||||
|
@ -195,6 +225,28 @@ def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
|
|||
df[field.name] = vec_values
|
||||
return df
|
||||
|
||||
def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
    """
    gen dataframe data for fields: int64, float, string, float_vec and string_fields
    :param string_fields: list of FieldSchema, one extra column is added per field
    :param nb: num of entities, default default_nb
    :return: dataframe
    """
    int_values = pd.Series(data=[i for i in range(0, nb)])
    float_values = pd.Series(data=[float(i) for i in range(nb)], dtype="float32")
    string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
    df = pd.DataFrame({
        ct.default_int64_field_name: int_values,
        ct.default_float_field_name: float_values,
        ct.default_string_field_name: string_values,
        ct.default_float_vec_field_name: gen_vectors(nb, ct.default_dim)
    })
    for field in string_fields:
        # NOTE(review): nesting reconstructed to mirror gen_dataframe_multi_vec_fields --
        # a non-VARCHAR field reuses the most recent string_values; confirm against the repo.
        if field.dtype == DataType.VARCHAR:
            string_values = gen_string(nb)
        df[field.name] = string_values
    return df
|
||||
|
||||
|
||||
def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
|
||||
int64_values = pd.Series(data=[i for i in range(start, start + nb)])
|
||||
|
@ -202,9 +254,9 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
|
|||
int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
|
||||
int8_values = pd.Series(data=[np.int8(i) for i in range(start, start + nb)], dtype="int8")
|
||||
bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
|
||||
float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
|
||||
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
|
||||
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
|
||||
double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
|
||||
# string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
|
||||
float_vec_values = gen_vectors(nb, dim)
|
||||
df = pd.DataFrame({
|
||||
ct.default_int64_field_name: int64_values,
|
||||
|
@ -213,7 +265,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
|
|||
ct.default_int8_field_name: int8_values,
|
||||
ct.default_bool_field_name: bool_values,
|
||||
ct.default_float_field_name: float_values,
|
||||
# ct.default_string_field_name: string_values,
|
||||
ct.default_string_field_name: string_values,
|
||||
ct.default_double_field_name: double_values,
|
||||
ct.default_float_vec_field_name: float_vec_values
|
||||
})
|
||||
|
@ -223,10 +275,12 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
|
|||
def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
    """
    gen dataframe data for fields: int64, float, string and binary_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_binary_vectors, default default_dim
    :param start: first value of the int64/float/string columns, default 0
    :return: (dataframe, raw binary values returned by gen_binary_vectors)
    """
    ids = range(start, start + nb)
    int_column = pd.Series(data=list(ids))
    float_column = pd.Series(data=[float(i) for i in ids], dtype="float32")
    string_column = pd.Series(data=[str(i) for i in ids], dtype="string")
    raw_vectors, packed_vectors = gen_binary_vectors(nb, dim)
    columns = {
        ct.default_int64_field_name: int_column,
        ct.default_float_field_name: float_column,
        ct.default_string_field_name: string_column,
        ct.default_binary_vec_field_name: packed_vectors,
    }
    return pd.DataFrame(columns), raw_vectors
|
||||
|
@ -235,32 +289,36 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star
|
|||
def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen list data for the default fields, in order: int64, float, string, float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: list of four column lists
    """
    int_values = [i for i in range(nb)]
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    # Only the four-column layout is kept; the earlier three-column
    # assignment was overwritten before use.
    data = [int_values, float_values, string_values, float_vec_values]
    return data
|
||||
|
||||
|
||||
def gen_default_tuple_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen tuple data for the default fields, in order: int64, float, string, float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: tuple of four column lists
    """
    int_values = [i for i in range(nb)]
    # Only the np.float32 values and the four-column tuple are kept; the earlier
    # float()-based list and three-column tuple were overwritten before use.
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    data = (int_values, float_values, string_values, float_vec_values)
    return data
|
||||
|
||||
|
||||
def gen_numpy_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen data for the default fields, in order: int64, float, string, float_vec;
    the int64 and float columns are numpy arrays
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: list of four columns
    """
    int_values = np.arange(nb, dtype='int64')
    float_values = np.arange(nb, dtype='float32')
    # np.str was only an alias of the builtin str and no longer exists in
    # recent NumPy releases; the builtin yields the same values.
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    # Only the four-column layout is kept; the earlier three-column
    # assignment was overwritten before use.
    data = [int_values, float_values, string_values, float_vec_values]
    return data
|
||||
|
||||
|
||||
def gen_default_binary_list_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen list data for fields, in order: int64, float, string, binary_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_binary_vectors, default default_dim
    :return: (list of four columns, raw binary values returned by gen_binary_vectors)
    """
    int_values = [i for i in range(nb)]
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    binary_raw_values, binary_vec_values = gen_binary_vectors(nb, dim)
    # Only the four-column layout is kept; the earlier three-column
    # assignment was overwritten before use.
    data = [int_values, float_values, string_values, binary_vec_values]
    return data, binary_raw_values
|
||||
|
||||
|
||||
|
@ -366,6 +424,27 @@ def gen_normal_expressions():
|
|||
return expressions
|
||||
|
||||
|
||||
def gen_normal_string_expressions(field):
    """
    gen filter expressions that compare the given field against string literals
    :param field: field name interpolated into every expression
    :return: list of expression strings
    """
    return [
        f'"0"< {field} < "3"',
        f'{field} >= "0"',
        f'({field} > "0" && {field} < "100") or ({field} > "200" && {field} < "300")',
        f'"0" <= {field} <= "100"',
        f'{field} == "0"|| {field} == "1"|| {field} =="2"',
        f'{field} != "0"',
        f'{field} not in ["0", "1", "2"]',
        f'{field} in ["0", "1", "2"]',
    ]
|
||||
|
||||
def gen_invaild_string_expressions():
    """
    gen term expressions on the "varchar" field whose value lists mix ints and strings
    :return: list of expression strings
    """
    mixed_in = 'varchar in [0, "1"]'
    mixed_not_in = 'varchar not in ["0", 1, 2]'
    return [mixed_in, mixed_not_in]
|
||||
|
||||
|
||||
def gen_normal_expressions_field(field):
|
||||
expressions = [
|
||||
"",
|
||||
|
|
|
@ -26,13 +26,14 @@ default_int32_field_name = "int32"
|
|||
# default field names
default_int64_field_name = "int64"
default_float_field_name = "float"
default_double_field_name = "double"
# a single definition: the earlier "string" value was overwritten by this one
default_string_field_name = "varchar"
default_float_vec_field_name = "float_vector"
another_float_vec_field_name = "float_vector1"
default_binary_vec_field_name = "binary_vector"
default_partition_name = "_default"
default_tag = "1970_01_01"
row_count = "row_count"
default_length = 65535
default_desc = ""
default_collection_desc = "default collection"
default_index_name = "default_index_name"
|
||||
|
@ -50,7 +51,7 @@ compact_delta_ratio_reciprocal = 5 # compact_delta_binlog_ratio is 0.2
|
|||
compact_retention_duration = 40  # compaction travel time retention range 20s
max_compaction_interval = 60  # the max time interval (s) from the last compaction
max_field_num = 256  # Maximum number of fields in a collection
default_replica_num = 1  # default memory replica number

Not_Exist = "Not_Exist"
Connect_Object_Name = True
|
||||
|
@ -145,6 +146,7 @@ get_dict_without_host_port = [
|
|||
{"": ""}
|
||||
]
|
||||
|
||||
|
||||
get_wrong_format_dict = [
|
||||
{"host": "string_host", "port": {}},
|
||||
{"host": 0, "port": 19520}
|
||||
|
|
|
@ -9,7 +9,7 @@ allure-pytest==2.7.0
|
|||
pytest-print==0.2.1
|
||||
pytest-level==0.1.1
|
||||
pytest-xdist==2.2.1
|
||||
pymilvus==2.1.0.dev44
|
||||
pymilvus==2.1.0.dev47
|
||||
pytest-rerunfailures==9.1.1
|
||||
git+https://github.com/Projectplace/pytest-tags
|
||||
ndg-httpsclient
|
||||
|
|
|
@ -382,7 +382,7 @@ class TestQueryParams(TestcaseBase):
|
|||
values = df[field].tolist()
|
||||
pos = 100
|
||||
term_expr = f'{field} not in {values[pos:]}'
|
||||
res = df.iloc[:pos, :2].to_dict('records')
|
||||
res = df.iloc[:pos, :3].to_dict('records')
|
||||
self.collection_wrap.query(term_expr, output_fields=["*"],
|
||||
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
|
||||
|
||||
|
@ -538,7 +538,7 @@ class TestQueryParams(TestcaseBase):
|
|||
df = cf.gen_default_dataframe_data()
|
||||
collection_w.insert(df)
|
||||
assert collection_w.num_entities == ct.default_nb
|
||||
all_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_float_vec_field_name]
|
||||
all_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_string_field_name, ct.default_float_vec_field_name]
|
||||
res = df.iloc[:2].to_dict('records')
|
||||
collection_w.load()
|
||||
actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields,
|
||||
|
@ -691,7 +691,7 @@ class TestQueryParams(TestcaseBase):
|
|||
df = vectors[0]
|
||||
|
||||
# query with wildcard scale(*)
|
||||
output_fields = [ct.default_int64_field_name, ct.default_float_field_name]
|
||||
output_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_string_field_name]
|
||||
res = df.loc[:1, output_fields].to_dict('records')
|
||||
collection_w.query(default_term_expr, output_fields=["*"],
|
||||
check_task=CheckTasks.check_query_results,
|
||||
|
@ -723,14 +723,14 @@ class TestQueryParams(TestcaseBase):
|
|||
df = vectors[0]
|
||||
|
||||
# query with output_fields=["*", float_vector)
|
||||
res = df.iloc[:2, :3].to_dict('records')
|
||||
res = df.iloc[:2, :4].to_dict('records')
|
||||
collection_w.load()
|
||||
collection_w.query(default_term_expr, output_fields=["*", ct.default_float_vec_field_name],
|
||||
check_task=CheckTasks.check_query_results,
|
||||
check_items={exp_res: res, "with_vec": True})
|
||||
|
||||
# query with output_fields=["*", float)
|
||||
res2 = df.iloc[:2, :2].to_dict('records')
|
||||
res2 = df.iloc[:2, :3].to_dict('records')
|
||||
collection_w.load()
|
||||
collection_w.query(default_term_expr, output_fields=["*", ct.default_float_field_name],
|
||||
check_task=CheckTasks.check_query_results,
|
||||
|
|
Loading…
Reference in New Issue