[test]Update common func with string (#16803)

Signed-off-by: jingkl <jingjing.jia@zilliz.com>
pull/16817/head
jingkl 2022-05-06 20:17:51 +08:00 committed by GitHub
parent b60998038f
commit a0110998e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 21 deletions

View File

@ -48,6 +48,10 @@ def gen_bool_field(name=ct.default_bool_field_name, description=ct.default_desc,
is_primary=is_primary, **kwargs)
return bool_field
def gen_string_field(name=ct.default_string_field_name, description=ct.default_desc, is_primary=False,
                     max_length_per_row=ct.default_length, **kwargs):
    """
    build a VARCHAR field schema through ApiFieldSchemaWrapper
    :param name: field name
    :param description: field description
    :param is_primary: whether the field is flagged as the primary key
    :param max_length_per_row: forwarded to init_field_schema under the same keyword
    :param kwargs: extra keyword arguments forwarded unchanged to init_field_schema
    :return: first element of the pair returned by init_field_schema
    """
    init_result = ApiFieldSchemaWrapper().init_field_schema(
        name=name,
        dtype=DataType.VARCHAR,
        description=description,
        max_length_per_row=max_length_per_row,
        is_primary=is_primary,
        **kwargs
    )
    # The wrapper hands back a 2-item result; only the schema object is needed here.
    varchar_field, _ = init_result
    return varchar_field
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
@ -103,7 +107,16 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False
def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
                                  auto_id=False, dim=ct.default_dim):
    """
    gen the default collection schema with fields: int64, float, string and float_vec
    :param description: collection description
    :param primary_field: name of the primary key field
    :param auto_id: forwarded to init_collection_schema as auto_id
    :param dim: dimension passed to gen_float_vec_field
    :return: first element of the pair returned by init_collection_schema
    """
    # Single assignment: the earlier list without the string field was immediately overwritten.
    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                    primary_field=primary_field, auto_id=auto_id)
    return schema
def gen_string_pk_default_collection_schema(description=ct.default_desc, primary_field=ct.default_string_field_name,
                                            auto_id=False, dim=ct.default_dim):
    """
    gen a collection schema (int64, float, string, float_vec) whose default primary key is the string field
    :param description: collection description
    :param primary_field: name of the primary key field, default is the default string field name
    :param auto_id: forwarded to init_collection_schema as auto_id
    :param dim: dimension passed to gen_float_vec_field
    :return: first element of the pair returned by init_collection_schema
    """
    int64_field = gen_int64_field()
    float_field = gen_float_field()
    string_field = gen_string_field()
    float_vec_field = gen_float_vec_field(dim=dim)
    init_result = ApiCollectionSchemaWrapper().init_collection_schema(
        fields=[int64_field, float_field, string_field, float_vec_field],
        description=description,
        primary_field=primary_field,
        auto_id=auto_id
    )
    string_pk_schema, _ = init_result
    return string_pk_schema
@ -113,7 +126,7 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_float_vec_field(dim=dim)]
gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
@ -127,7 +140,7 @@ def gen_collection_schema(fields, primary_field=None, description=ct.default_des
def gen_default_binary_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_binary_vec_field(dim=dim)]
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_binary_vec_field(dim=dim)]
binary_schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field,
auto_id=auto_id)
@ -135,19 +148,32 @@ def gen_default_binary_collection_schema(description=ct.default_desc, primary_fi
def gen_schema_multi_vector_fields(vec_fields):
    """
    gen a collection schema with fields: int64, float, string, float_vec plus the given vec_fields
    :param vec_fields: iterable of extra field schemas appended after the default fields
    :return: first element of the pair returned by init_collection_schema
    """
    # Single assignment: the earlier list without the string field was immediately overwritten.
    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field()]
    fields.extend(vec_fields)
    primary_field = ct.default_int64_field_name
    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=ct.default_desc,
                                                                    primary_field=primary_field, auto_id=False)
    return schema
def gen_schema_multi_string_fields(string_fields):
    """
    gen a collection schema with fields: int64, float, string, float_vec plus the given string_fields
    :param string_fields: iterable of extra field schemas appended after the default fields
    :return: first element of the pair returned by init_collection_schema
    """
    all_fields = [
        gen_int64_field(),
        gen_float_field(),
        gen_string_field(),
        gen_float_vec_field(),
    ]
    all_fields.extend(string_fields)
    init_result = ApiCollectionSchemaWrapper().init_collection_schema(
        fields=all_fields,
        description=ct.default_desc,
        primary_field=ct.default_int64_field_name,
        auto_id=False
    )
    multi_string_schema, _ = init_result
    return multi_string_schema
def gen_vectors(nb, dim):
    """
    gen nb random vectors of length dim, passed through preprocessing.normalize with the l2 norm
    :param nb: number of vectors (rows)
    :param dim: number of components per vector
    :return: result of normalize(...).tolist()
    """
    raw_rows = []
    for _ in range(nb):
        # One random.random() draw per component, row by row.
        raw_rows.append([random.random() for _ in range(dim)])
    normalized_rows = preprocessing.normalize(raw_rows, axis=1, norm='l2')
    return normalized_rows.tolist()
def gen_string(nb):
    """
    gen nb strings, each the str() of one random.random() draw
    :param nb: number of strings to generate
    :return: list of str
    """
    generated = []
    for _ in range(nb):
        generated.append(str(random.random()))
    return generated
def gen_binary_vectors(num, dim):
raw_vectors = []
@ -162,11 +188,13 @@ def gen_binary_vectors(num, dim):
def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
    """
    gen dataframe data for fields: int64, float, string and float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :param start: first value of the int/float/string sequences, which cover range(start, start + nb)
    :return: dataframe
    """
    int_values = pd.Series(data=[i for i in range(start, start + nb)])
    # Single assignment: an earlier float(i)-based series was immediately overwritten by this one.
    float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
    string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
    float_vec_values = gen_vectors(nb, dim)
    df = pd.DataFrame({
        ct.default_int64_field_name: int_values,
        ct.default_float_field_name: float_values,
        ct.default_string_field_name: string_values,
        ct.default_float_vec_field_name: float_vec_values
    })
    return df
@ -181,9 +209,11 @@ def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
"""
int_values = pd.Series(data=[i for i in range(0, nb)])
float_values = pd.Series(data=[float(i) for i in range(nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
ct.default_float_field_name: float_values,
ct.default_string_field_name: string_values,
ct.default_float_vec_field_name: gen_vectors(nb, ct.default_dim)
})
for field in vec_fields:
@ -195,6 +225,28 @@ def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
df[field.name] = vec_values
return df
def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
    """
    gen dataframe data for fields: int64, float, string, float_vec and string_fields
    :param string_fields: list of FieldSchema; only entries whose dtype is DataType.VARCHAR get a column
    :param nb: num of entities, default default_nb
    :return: dataframe
    """
    int_values = pd.Series(data=[i for i in range(0, nb)])
    float_values = pd.Series(data=[float(i) for i in range(nb)], dtype="float32")
    string_values = pd.Series(data=[str(i) for i in range(nb)], dtype="string")
    df = pd.DataFrame({
        ct.default_int64_field_name: int_values,
        ct.default_float_field_name: float_values,
        ct.default_string_field_name: string_values,
        ct.default_float_vec_field_name: gen_vectors(nb, ct.default_dim)
    })
    for field in string_fields:
        if field.dtype == DataType.VARCHAR:
            # Extra string columns get freshly generated values from gen_string,
            # not the sequential values used for the default string column.
            df[field.name] = gen_string(nb)
    return df
def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
int64_values = pd.Series(data=[i for i in range(start, start + nb)])
@ -202,9 +254,9 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
int8_values = pd.Series(data=[np.int8(i) for i in range(start, start + nb)], dtype="int8")
bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
# string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
float_vec_values = gen_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int64_values,
@ -213,7 +265,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
ct.default_int8_field_name: int8_values,
ct.default_bool_field_name: bool_values,
ct.default_float_field_name: float_values,
# ct.default_string_field_name: string_values,
ct.default_string_field_name: string_values,
ct.default_double_field_name: double_values,
ct.default_float_vec_field_name: float_vec_values
})
@ -223,10 +275,12 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
    """
    gen dataframe data for fields: int64, float, string and binary_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_binary_vectors, default default_dim
    :param start: first value of the int/float/string sequences, which cover range(start, start + nb)
    :return: (dataframe, first element returned by gen_binary_vectors)
    """
    stop = start + nb
    id_series = pd.Series(data=list(range(start, stop)))
    float_series = pd.Series(data=[float(v) for v in range(start, stop)], dtype="float32")
    string_series = pd.Series(data=[str(v) for v in range(start, stop)], dtype="string")
    # gen_binary_vectors yields a pair; the second item goes into the dataframe,
    # the first is handed back to the caller alongside it.
    raw_binary, packed_binary = gen_binary_vectors(nb, dim)
    columns = {
        ct.default_int64_field_name: id_series,
        ct.default_float_field_name: float_series,
        ct.default_string_field_name: string_series,
        ct.default_binary_vec_field_name: packed_binary
    }
    return pd.DataFrame(columns), raw_binary
@ -235,32 +289,36 @@ def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, star
def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen column-wise list data in the order: int, float, string, float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: list of four columns
    """
    int_values = [i for i in range(nb)]
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    # Single assignment: an earlier three-column list was immediately overwritten by this one.
    data = [int_values, float_values, string_values, float_vec_values]
    return data
def gen_default_tuple_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen column-wise tuple data in the order: int, float, string, float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: tuple of four columns
    """
    int_values = [i for i in range(nb)]
    # Single assignment: an earlier float(i)-based list was immediately overwritten by this one.
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    # Single assignment: an earlier three-column tuple was immediately overwritten by this one.
    data = (int_values, float_values, string_values, float_vec_values)
    return data
def gen_numpy_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen column-wise data in the order: int64 ndarray, float32 ndarray, list of str, float_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_vectors, default default_dim
    :return: list of four columns
    """
    int_values = np.arange(nb, dtype='int64')
    float_values = np.arange(nb, dtype='float32')
    # np.str was only an alias of the builtin str (deprecated in NumPy 1.20, removed in 1.24),
    # so the builtin yields the same values and keeps working on current NumPy releases.
    string_values = [str(i) for i in range(nb)]
    float_vec_values = gen_vectors(nb, dim)
    # Single assignment: an earlier three-column list was immediately overwritten by this one.
    data = [int_values, float_values, string_values, float_vec_values]
    return data
def gen_default_binary_list_data(nb=ct.default_nb, dim=ct.default_dim):
    """
    gen column-wise list data in the order: int, float, string, binary_vec
    :param nb: num of entities, default default_nb
    :param dim: dimension passed to gen_binary_vectors, default default_dim
    :return: (list of four columns, first element returned by gen_binary_vectors)
    """
    int_values = [i for i in range(nb)]
    float_values = [np.float32(i) for i in range(nb)]
    string_values = [str(i) for i in range(nb)]
    binary_raw_values, binary_vec_values = gen_binary_vectors(nb, dim)
    # Single assignment: an earlier three-column list was immediately overwritten by this one.
    data = [int_values, float_values, string_values, binary_vec_values]
    return data, binary_raw_values
@ -366,6 +424,27 @@ def gen_normal_expressions():
return expressions
def gen_normal_string_expressions(field):
    """
    gen filter expressions that compare the given field against quoted string literals
    :param field: field name interpolated into every expression
    :return: list of eight expression strings
    """
    # Templates use single-quoted Python strings so the embedded double quotes need no escaping.
    templates = (
        '"0"< {field} < "3"',
        '{field} >= "0"',
        '({field} > "0" && {field} < "100") or ({field} > "200" && {field} < "300")',
        '"0" <= {field} <= "100"',
        '{field} == "0"|| {field} == "1"|| {field} =="2"',
        '{field} != "0"',
        '{field} not in ["0", "1", "2"]',
        '{field} in ["0", "1", "2"]',
    )
    return [template.format(field=field) for template in templates]
def gen_invaild_string_expressions():
    """
    gen two term expressions on the "varchar" field whose value lists mix int and string literals
    :return: list of two expression strings
    """
    mixed_in_expr = 'varchar in [0, "1"]'
    mixed_not_in_expr = 'varchar not in ["0", 1, 2]'
    return [mixed_in_expr, mixed_not_in_expr]
def gen_normal_expressions_field(field):
expressions = [
"",

View File

@ -26,13 +26,14 @@ default_int32_field_name = "int32"
default_int64_field_name = "int64"
default_float_field_name = "float"
default_double_field_name = "double"
default_string_field_name = "string"
default_string_field_name = "varchar"
default_float_vec_field_name = "float_vector"
another_float_vec_field_name = "float_vector1"
default_binary_vec_field_name = "binary_vector"
default_partition_name = "_default"
default_tag = "1970_01_01"
row_count = "row_count"
default_length = 65535
default_desc = ""
default_collection_desc = "default collection"
default_index_name = "default_index_name"
@ -50,7 +51,7 @@ compact_delta_ratio_reciprocal = 5 # compact_delta_binlog_ratio is 0.2
compact_retention_duration = 40 # compaction travel time retention range 20s
max_compaction_interval = 60 # the max time interval (s) from the last compaction
max_field_num = 256 # Maximum number of fields in a collection
default_replica_num = 1 # default memory replica number
default_replica_num = 1
Not_Exist = "Not_Exist"
Connect_Object_Name = True
@ -145,6 +146,7 @@ get_dict_without_host_port = [
{"": ""}
]
get_wrong_format_dict = [
{"host": "string_host", "port": {}},
{"host": 0, "port": 19520}

View File

@ -9,7 +9,7 @@ allure-pytest==2.7.0
pytest-print==0.2.1
pytest-level==0.1.1
pytest-xdist==2.2.1
pymilvus==2.1.0.dev44
pymilvus==2.1.0.dev47
pytest-rerunfailures==9.1.1
git+https://github.com/Projectplace/pytest-tags
ndg-httpsclient

View File

@ -382,7 +382,7 @@ class TestQueryParams(TestcaseBase):
values = df[field].tolist()
pos = 100
term_expr = f'{field} not in {values[pos:]}'
res = df.iloc[:pos, :2].to_dict('records')
res = df.iloc[:pos, :3].to_dict('records')
self.collection_wrap.query(term_expr, output_fields=["*"],
check_task=CheckTasks.check_query_results, check_items={exp_res: res})
@ -538,7 +538,7 @@ class TestQueryParams(TestcaseBase):
df = cf.gen_default_dataframe_data()
collection_w.insert(df)
assert collection_w.num_entities == ct.default_nb
all_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_float_vec_field_name]
all_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_string_field_name, ct.default_float_vec_field_name]
res = df.iloc[:2].to_dict('records')
collection_w.load()
actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields,
@ -691,7 +691,7 @@ class TestQueryParams(TestcaseBase):
df = vectors[0]
# query with wildcard scale(*)
output_fields = [ct.default_int64_field_name, ct.default_float_field_name]
output_fields = [ct.default_int64_field_name, ct.default_float_field_name, ct.default_string_field_name]
res = df.loc[:1, output_fields].to_dict('records')
collection_w.query(default_term_expr, output_fields=["*"],
check_task=CheckTasks.check_query_results,
@ -723,14 +723,14 @@ class TestQueryParams(TestcaseBase):
df = vectors[0]
# query with output_fields=["*", float_vector]
res = df.iloc[:2, :3].to_dict('records')
res = df.iloc[:2, :4].to_dict('records')
collection_w.load()
collection_w.query(default_term_expr, output_fields=["*", ct.default_float_vec_field_name],
check_task=CheckTasks.check_query_results,
check_items={exp_res: res, "with_vec": True})
# query with output_fields=["*", float]
res2 = df.iloc[:2, :2].to_dict('records')
res2 = df.iloc[:2, :3].to_dict('records')
collection_w.load()
collection_w.query(default_term_expr, output_fields=["*", ct.default_float_field_name],
check_task=CheckTasks.check_query_results,