Add test data and auto id (async) support for test search cases (#6616)

Signed-off-by: Binbin Lv <binbin.lv@zilliz.com>
pull/6682/head
binbin 2021-07-20 22:29:53 +08:00 committed by GitHub
parent 744f2c7f51
commit 7eacc31a47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1328 additions and 834 deletions

View File

@ -124,8 +124,8 @@ class TestcaseBase(Base):
**kwargs)
return partition_wrap
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb,
partition_num=0, is_binary=False, is_all_data_type=False):
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb, partition_num=0,
is_binary=False, is_all_data_type=False, auto_id=False, dim=ct.default_dim):
"""
target: create specified collections
method: 1. create collections (binary/non-binary)
@ -141,11 +141,11 @@ class TestcaseBase(Base):
binary_raw_vectors = []
insert_ids = []
# 1 create collection
default_schema = cf.gen_default_collection_schema()
default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim)
if is_binary:
default_schema = cf.gen_default_binary_collection_schema()
default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim)
if is_all_data_type:
default_schema = cf.gen_collection_schema_all_datatype()
default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim)
log.info("init_collection_general: collection creation")
collection_w = self.init_collection_wrap(name=collection_name,
schema=default_schema)
@ -155,7 +155,8 @@ class TestcaseBase(Base):
# 3 insert data if specified
if insert_data:
collection_w, vectors, binary_raw_vectors, insert_ids = \
cf.insert_data(collection_w, nb, is_binary, is_all_data_type)
cf.insert_data(collection_w, nb, is_binary, is_all_data_type,
auto_id=auto_id, dim=dim)
assert collection_w.is_empty is False
assert collection_w.num_entities == nb
collection_w.load()

View File

@ -167,6 +167,10 @@ class ResponseChecker:
log.warning("The function name is {} rather than {}".format(func_name, "search"))
if len(check_items) == 0:
raise Exception("No expect values found in the check task")
if check_items.get("_async", None):
if check_items["_async"]:
search_res.done()
search_res = search_res.result()
if len(search_res) != check_items["nq"]:
log.error("search_results_check: Numbers of query searched (%d) "
"is not equal with expected (%d)"

View File

@ -84,8 +84,8 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False
def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False):
fields = [gen_int64_field(), gen_float_field(), gen_float_vec_field()]
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
@ -93,9 +93,9 @@ def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.
def gen_collection_schema_all_datatype(description=ct.default_desc,
primary_field=ct.default_int64_field_name,
auto_id=False):
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_float_vec_field()]
gen_bool_field(), gen_float_field(), gen_double_field(), gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
@ -107,10 +107,12 @@ def gen_collection_schema(fields, primary_field=None, description=ct.default_des
return schema
def gen_default_binary_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name):
fields = [gen_int64_field(), gen_float_field(), gen_binary_vec_field()]
def gen_default_binary_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_binary_vec_field(dim=dim)]
binary_schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field)
primary_field=primary_field,
auto_id=auto_id)
return binary_schema
@ -253,6 +255,18 @@ def gen_normal_expressions():
return expressions
def gen_normal_expressions_field(field):
    """Return boolean filter expressions parameterized on *field*.

    Covers the operator surface exercised by query/search tests:
    the empty filter, a simple comparison, grouped ranges mixing
    ``&&`` with ``or``, ``not in`` / ``in`` membership, and ``||`` chains.
    """
    templates = [
        "",
        "{0} > 0",
        "({0} > 0 && {0} < 400) or ({0} > 500 && {0} < 1000)",
        "{0} not in [1, 2, 3]",
        "{0} in [1, 2, 3] and {0} != 2",
        "{0} == 0 || {0} == 1 || {0} == 2",
    ]
    return [tpl.format(field) for tpl in templates]
def jaccard(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
@ -340,7 +354,8 @@ def gen_partitions(collection_w, partition_num=1):
log.info("gen_partitions: created partitions %s" % par)
def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False):
def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
auto_id=False, dim=ct.default_dim):
"""
target: insert non-binary/binary data
method: insert non-binary/binary data into partitions if any
@ -354,12 +369,14 @@ def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False):
log.info("insert_data: inserting data into collection %s (num_entities: %s)"
% (collection_w.name, nb))
for i in range(num):
default_data = gen_default_dataframe_data(nb // num)
default_data = gen_default_dataframe_data(nb // num, dim=dim)
if is_binary:
default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num)
default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim)
binary_raw_vectors.extend(binary_raw_data)
if is_all_data_type:
default_data = gen_dataframe_all_data_type(nb // num)
default_data = gen_dataframe_all_data_type(nb // num, dim=dim)
if auto_id:
default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
insert_res = collection_w.insert(default_data, par[i].name)[0]
insert_ids.extend(insert_res.primary_keys)
vectors.append(default_data)

View File

@ -8,6 +8,7 @@ big_flush_interval = 1000
default_drop_interval = 3
default_dim = 128
default_nb = 1200
default_nb_medium = 5000
default_top_k = 10
default_nq = 2
default_limit = 10
@ -38,6 +39,8 @@ int_field_desc = "int64 type field"
float_field_desc = "float type field"
float_vec_field_desc = "float vector type field"
binary_vec_field_desc = "binary vector type field"
max_dim = 32768
gracefulTime = 1
Not_Exist = "Not_Exist"
Connect_Object_Name = "Milvus"
@ -56,6 +59,8 @@ get_invalid_strs = [
(1,),
{1: 1},
None,
"",
" ",
"12-s",
"12 s",
"(mn)",
@ -74,6 +79,36 @@ get_not_string = [
[1, "2", 3]
]
# Inputs that must be rejected as vector data: wrong container types,
# wrong element types, bare/whitespace/unicode strings, and a long string.
get_invalid_vectors = [
    "1*2",
    [1],
    [1, 2],
    [" "],
    ["a"],
    [None],
    None,
    (1, 2),
    {"a": 1},
    " ",
    "",
    "String",
    " siede ",
    "中文",
    # same value as "a".join("a" for i in range(256)): 256 items + 255 separators
    "a" * 511,
]
# Inputs that must be rejected where an integer is expected: floats,
# None, lists, strings (empty/whitespace/unicode/long), and a negative.
get_invalid_ints = [
    1.0,
    None,
    [1, 2, 3],
    " ",
    "",
    -1,
    "String",
    "=c",
    "中文",
    # same value as "a".join("a" for i in range(256)): 256 items + 255 separators
    "a" * 511,
]
get_dict_without_host_port = [
{"host": "host"},

File diff suppressed because it is too large Load Diff