mirror of https://github.com/milvus-io/milvus.git
Add json and dynamic support cases (#24525)
Signed-off-by: Binbin Lv <binbin.lv@zilliz.com>
branch: pull/24672/head
parent 732fe54775
commit c9997a2703
@@ -169,7 +169,7 @@ class TestcaseBase(Base):
         if is_all_data_type:
             default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
                                                                    primary_field=primary_field)
-        log.info("init_collection_general: collection creation")
+        log.info("insert_data_general: collection creation")
         collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
         pre_entities = collection_w.num_entities
         if insert_data:

@@ -184,7 +184,8 @@ class TestcaseBase(Base):
     def init_collection_general(self, prefix="test", insert_data=False, nb=ct.default_nb,
                                 partition_num=0, is_binary=False, is_all_data_type=False,
                                 auto_id=False, dim=ct.default_dim, is_index=True,
-                                primary_field=ct.default_int64_field_name, is_flush=True, name=None, **kwargs):
+                                primary_field=ct.default_int64_field_name, is_flush=True, name=None,
+                                enable_dynamic_field=False, with_json=True, **kwargs):
         """
         target: create specified collections
         method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)

@@ -204,13 +205,17 @@ class TestcaseBase(Base):
         insert_ids = []
         time_stamp = 0
         # 1 create collection
-        default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field)
+        default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field,
+                                                          enable_dynamic_field=enable_dynamic_field,
+                                                          with_json=with_json)
         if is_binary:
             default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim,
                                                                      primary_field=primary_field)
         if is_all_data_type:
             default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
-                                                                   primary_field=primary_field)
+                                                                   primary_field=primary_field,
+                                                                   enable_dynamic_field=enable_dynamic_field,
+                                                                   with_json=with_json)
         log.info("init_collection_general: collection creation")
         collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
         # 2 add extra partitions if specified (default is 1 partition named "_default")

@@ -219,7 +224,8 @@ class TestcaseBase(Base):
         # 3 insert data if specified
         if insert_data:
             collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
-                cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, dim=dim)
+                cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
+                               dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json)
             if is_flush:
                 assert collection_w.is_empty is False
                 assert collection_w.num_entities == nb
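Note: a minimal usage sketch of the widened signature (the surrounding TestcaseBase fixtures and imports are assumed, not shown in this diff):

    # hypothetical call from a test method: build a dynamic-field collection
    # whose generated rows carry no explicit JSON column
    collection_w, vectors, _, insert_ids = self.init_collection_general(
        prefix, insert_data=True, nb=3000,
        enable_dynamic_field=True,   # extra row keys land in the hidden $meta field
        with_json=False)[0:4]        # drop the explicit JSON column from the data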
@@ -296,7 +296,8 @@ class ResponseChecker:
         primary_field = check_items.get("primary_field", None)
         if exp_res is not None:
             if isinstance(query_res, list):
-                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field, with_vec=with_vec)
+                assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field,
+                                              with_vec=with_vec)
                 return True
             else:
                 log.error(f"Query result {query_res} is not list")

@@ -81,6 +81,12 @@ def gen_string_field(name=ct.default_string_field_name, description=ct.default_d
     return string_field


+def gen_json_field(name=ct.default_json_field_name, description=ct.default_desc, is_primary=False, **kwargs):
+    json_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.JSON, description=description,
+                                                              is_primary=is_primary, **kwargs)
+    return json_field
+
+
 def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
     int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
                                                               is_primary=is_primary, **kwargs)
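The new helper goes through the suite's ApiFieldSchemaWrapper; the equivalent direct construction with pymilvus is roughly the sketch below (assumes a client version that defines DataType.JSON, i.e. 2.2.8 or later):

    from pymilvus import FieldSchema, DataType

    # a non-primary JSON field; "json_field" mirrors ct.default_json_field_name
    json_field = FieldSchema(name="json_field", dtype=DataType.JSON, description="")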
@@ -134,10 +140,24 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False


 def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
-                                  auto_id=False, dim=ct.default_dim):
-    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
+                                  auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True):
+    if enable_dynamic_field:
+        if primary_field is ct.default_int64_field_name:
+            fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+        elif primary_field is ct.default_string_field_name:
+            fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
+        else:
+            log.error("Primary key only support int or varchar")
+            assert False
+    else:
+        fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(),
+                  gen_float_vec_field(dim=dim)]
+        if with_json is False:
+            fields.remove(gen_json_field())
+
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
-                                                                    primary_field=primary_field, auto_id=auto_id)
+                                                                    primary_field=primary_field, auto_id=auto_id,
+                                                                    enable_dynamic_field=enable_dynamic_field)
     return schema
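One subtlety: `fields.remove(gen_json_field())` removes a freshly constructed object, so it works only because pymilvus FieldSchema compares by value rather than identity. A name-based alternative that avoids that reliance (hypothetical rewrite, not part of the commit):

    # filter by field name instead of object equality
    fields = [f for f in fields if f.name != ct.default_json_field_name]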
@@ -154,7 +174,24 @@ def gen_string_pk_default_collection_schema(description=ct.default_desc, primary

 def gen_string_pk_default_collection_schema(description=ct.default_desc, primary_field=ct.default_string_field_name,
                                             auto_id=False, dim=ct.default_dim):
-    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(), gen_float_vec_field(dim=dim)]
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id)
     return schema


+def gen_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
+                                       auto_id=False, dim=ct.default_dim):
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(), gen_float_vec_field(dim=dim)]
+    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
+                                                                    primary_field=primary_field, auto_id=auto_id)
+    return schema
+
+
+def gen_multiple_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
+                                                auto_id=False, dim=ct.default_dim):
+    fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(name="json1"),
+              gen_json_field(name="json2"), gen_float_vec_field(dim=dim)]
+    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
+                                                                    primary_field=primary_field, auto_id=auto_id)
+    return schema

@@ -162,11 +199,19 @@ def gen_string_pk_default_collection_schema(description=ct.default_desc, primary

 def gen_collection_schema_all_datatype(description=ct.default_desc,
                                        primary_field=ct.default_int64_field_name,
-                                       auto_id=False, dim=ct.default_dim):
-    fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
-              gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
+                                       auto_id=False, dim=ct.default_dim,
+                                       enable_dynamic_field=False, with_json=True):
+    if enable_dynamic_field:
+        fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
+    else:
+        fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
+                  gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
+                  gen_json_field(), gen_float_vec_field(dim=dim)]
+        if with_json is False:
+            fields.remove(gen_json_field())
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
-                                                                    primary_field=primary_field, auto_id=auto_id)
+                                                                    primary_field=primary_field, auto_id=auto_id,
+                                                                    enable_dynamic_field=enable_dynamic_field)
     return schema

@@ -227,29 +272,55 @@ def gen_binary_vectors(num, dim):
     return raw_vectors, binary_vectors


-def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
+def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "float": i*1.0, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int_values,
         ct.default_float_field_name: float_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })
+    if with_json is False:
+        df.drop(ct.default_json_field_name, axis=1, inplace=True)
+
     return df


+def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+    array = []
+    for i in range(start, start + nb):
+        dict = {ct.default_int64_field_name: i,
+                ct.default_float_field_name: i*1.0,
+                ct.default_string_field_name: str(i),
+                ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
+                                             "list": [j for j in range(0, i)]},
+                ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+                }
+        if with_json is False:
+            dict.pop(ct.default_json_field_name, None)
+        array.append(dict)
+
+    return array
+
+
 def gen_default_data_for_upsert(nb=ct.default_nb, dim=ct.default_dim, start=0, size=10000):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i + size) for i in range(start, start + nb)], dtype="float32")
     string_values = pd.Series(data=[str(i + size) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int_values,
         ct.default_float_field_name: float_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values
     })
     return df, float_values
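The two generators above feed the two insert shapes pymilvus accepts: gen_default_dataframe_data builds a column-oriented pandas DataFrame for fixed schemas, while gen_default_rows_data builds a list of row dicts, the form required for dynamic-field collections, where unrecognized keys are routed to the hidden $meta field. A usage sketch (collection handle assumed):

    rows = gen_default_rows_data(nb=10, dim=128)
    rows[0]["extra_key"] = "anything"   # legal only with enable_dynamic_field=True
    collection_w.insert(rows)           # row-based insert: dict keys map to fields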
@@ -304,7 +375,7 @@ def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
     return df


-def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
+def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
     int64_values = pd.Series(data=[i for i in range(start, start + nb)])
     int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32")
     int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")

@@ -313,6 +384,8 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
     double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
+    json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
+                   for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,

@@ -323,11 +396,38 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
         ct.default_float_field_name: float_values,
         ct.default_double_field_name: double_values,
         ct.default_string_field_name: string_values,
+        ct.default_json_field_name: json_values,
         ct.default_float_vec_field_name: float_vec_values

     })
+    if with_json is False:
+        df.drop(ct.default_json_field_name, axis=1, inplace=True)
+
     return df


+def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+    array = []
+    for i in range(start, start + nb):
+        dict = {ct.default_int64_field_name: i,
+                ct.default_int32_field_name: i,
+                ct.default_int16_field_name: i,
+                ct.default_int8_field_name: i,
+                ct.default_bool_field_name: bool(i),
+                ct.default_float_field_name: i*1.0,
+                ct.default_double_field_name: i*1.0,
+                ct.default_string_field_name: str(i),
+                ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
+                                             "list": [j for j in range(0, i)]},
+                ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
+                }
+        if with_json is False:
+            dict.pop(ct.default_json_field_name, None)
+        array.append(dict)
+
+    return array
+
+
 def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
     int_values = pd.Series(data=[i for i in range(start, start + nb)])
     float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")

@@ -346,8 +446,10 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
     int_values = [i for i in range(start, start + nb)]
     float_values = [np.float32(i) for i in range(start, start + nb)]
     string_values = [str(i) for i in range(start, start + nb)]
+    json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
+                   for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
-    data = [int_values, float_values, string_values, float_vec_values]
+    data = [int_values, float_values, string_values, json_values, float_vec_values]
     return data

@@ -421,8 +523,10 @@ def gen_numpy_data(nb=ct.default_nb, dim=ct.default_dim):
     int_values = np.arange(nb, dtype='int64')
     float_values = np.arange(nb, dtype='float32')
     string_values = [np.str_(i) for i in range(nb)]
+    json_values = [{"number": i, "string": str(i), "bool": bool(i),
+                    "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(nb)]
     float_vec_values = gen_vectors(nb, dim)
-    data = [int_values, float_values, string_values, float_vec_values]
+    data = [int_values, float_values, string_values, json_values, float_vec_values]
     return data

@@ -768,7 +872,7 @@ def gen_partitions(collection_w, partition_num=1):


 def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
-                auto_id=False, dim=ct.default_dim, insert_offset=0):
+                auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True):
     """
     target: insert non-binary/binary data
     method: insert non-binary/binary data into partitions if any

@@ -782,14 +886,23 @@ def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
     start = insert_offset
     log.info(f"inserted {nb} data into collection {collection_w.name}")
     for i in range(num):
-        default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start)
+        log.debug("Dynamic field is enabled: %s" % enable_dynamic_field)
+        default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json)
+        if enable_dynamic_field:
+            default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json)
         if is_binary:
             default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start)
             binary_raw_vectors.extend(binary_raw_data)
         if is_all_data_type:
-            default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start)
+            default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start, with_json=with_json)
+            if enable_dynamic_field:
+                default_data = gen_default_rows_data_all_data_type(nb // num, dim=dim, start=start, with_json=with_json)
         if auto_id:
-            default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
+            if enable_dynamic_field:
+                for data in default_data:
+                    data.pop(ct.default_int64_field_name, None)
+            else:
+                default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
         insert_res = collection_w.insert(default_data, par[i].name)[0]
         time_stamp = insert_res.timestamp
         insert_ids.extend(insert_res.primary_keys)
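Putting the pieces together, a hedged sketch of how a test might drive the extended insert_data for a dynamic-field collection (return tuple per the signature above; the collection wrapper is assumed to exist):

    collection_w, vectors, _, insert_ids, time_stamp = insert_data(
        collection_w, nb=3000, enable_dynamic_field=True, with_json=False)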
@@ -33,6 +33,7 @@ default_int64_field_name = "int64"
 default_float_field_name = "float"
 default_double_field_name = "double"
 default_string_field_name = "varchar"
+default_json_field_name = "json_field"
 default_float_vec_field_name = "float_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"

@@ -42,6 +43,7 @@ default_resource_group_capacity = 1000000
 default_tag = "1970_01_01"
 row_count = "row_count"
 default_length = 65535
+default_json_list_length = 3
 default_desc = ""
 default_collection_desc = "default collection"
 default_index_name = "default_index_name"
@@ -327,7 +327,7 @@ class TestCollectionParams(TestcaseBase):
         expected: raise exception
         """
         self._connect()
-        error = {ct.err_code: 0, ct.err_msg: "Field dtype must be of DataType"}
+        error = {ct.err_code: 1, ct.err_msg: "Field dtype must be of DataType"}
         self.field_schema_wrap.init_field_schema(name="unknown", dtype=DataType.UNKNOWN,
                                                  check_task=CheckTasks.err_res, check_items=error)

@@ -560,7 +560,7 @@ class TestCollectionParams(TestcaseBase):
         """
         self._connect()
         fields = [cf.gen_int64_field(), cf.gen_float_vec_field()]
-        error = {ct.err_code: 0, ct.err_msg: "Primary field must in dataframe."}
+        error = {ct.err_code: 1, ct.err_msg: "Param primary_field must be str type."}
         self.collection_schema_wrap.init_collection_schema(fields, primary_field=primary_field,
                                                            check_task=CheckTasks.err_res, check_items=error)

@@ -646,7 +646,7 @@ class TestCollectionParams(TestcaseBase):
         int_field_one = cf.gen_int64_field(is_primary=True)
         int_field_two = cf.gen_int64_field(name="int2")
         fields = [int_field_one, int_field_two, cf.gen_float_vec_field()]
-        error = {ct.err_code: 0, ct.err_msg: "Expected only one primary key field"}
+        error = {ct.err_code: 1, ct.err_msg: "Expected only one primary key field"}
         self.collection_schema_wrap.init_collection_schema(fields, primary_field=int_field_two.name,
                                                            check_task=CheckTasks.err_res, check_items=error)

@@ -726,6 +726,7 @@ class TestCollectionParams(TestcaseBase):
         assert not schema.auto_id

     @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24578")
     def test_collection_auto_id_inconsistent(self):
         """
         target: test collection auto_id with both collection schema and field schema

@@ -768,6 +769,7 @@ class TestCollectionParams(TestcaseBase):
                 auto_id=None, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24578")
     @pytest.mark.parametrize("auto_id", ct.get_invalid_strs)
     def test_collection_invalid_auto_id(self, auto_id):
         """

@@ -2909,6 +2911,7 @@ class TestDescribeCollection(TestcaseBase):
         """

     @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.xfail(reason="issue 24493")
     def test_collection_describe(self):
         """
         target: test describe collection

@@ -2921,14 +2924,22 @@ class TestDescribeCollection(TestcaseBase):
         collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
         description = {'collection_name': c_name, 'auto_id': False, 'num_shards': ct.default_shards_num, 'description': '',
                        'fields': [{'field_id': 100, 'name': 'int64', 'description': '', 'type': 5,
-                                   'params': {}, 'is_primary': True, 'auto_id': False},
+                                   'params': {}, 'is_primary': True, 'auto_id': False,
+                                   'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
                                   {'field_id': 101, 'name': 'float', 'description': '', 'type': 10,
-                                   'params': {}, 'is_primary': False, 'auto_id': False},
+                                   'params': {}, 'is_primary': False, 'auto_id': False,
+                                   'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
                                   {'field_id': 102, 'name': 'varchar', 'description': '', 'type': 21,
-                                   'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False},
-                                  {'field_id': 103, 'name': 'float_vector', 'description': '', 'type': 101,
-                                   'params': {'dim': 128}, 'is_primary': False, 'auto_id': False}],
-                       'aliases': [], 'consistency_level': 0, 'properties': []}
+                                   'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False,
+                                   'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
+                                  {'field_id': 103, 'name': 'json_field', 'description': '', 'type': 23,
+                                   'params': {}, 'is_primary': False, 'auto_id': False,
+                                   'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
+                                  {'field_id': 104, 'name': 'float_vector', 'description': '', 'type': 101,
+                                   'params': {'dim': 128}, 'is_primary': False, 'auto_id': False,
+                                   'is_partition_key': False, 'default_value': None, 'is_dynamic': False}],
+                       'aliases': [], 'consistency_level': 2, 'properties': [], 'num_partitions': 0,
+                       'enable_dynamic_field': False}
         res = collection_w.describe()[0]
         del res['collection_id']
         log.info(res)
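Since `collection_id` differs on every run, the test strips it before comparing against the pinned dict; presumably the body continues with an equality check along these lines (sketch, not shown in this hunk):

    res = collection_w.describe()[0]
    del res['collection_id']   # volatile per run, so excluded from the comparison
    assert res == description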
@@ -3670,3 +3681,85 @@ class TestCollectionString(TestcaseBase):
         error = {ct.err_code: 0, ct.err_msg: "autoID is not supported when the VarChar field is the primary key"}
         self.collection_wrap.init_collection(name=cf.gen_unique_str(prefix), schema=schema,
                                              check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestCollectionJSON(TestcaseBase):
+    """
+    ******************************************************************
+      The following cases are used to test about json fields
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("auto_id", [True, False])
+    def test_collection_json_field_as_primary_key(self, auto_id):
+        """
+        target: test create collection with JSON field as primary key
+        method: 1. create collection with one JSON field, and vector field
+                2. set json field is_primary=true
+                3. set auto_id as true
+        expected: Raise exception (not supported)
+        """
+        self._connect()
+        int_field = cf.gen_int64_field()
+        vec_field = cf.gen_float_vec_field()
+        string_field = cf.gen_string_field()
+        # 1. create json field as primary key through field schema api
+        error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR"}
+        json_field = cf.gen_json_field(is_primary=True, auto_id=auto_id)
+        fields = [int_field, string_field, json_field, vec_field]
+        self.collection_schema_wrap.init_collection_schema(fields=fields,
+                                                           check_task=CheckTasks.err_res, check_items=error)
+        # 2. create json field as primary key through collection schema api
+        json_field = cf.gen_json_field()
+        fields = [int_field, string_field, json_field, vec_field]
+        self.collection_schema_wrap.init_collection_schema(fields=fields, primary_field=ct.default_json_field_name,
+                                                           check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_json_field_partition_key(self, primary_field):
+        """
+        target: test create collection with JSON field as partition key
+        method: 1. create collection with one JSON field, primary key field and vector field
+                2. set json field as the partition key
+        expected: Raise exception
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_json_default_collection_schema(primary_field=primary_field, is_partition_key=True)
+        error = {ct.err_code: 1, ct.err_msg: "Partition key field type must be DataType.INT64 or DataType.VARCHAR."}
+        self.collection_wrap.init_collection(name=c_name, schema=schema, partition_key_field=ct.default_json_field_name,
+                                             check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_json_field_supported_primary_key(self, primary_field):
+        """
+        target: test create collection with one JSON field
+        method: 1. create collection with one JSON field, primary key field and vector field
+                2. set json field is_primary=false
+        expected: Create collection successfully
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_json_default_collection_schema(primary_field=primary_field)
+        self.collection_wrap.init_collection(name=c_name, schema=schema,
+                                             check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_collection_multiple_json_fields_supported_primary_key(self, primary_field):
+        """
+        target: test create collection with multiple JSON fields
+        method: 1. create collection with multiple JSON fields, primary key field and vector field
+                2. set json field is_primary=false
+        expected: Create collection successfully
+        """
+        self._connect()
+        c_name = cf.gen_unique_str(prefix)
+        schema = cf.gen_multiple_json_default_collection_schema(primary_field=primary_field)
+        self.collection_wrap.init_collection(name=c_name, schema=schema,
+                                             check_task=CheckTasks.check_collection_property,
+                                             check_items={exp_name: c_name, exp_schema: schema})
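The constraint these cases pin down is visible with the raw pymilvus API as well; a sketch (field names are illustrative, and depending on the client version the error may surface at schema construction or only at collection creation):

    from pymilvus import CollectionSchema, FieldSchema, DataType

    pk = FieldSchema(name="json_pk", dtype=DataType.JSON, is_primary=True)
    vec = FieldSchema(name="vec", dtype=DataType.FLOAT_VECTOR, dim=128)
    try:
        CollectionSchema(fields=[pk, vec])
    except Exception as e:
        # expected: primary key type must be DataType.INT64 or DataType.VARCHAR
        print(e)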
@@ -478,7 +478,7 @@ class TestDeleteOperation(TestcaseBase):
         """
         # init collection with nb default data
         collection_w, _, _, ids = self.init_collection_general(prefix, insert_data=True)[0:4]
-        entity, _ = collection_w.query(tmp_expr, output_fields=["%"])
+        entity, _ = collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name])
         search_res, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
                                             ct.default_float_vec_field_name,
                                             ct.default_search_params, ct.default_limit)

@@ -994,7 +994,7 @@ class TestDeleteOperation(TestcaseBase):
         log.debug(collection_w.num_entities)
         collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name],
                            check_task=CheckTasks.check_query_results,
-                           check_items={'exp_res': df_new.iloc[[0], [0, 3]].to_dict('records'), 'with_vec': True})
+                           check_items={'exp_res': df_new.iloc[[0], [0, 4]].to_dict('records'), 'with_vec': True})

         collection_w.delete(tmp_expr)
         if to_flush_delete:

@@ -1635,7 +1635,7 @@ class TestDeleteString(TestcaseBase):
         log.debug(collection_w.num_entities)
         collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name],
                            check_task=CheckTasks.check_query_results,
-                           check_items={'exp_res': df_new.iloc[[0], [2, 3]].to_dict('records'),
+                           check_items={'exp_res': df_new.iloc[[0], [2, 4]].to_dict('records'),
                                         'primary_field': ct.default_string_field_name, 'with_vec': True})

         collection_w.delete(default_string_expr)

@@ -1800,6 +1800,7 @@ class TestDeleteString(TestcaseBase):

     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("to_query", [True, False])
+    # @pytest.mark.parametrize("enable_dynamic_field", [True, False])
     def test_delete_insert_same_id_sealed_string(self, to_query):
         """
         target: test insert same id entity after delete from sealed data

@@ -1840,10 +1841,15 @@ class TestDeleteString(TestcaseBase):
         log.debug(collection_w.num_entities)

         # re-query
-        res = df_new.iloc[[0], [2, 3]].to_dict('records')
+        res = df_new.iloc[[0], [2, 4]].to_dict('records')
         log.info(res)
         collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name],
-                           check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'primary_field': ct.default_string_field_name, 'with_vec': True})
-        collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name,
+                           check_task=CheckTasks.check_query_results,
+                           check_items={'exp_res': res,
+                                        'primary_field': ct.default_string_field_name,
+                                        'with_vec': True})
+        collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]],
+                            anns_field=ct.default_float_vec_field_name,
                             param=default_search_params, limit=1)

     @pytest.mark.tags(CaseLabel.L1)
@@ -1861,3 +1861,17 @@ class TestIndexDiskann(TestcaseBase):
                                   check_task=CheckTasks.err_res,
                                   check_items={ct.err_code: 1,
                                                ct.err_msg: "invalid index params"})
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_index_json(self):
+        """
+        target: test create index on json fields
+        method: 1. create collection, and create index
+        expected: create index raise an error
+        """
+        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
+                                                                      dim=ct.default_dim, is_index=False)[0:4]
+        collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index,
+                                  check_task=CheckTasks.err_res,
+                                  check_items={ct.err_code: 1,
+                                               ct.err_msg: "create index on json field is not supported"})
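Outside the wrapper layer, the same behavior with a bare pymilvus Collection would look roughly like this sketch (handle and index params illustrative, mirroring the test's ct.default_flat_index):

    # JSON columns cannot be indexed; the server rejects the request
    collection.create_index("json_field",
                            {"index_type": "FLAT", "metric_type": "L2"})
    # -> MilvusException: create index on json field is not supported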
@@ -98,7 +98,7 @@ class TestInsertParams(TestcaseBase):
         collection_w = self.init_collection_wrap(name=c_name)
         error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
                                              "expected: ['int64', 'float', 'varchar', 'float_vector'], got %s" % data}
-        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+        collection_w.insert(data=data)

     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_dataframe_only_columns(self):

@@ -1379,7 +1379,7 @@ class TestUpsertValid(TestcaseBase):
                 [str(i) for i in range(nb)], binary_vectors]
         collection_w.upsert(data)
         res = collection_w.query("int64 >= 0", [ct.default_binary_vec_field_name])[0]
-        assert binary_vectors[0] == res[0][ct.default_binary_vec_field_name]
+        assert binary_vectors[0] == res[0][ct.default_binary_vec_field_name][0]

     @pytest.mark.tags(CaseLabel.L1)
     def test_upsert_same_with_inserted_data(self):
@@ -42,6 +42,10 @@ class TestQueryParams(TestcaseBase):
       query(collection_name, expr, output_fields=None, partition_names=None, timeout=None)
     """

+    @pytest.fixture(scope="function", params=[True, False])
+    def enable_dynamic_field(self, request):
+        yield request.param
+
     @pytest.mark.tags(CaseLabel.L2)
     def test_query_invalid(self):
         """
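This function-scoped fixture parametrizes every test that declares an `enable_dynamic_field` argument, so each such case is collected twice. A self-contained illustration of the mechanism:

    import pytest

    @pytest.fixture(scope="function", params=[True, False])
    def enable_dynamic_field(request):
        yield request.param

    def test_example(enable_dynamic_field):
        # collected as test_example[True] and test_example[False]
        assert enable_dynamic_field in (True, False)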
@@ -55,18 +59,27 @@ class TestQueryParams(TestcaseBase):
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L0)
-    def test_query(self):
+    def test_query(self, enable_dynamic_field):
         """
         target: test query
         method: query with term expr
         expected: verify query result
         """
         # create collection, insert default_nb, load collection
-        collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2]
-        int_values = vectors[0][ct.default_int64_field_name].values.tolist()
+        collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
+                                                             enable_dynamic_field=enable_dynamic_field)[0:2]
         pos = 5
+        if enable_dynamic_field:
+            int_values = []
+            for vector in vectors[0]:
+                vector = vector[ct.default_int64_field_name]
+                int_values.append(vector)
+            res = [{ct.default_int64_field_name: int_values[i]} for i in range(pos)]
+        else:
+            int_values = vectors[0][ct.default_int64_field_name].values.tolist()
+            res = vectors[0].iloc[0:pos, :1].to_dict('records')
+
         term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}'
-        res = vectors[0].iloc[0:pos, :1].to_dict('records')
         collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})

     @pytest.mark.tags(CaseLabel.L1)

@@ -337,7 +350,7 @@ class TestQueryParams(TestcaseBase):
         res = []
         # int8 range [-128, 127] so when nb=1200, there are many repeated int8 values equal to 0
         for i in range(0, ct.default_nb, 256):
-            res.extend(df.iloc[i:i + 1, :-1].to_dict('records'))
+            res.extend(df.iloc[i:i + 1, :-2].to_dict('records'))
         self.collection_wrap.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
         self.collection_wrap.load()
         self.collection_wrap.query(term_expr, output_fields=["float", "int64", "int8", "varchar"],

@@ -350,7 +363,7 @@ class TestQueryParams(TestcaseBase):
         yield request.param

     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_with_expression(self, get_normal_expr):
+    def test_query_with_expression(self, get_normal_expr, enable_dynamic_field):
         """
         target: test query with different expr
         method: query with different boolean expr

@@ -358,7 +371,9 @@ class TestQueryParams(TestcaseBase):
         """
         # 1. initialize with data
         nb = 1000
-        collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb)[0:4]
+        collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb,
+                                                                             enable_dynamic_field=
+                                                                             enable_dynamic_field)[0:4]

         # filter result with expression in collection
         _vectors = _vectors[0]

@@ -366,8 +381,12 @@ class TestQueryParams(TestcaseBase):
             expression = expr.replace("&&", "and").replace("||", "or")
             filter_ids = []
             for i, _id in enumerate(insert_ids):
-                int64 = _vectors.int64[i]
-                float = _vectors.float[i]
+                if enable_dynamic_field:
+                    int64 = _vectors[i][ct.default_int64_field_name]
+                    float = _vectors[i][ct.default_float_field_name]
+                else:
+                    int64 = _vectors.int64[i]
+                    float = _vectors.float[i]
                 if not expression or eval(expression):
                     filter_ids.append(_id)
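The expected-id computation uses Python itself as the oracle for each boolean expression: `&&`/`||` are rewritten to `and`/`or`, then the string is eval-uated against local `int64` and `float` bindings (the test deliberately shadows the `float` builtin). A compact illustration:

    expr = "int64 > 10 && float <= 100.0"
    expression = expr.replace("&&", "and").replace("||", "or")
    int64, float = 42, 99.0        # shadows the builtin, as in the test
    assert eval(expression) is True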
@@ -536,30 +555,34 @@ class TestQueryParams(TestcaseBase):
         collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)

     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_output_field_none_or_empty(self):
+    def test_query_output_field_none_or_empty(self, enable_dynamic_field):
         """
         target: test query with none and empty output field
         method: query with output field=None, field=[]
         expected: return primary field
         """
-        collection_w = self.init_collection_general(prefix, insert_data=True)[0]
+        collection_w = self.init_collection_general(prefix, insert_data=True,
+                                                    enable_dynamic_field=enable_dynamic_field)[0]
         for fields in [None, []]:
             res, _ = collection_w.query(default_term_expr, output_fields=fields)
             assert res[0].keys() == {ct.default_int64_field_name}

     @pytest.mark.tags(CaseLabel.L0)
-    def test_query_output_one_field(self):
+    def test_query_output_one_field(self, enable_dynamic_field):
         """
         target: test query with output one field
         method: query with output one field
         expected: return one field
         """
-        collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2]
+        collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
+                                                             enable_dynamic_field=
+                                                             enable_dynamic_field)[0:2]
         res, _ = collection_w.query(default_term_expr, output_fields=[ct.default_float_field_name])
         assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_float_field_name}

     @pytest.mark.tags(CaseLabel.L1)
-    def test_query_output_all_fields(self):
+    @pytest.mark.xfail(reason="issue 24637")
+    def test_query_output_all_fields(self, enable_dynamic_field):
         """
         target: test query with none output field
         method: query with output field=None

@@ -567,11 +590,18 @@ class TestQueryParams(TestcaseBase):
         """
         # 1. initialize with data
         collection_w, df, _, insert_ids = self.init_collection_general(prefix, True, nb=10,
-                                                                       is_all_data_type=True)[0:4]
+                                                                       is_all_data_type=True,
+                                                                       enable_dynamic_field=
+                                                                       enable_dynamic_field)[0:4]
         all_fields = [ct.default_int64_field_name, ct.default_int32_field_name, ct.default_int16_field_name,
                       ct.default_int8_field_name, ct.default_bool_field_name, ct.default_float_field_name,
-                      ct.default_double_field_name, ct.default_string_field_name, ct.default_float_vec_field_name]
-        res = df[0].iloc[:2].to_dict('records')
+                      ct.default_double_field_name, ct.default_string_field_name, ct.default_json_field_name,
+                      ct.default_float_vec_field_name]
+        if enable_dynamic_field:
+            res = df[0][:2]
+        else:
+            res = df[0].iloc[:2].to_dict('records')
         log.info(res)
         collection_w.load()
         actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields,
                                            check_task=CheckTasks.check_query_results,

@@ -736,6 +766,7 @@ class TestQueryParams(TestcaseBase):
                                check_items=error)

     @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.xfail(reason="issue 24637")
     def test_query_output_fields_simple_wildcard(self):
         """
         target: test query output_fields with simple wildcard (* and %)

@@ -754,6 +785,7 @@ class TestQueryParams(TestcaseBase):
                                check_items={exp_res: res3, "with_vec": True})

     @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue 24637")
     def test_query_output_fields_part_scale_wildcard(self):
         """
         target: test query output_fields with part wildcard

@@ -1681,6 +1713,7 @@ class TestQueryString(TestcaseBase):
                            check_items={ct.err_code: 1, ct.err_msg: f' cannot parse expression:{expression}'})

     @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.xfail(reason="issue 24637")
     def test_query_after_insert_multi_threading(self):
         """
         target: test data consistency after multi threading insert
File diff suppressed because it is too large