Add json and dynamic support cases (#24525)

Signed-off-by: Binbin Lv <binbin.lv@zilliz.com>
pull/24672/head
binbin 2023-06-06 12:02:34 +08:00 committed by GitHub
parent 732fe54775
commit c9997a2703
10 changed files with 770 additions and 227 deletions


@@ -169,7 +169,7 @@ class TestcaseBase(Base):
if is_all_data_type:
default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
primary_field=primary_field)
log.info("init_collection_general: collection creation")
log.info("insert_data_general: collection creation")
collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
pre_entities = collection_w.num_entities
if insert_data:
@@ -184,7 +184,8 @@ class TestcaseBase(Base):
def init_collection_general(self, prefix="test", insert_data=False, nb=ct.default_nb,
partition_num=0, is_binary=False, is_all_data_type=False,
auto_id=False, dim=ct.default_dim, is_index=True,
primary_field=ct.default_int64_field_name, is_flush=True, name=None, **kwargs):
primary_field=ct.default_int64_field_name, is_flush=True, name=None,
enable_dynamic_field=False, with_json=True, **kwargs):
"""
target: create specified collections
method: 1. create collections (binary/non-binary, default/all data type, auto_id or not)
@@ -204,13 +205,17 @@ class TestcaseBase(Base):
insert_ids = []
time_stamp = 0
# 1 create collection
default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field)
default_schema = cf.gen_default_collection_schema(auto_id=auto_id, dim=dim, primary_field=primary_field,
enable_dynamic_field=enable_dynamic_field,
with_json=with_json)
if is_binary:
default_schema = cf.gen_default_binary_collection_schema(auto_id=auto_id, dim=dim,
primary_field=primary_field)
if is_all_data_type:
default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
primary_field=primary_field)
primary_field=primary_field,
enable_dynamic_field=enable_dynamic_field,
with_json=with_json)
log.info("init_collection_general: collection creation")
collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
# 2 add extra partitions if specified (default is 1 partition named "_default")
@@ -219,7 +224,8 @@ class TestcaseBase(Base):
# 3 insert data if specified
if insert_data:
collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id, dim=dim)
cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json)
if is_flush:
assert collection_w.is_empty is False
assert collection_w.num_entities == nb
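
For orientation, a minimal sketch (hypothetical test body; the prefix string and nb are illustrative) of how the two new parameters combine:

    collection_w, vectors, _, insert_ids = \
        self.init_collection_general("json_dyn", insert_data=True, nb=100,
                                     enable_dynamic_field=True,  # schema keeps only pk + vector
                                     with_json=False)[0:4]       # skip the explicit json column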


@@ -296,7 +296,8 @@ class ResponseChecker:
primary_field = check_items.get("primary_field", None)
if exp_res is not None:
if isinstance(query_res, list):
assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field, with_vec=with_vec)
assert pc.equal_entities_list(exp=exp_res, actual=query_res, primary_field=primary_field,
with_vec=with_vec)
return True
else:
log.error(f"Query result {query_res} is not list")


@@ -81,6 +81,12 @@ def gen_string_field(name=ct.default_string_field_name, description=ct.default_d
return string_field
def gen_json_field(name=ct.default_json_field_name, description=ct.default_desc, is_primary=False, **kwargs):
json_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.JSON, description=description,
is_primary=is_primary, **kwargs)
return json_field
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
is_primary=is_primary, **kwargs)
@@ -134,10 +140,24 @@ def gen_binary_vec_field(name=ct.default_binary_vec_field_name, is_primary=False
def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=True):
if enable_dynamic_field:
if primary_field == ct.default_int64_field_name:
fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
elif primary_field == ct.default_string_field_name:
fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
else:
log.error("Primary key field only supports int64 and varchar")
assert False
else:
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(),
gen_float_vec_field(dim=dim)]
if with_json is False:
fields.remove(gen_json_field())
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
primary_field=primary_field, auto_id=auto_id,
enable_dynamic_field=enable_dynamic_field)
return schema
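
A hedged sanity check of the branches above (field names follow the ct constants; note that fields.remove(gen_json_field()) assumes pymilvus FieldSchema objects compare equal by value):

    schema = gen_default_collection_schema(enable_dynamic_field=True)
    # [f.name for f in schema.fields] -> ['int64', 'float_vector']
    schema = gen_default_collection_schema(with_json=False)
    # [f.name for f in schema.fields] -> ['int64', 'float', 'varchar', 'float_vector']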
@@ -154,7 +174,24 @@ def gen_general_collection_schema(description=ct.default_desc, primary_field=ct.
def gen_string_pk_default_collection_schema(description=ct.default_desc, primary_field=ct.default_string_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(), gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
def gen_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim, **kwargs):
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(**kwargs),
gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
def gen_multiple_json_default_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_float_field(), gen_string_field(), gen_json_field(name="json1"),
gen_json_field(name="json2"), gen_float_vec_field(dim=dim)]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
@@ -162,11 +199,19 @@ def gen_string_pk_default_collection_schema(description=ct.default_desc, primary
def gen_collection_schema_all_datatype(description=ct.default_desc,
primary_field=ct.default_int64_field_name,
auto_id=False, dim=ct.default_dim):
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(), gen_float_vec_field(dim=dim)]
auto_id=False, dim=ct.default_dim,
enable_dynamic_field=False, with_json=True):
if enable_dynamic_field:
fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
else:
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
gen_json_field(), gen_float_vec_field(dim=dim)]
if with_json is False:
fields.remove(gen_json_field())
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
primary_field=primary_field, auto_id=auto_id,
enable_dynamic_field=enable_dynamic_field)
return schema
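
The all-datatype generator behaves the same way; a brief sketch (names per the ct.default_*_field_name constants):

    schema = gen_collection_schema_all_datatype(enable_dynamic_field=True)
    # only ['int64', 'float_vector'] remain; the other scalars travel as dynamic fields
    schema = gen_collection_schema_all_datatype(with_json=False)
    # the full scalar field set, minus 'json_field'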
@@ -227,29 +272,55 @@ def gen_binary_vectors(num, dim):
return raw_vectors, binary_vectors
def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
int_values = pd.Series(data=[i for i in range(start, start + nb)])
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
json_values = [{"number": i, "float": i*1.0, "string": str(i), "bool": bool(i),
"list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
float_vec_values = gen_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
ct.default_float_field_name: float_values,
ct.default_string_field_name: string_values,
ct.default_json_field_name: json_values,
ct.default_float_vec_field_name: float_vec_values
})
if with_json is False:
df.drop(ct.default_json_field_name, axis=1, inplace=True)
return df
def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
array = []
for i in range(start, start + nb):
row = {ct.default_int64_field_name: i,
ct.default_float_field_name: i*1.0,
ct.default_string_field_name: str(i),
ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
"list": [j for j in range(0, i)]},
ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
}
if with_json is False:
row.pop(ct.default_json_field_name, None)
array.append(row)
return array
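
For reference, the first row emitted by gen_default_rows_data(nb=1, dim=2) looks like this (the two vector floats are random; the values shown are placeholders):

    # {'int64': 0, 'float': 0.0, 'varchar': '0',
    #  'json_field': {'number': 0, 'string': '0', 'bool': False, 'list': []},
    #  'float_vector': [0.13, 0.87]}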
def gen_default_data_for_upsert(nb=ct.default_nb, dim=ct.default_dim, start=0, size=10000):
int_values = pd.Series(data=[i for i in range(start, start + nb)])
float_values = pd.Series(data=[np.float32(i + size) for i in range(start, start + nb)], dtype="float32")
string_values = pd.Series(data=[str(i + size) for i in range(start, start + nb)], dtype="string")
json_values = [{"number": i, "string": str(i), "bool": bool(i),
"list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
float_vec_values = gen_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
ct.default_float_field_name: float_values,
ct.default_string_field_name: string_values,
ct.default_json_field_name: json_values,
ct.default_float_vec_field_name: float_vec_values
})
return df, float_values
@@ -304,7 +375,7 @@ def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
return df
def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
int64_values = pd.Series(data=[i for i in range(start, start + nb)])
int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32")
int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
@@ -313,6 +384,8 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
for i in range(start, start + nb)]
float_vec_values = gen_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int64_values,
@@ -323,11 +396,38 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
ct.default_float_field_name: float_values,
ct.default_double_field_name: double_values,
ct.default_string_field_name: string_values,
ct.default_json_field_name: json_values,
ct.default_float_vec_field_name: float_vec_values
})
if with_json is False:
df.drop(ct.default_json_field_name, axis=1, inplace=True)
return df
def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
array = []
for i in range(start, start + nb):
row = {ct.default_int64_field_name: i,
ct.default_int32_field_name: i,
ct.default_int16_field_name: i,
ct.default_int8_field_name: i,
ct.default_bool_field_name: bool(i),
ct.default_float_field_name: i*1.0,
ct.default_double_field_name: i*1.0,
ct.default_string_field_name: str(i),
ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
"list": [j for j in range(0, i)]},
ct.default_float_vec_field_name: gen_vectors(1, dim)[0]
}
if with_json is False:
row.pop(ct.default_json_field_name, None)
array.append(row)
return array
def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
int_values = pd.Series(data=[i for i in range(start, start + nb)])
float_values = pd.Series(data=[np.float32(i) for i in range(start, start + nb)], dtype="float32")
@@ -346,8 +446,10 @@ def gen_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
int_values = [i for i in range(start, start + nb)]
float_values = [np.float32(i) for i in range(start, start + nb)]
string_values = [str(i) for i in range(start, start + nb)]
json_values = [{"number": i, "string": str(i), "bool": bool(i), "list": [j for j in range(0, i)]}
for i in range(start, start + nb)]
float_vec_values = gen_vectors(nb, dim)
data = [int_values, float_values, string_values, float_vec_values]
data = [int_values, float_values, string_values, json_values, float_vec_values]
return data
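
The list form is column-oriented, so the column order must match the default schema; a hedged pairing with an already-created collection_w:

    data = gen_default_list_data(nb=10, dim=8)
    # columns: int64, float, varchar, json_field, float_vector
    # collection_w.insert(data)  # would insert 10 entities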
@@ -421,8 +523,10 @@ def gen_numpy_data(nb=ct.default_nb, dim=ct.default_dim):
int_values = np.arange(nb, dtype='int64')
float_values = np.arange(nb, dtype='float32')
string_values = [np.str_(i) for i in range(nb)]
json_values = [{"number": i, "string": str(i), "bool": bool(i),
"list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(nb)]
float_vec_values = gen_vectors(nb, dim)
data = [int_values, float_values, string_values, float_vec_values]
data = [int_values, float_values, string_values, json_values, float_vec_values]
return data
@@ -768,7 +872,7 @@ def gen_partitions(collection_w, partition_num=1):
def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
auto_id=False, dim=ct.default_dim, insert_offset=0):
auto_id=False, dim=ct.default_dim, insert_offset=0, enable_dynamic_field=False, with_json=True):
"""
target: insert non-binary/binary data
method: insert non-binary/binary data into partitions if any
@@ -782,14 +886,23 @@ def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False,
start = insert_offset
log.info(f"inserted {nb} data into collection {collection_w.name}")
for i in range(num):
default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start)
log.debug("Dynamic field is enabled: %s" % enable_dynamic_field)
default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json)
if enable_dynamic_field:
default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json)
if is_binary:
default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start)
binary_raw_vectors.extend(binary_raw_data)
if is_all_data_type:
default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start)
default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start, with_json=with_json)
if enable_dynamic_field:
default_data = gen_default_rows_data_all_data_type(nb // num, dim=dim, start=start, with_json=with_json)
if auto_id:
default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
if enable_dynamic_field:
for data in default_data:
data.pop(ct.default_int64_field_name, None)
else:
default_data.drop(ct.default_int64_field_name, axis=1, inplace=True)
insert_res = collection_w.insert(default_data, par[i].name)[0]
time_stamp = insert_res.timestamp
insert_ids.extend(insert_res.primary_keys)
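
A sketch of driving the reworked helper from a test, assuming collection_w was already created with a dynamic-field, no-json schema (parameter names as above):

    collection_w, vectors, _, insert_ids, time_stamp = cf.insert_data(
        collection_w, nb=3000,
        enable_dynamic_field=True,  # takes the rows-based generators above
        with_json=False)            # drops the explicit json column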


@@ -33,6 +33,7 @@ default_int64_field_name = "int64"
default_float_field_name = "float"
default_double_field_name = "double"
default_string_field_name = "varchar"
default_json_field_name = "json_field"
default_float_vec_field_name = "float_vector"
another_float_vec_field_name = "float_vector1"
default_binary_vec_field_name = "binary_vector"
@@ -42,6 +43,7 @@ default_resource_group_capacity = 1000000
default_tag = "1970_01_01"
row_count = "row_count"
default_length = 65535
default_json_list_length = 3
default_desc = ""
default_collection_desc = "default collection"
default_index_name = "default_index_name"


@@ -327,7 +327,7 @@ class TestCollectionParams(TestcaseBase):
expected: raise exception
"""
self._connect()
error = {ct.err_code: 0, ct.err_msg: "Field dtype must be of DataType"}
error = {ct.err_code: 1, ct.err_msg: "Field dtype must be of DataType"}
self.field_schema_wrap.init_field_schema(name="unknown", dtype=DataType.UNKNOWN,
check_task=CheckTasks.err_res, check_items=error)
@@ -560,7 +560,7 @@ class TestCollectionParams(TestcaseBase):
"""
self._connect()
fields = [cf.gen_int64_field(), cf.gen_float_vec_field()]
error = {ct.err_code: 0, ct.err_msg: "Primary field must in dataframe."}
error = {ct.err_code: 1, ct.err_msg: "Param primary_field must be str type."}
self.collection_schema_wrap.init_collection_schema(fields, primary_field=primary_field,
check_task=CheckTasks.err_res, check_items=error)
@@ -646,7 +646,7 @@ class TestCollectionParams(TestcaseBase):
int_field_one = cf.gen_int64_field(is_primary=True)
int_field_two = cf.gen_int64_field(name="int2")
fields = [int_field_one, int_field_two, cf.gen_float_vec_field()]
error = {ct.err_code: 0, ct.err_msg: "Expected only one primary key field"}
error = {ct.err_code: 1, ct.err_msg: "Expected only one primary key field"}
self.collection_schema_wrap.init_collection_schema(fields, primary_field=int_field_two.name,
check_task=CheckTasks.err_res, check_items=error)
@@ -726,6 +726,7 @@ class TestCollectionParams(TestcaseBase):
assert not schema.auto_id
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="issue 24578")
def test_collection_auto_id_inconsistent(self):
"""
target: test collection auto_id with both collection schema and field schema
@@ -768,6 +769,7 @@ class TestCollectionParams(TestcaseBase):
auto_id=None, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="issue 24578")
@pytest.mark.parametrize("auto_id", ct.get_invalid_strs)
def test_collection_invalid_auto_id(self, auto_id):
"""
@@ -2909,6 +2911,7 @@ class TestDescribeCollection(TestcaseBase):
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="issue 24493")
def test_collection_describe(self):
"""
target: test describe collection
@@ -2921,14 +2924,22 @@ class TestDescribeCollection(TestcaseBase):
collection_w.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
description = {'collection_name': c_name, 'auto_id': False, 'num_shards': ct.default_shards_num, 'description': '',
'fields': [{'field_id': 100, 'name': 'int64', 'description': '', 'type': 5,
'params': {}, 'is_primary': True, 'auto_id': False},
'params': {}, 'is_primary': True, 'auto_id': False,
'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
{'field_id': 101, 'name': 'float', 'description': '', 'type': 10,
'params': {}, 'is_primary': False, 'auto_id': False},
'params': {}, 'is_primary': False, 'auto_id': False,
'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
{'field_id': 102, 'name': 'varchar', 'description': '', 'type': 21,
'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False},
{'field_id': 103, 'name': 'float_vector', 'description': '', 'type': 101,
'params': {'dim': 128}, 'is_primary': False, 'auto_id': False}],
'aliases': [], 'consistency_level': 0, 'properties': []}
'params': {'max_length': 65535}, 'is_primary': False, 'auto_id': False,
'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
{'field_id': 103, 'name': 'json_field', 'description': '', 'type': 23,
'params': {}, 'is_primary': False, 'auto_id': False,
'is_partition_key': False, 'default_value': None, 'is_dynamic': False},
{'field_id': 104, 'name': 'float_vector', 'description': '', 'type': 101,
'params': {'dim': 128}, 'is_primary': False, 'auto_id': False,
'is_partition_key': False, 'default_value': None, 'is_dynamic': False}],
'aliases': [], 'consistency_level': 2, 'properties': [], 'num_partitions': 0,
'enable_dynamic_field': False}
res = collection_w.describe()[0]
del res['collection_id']
log.info(res)
@@ -3670,3 +3681,85 @@ class TestCollectionString(TestcaseBase):
error = {ct.err_code: 0, ct.err_msg: "autoID is not supported when the VarChar field is the primary key"}
self.collection_wrap.init_collection(name=cf.gen_unique_str(prefix), schema=schema,
check_task=CheckTasks.err_res, check_items=error)
class TestCollectionJSON(TestcaseBase):
"""
******************************************************************
The following cases are used to test about json
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_collection_json_field_as_primary_key(self, auto_id):
"""
target: test create collection with JSON field as primary key
method: 1. create collection with one JSON field, and vector field
2. set json field is_primary=true
3. set auto_id as true
expected: Raise exception (not supported)
"""
self._connect()
int_field = cf.gen_int64_field()
vec_field = cf.gen_float_vec_field()
string_field = cf.gen_string_field()
# 1. create json field as primary key through field schema api
error = {ct.err_code: 1, ct.err_msg: "Primary key type must be DataType.INT64 or DataType.VARCHAR"}
json_field = cf.gen_json_field(is_primary=True, auto_id=auto_id)
fields = [int_field, string_field, json_field, vec_field]
self.collection_schema_wrap.init_collection_schema(fields=fields,
check_task=CheckTasks.err_res, check_items=error)
# 2. create json field as primary key through collection schema api
json_field = cf.gen_json_field()
fields = [int_field, string_field, json_field, vec_field]
self.collection_schema_wrap.init_collection_schema(fields=fields, primary_field=ct.default_json_field_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
def test_collection_json_field_partition_key(self, primary_field):
"""
target: test create collection with JSON field as partition key
method: 1. create collection with one JSON field, primary key field and vector field
2. set json field as partition key
expected: Raise exception (not supported)
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_json_default_collection_schema(primary_field=primary_field, is_partition_key=True)
error = {ct.err_code: 1, ct.err_msg: "Partition key field type must be DataType.INT64 or DataType.VARCHAR."}
self.collection_wrap.init_collection(name=c_name, schema=schema, partition_key_field=ct.default_json_field_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
def test_collection_json_field_supported_primary_key(self, primary_field):
"""
target: test create collection with one JSON field
method: 1. create collection with one JSON field, primary key field and vector field
2. set json field is_primary=false
expected: Create collection successfully
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_json_default_collection_schema(primary_field=primary_field)
self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: c_name, exp_schema: schema})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
def test_collection_multiple_json_fields_supported_primary_key(self, primary_field):
"""
target: test create collection with multiple JSON fields
method: 1. create collection with multiple JSON fields, primary key field and vector field
2. set json field is_primary=false
expected: Create collection successfully
"""
self._connect()
c_name = cf.gen_unique_str(prefix)
schema = cf.gen_multiple_json_default_collection_schema(primary_field=primary_field)
self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: c_name, exp_schema: schema})
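
As a hypothetical complement to the schema-level checks above, a row inserted into the two-JSON-field collection would have to carry both columns explicitly:

    # collection_w.insert([{"int64": 0, "float": 0.0, "varchar": "0",
    #                       "json1": {"k": 1}, "json2": {"k": [1, 2]},
    #                       "float_vector": cf.gen_vectors(1, ct.default_dim)[0]}])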


@@ -478,7 +478,7 @@ class TestDeleteOperation(TestcaseBase):
"""
# init collection with nb default data
collection_w, _, _, ids = self.init_collection_general(prefix, insert_data=True)[0:4]
entity, _ = collection_w.query(tmp_expr, output_fields=["%"])
entity, _ = collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name])
search_res, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
ct.default_float_vec_field_name,
ct.default_search_params, ct.default_limit)
@@ -994,7 +994,7 @@ class TestDeleteOperation(TestcaseBase):
log.debug(collection_w.num_entities)
collection_w.query(tmp_expr, output_fields=[ct.default_float_vec_field_name],
check_task=CheckTasks.check_query_results,
check_items={'exp_res': df_new.iloc[[0], [0, 3]].to_dict('records'), 'with_vec': True})
check_items={'exp_res': df_new.iloc[[0], [0, 4]].to_dict('records'), 'with_vec': True})
collection_w.delete(tmp_expr)
if to_flush_delete:
@@ -1635,7 +1635,7 @@ class TestDeleteString(TestcaseBase):
log.debug(collection_w.num_entities)
collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name],
check_task=CheckTasks.check_query_results,
check_items={'exp_res': df_new.iloc[[0], [2, 3]].to_dict('records'),
check_items={'exp_res': df_new.iloc[[0], [2, 4]].to_dict('records'),
'primary_field': ct.default_string_field_name, 'with_vec': True})
collection_w.delete(default_string_expr)
@@ -1800,6 +1800,7 @@ class TestDeleteString(TestcaseBase):
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("to_query", [True, False])
# @pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_insert_same_id_sealed_string(self, to_query):
"""
target: test insert same id entity after delete from sealed data
@@ -1840,10 +1841,15 @@ class TestDeleteString(TestcaseBase):
log.debug(collection_w.num_entities)
# re-query
res = df_new.iloc[[0], [2, 3]].to_dict('records')
res = df_new.iloc[[0], [2, 4]].to_dict('records')
log.info(res)
collection_w.query(default_string_expr, output_fields=[ct.default_float_vec_field_name],
check_task=CheckTasks.check_query_results, check_items={'exp_res': res, 'primary_field': ct.default_string_field_name, 'with_vec': True})
collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]], anns_field=ct.default_float_vec_field_name,
check_task=CheckTasks.check_query_results,
check_items={'exp_res': res,
'primary_field': ct.default_string_field_name,
'with_vec': True})
collection_w.search(data=[df_new[ct.default_float_vec_field_name][0]],
anns_field=ct.default_float_vec_field_name,
param=default_search_params, limit=1)
@pytest.mark.tags(CaseLabel.L1)


@@ -1861,3 +1861,17 @@ class TestIndexDiskann(TestcaseBase):
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "invalid index params"})
@pytest.mark.tags(CaseLabel.L1)
def test_create_index_json(self):
"""
target: test create index on json fields
method: 1.create collection, and create index
expected: create index raise an error
"""
collection_w, _, _, insert_ids = self.init_collection_general(prefix, True,
dim=ct.default_dim, is_index=False)[0:4]
collection_w.create_index(ct.default_json_field_name, index_params=ct.default_flat_index,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "create index on json field is not supported"})


@@ -98,7 +98,7 @@ class TestInsertParams(TestcaseBase):
collection_w = self.init_collection_wrap(name=c_name)
error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
"expected: ['int64', 'float', 'varchar', 'float_vector'], got %s" % data}
collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
collection_w.insert(data=data)
@pytest.mark.tags(CaseLabel.L2)
def test_insert_dataframe_only_columns(self):
@@ -1379,7 +1379,7 @@ class TestUpsertValid(TestcaseBase):
[str(i) for i in range(nb)], binary_vectors]
collection_w.upsert(data)
res = collection_w.query("int64 >= 0", [ct.default_binary_vec_field_name])[0]
assert binary_vectors[0] == res[0][ct.default_binary_vec_field_name]
assert binary_vectors[0] == res[0][ct.default_binary_vec_field_name][0]
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_same_with_inserted_data(self):


@@ -42,6 +42,10 @@ class TestQueryParams(TestcaseBase):
query(collection_name, expr, output_fields=None, partition_names=None, timeout=None)
"""
@pytest.fixture(scope="function", params=[True, False])
def enable_dynamic_field(self, request):
yield request.param
@pytest.mark.tags(CaseLabel.L2)
def test_query_invalid(self):
"""
@@ -55,18 +59,27 @@ class TestQueryParams(TestcaseBase):
collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L0)
def test_query(self):
def test_query(self, enable_dynamic_field):
"""
target: test query
method: query with term expr
expected: verify query result
"""
# create collection, insert default_nb, load collection
collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2]
int_values = vectors[0][ct.default_int64_field_name].values.tolist()
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=enable_dynamic_field)[0:2]
pos = 5
if enable_dynamic_field:
int_values = []
for vector in vectors[0]:
int_values.append(vector[ct.default_int64_field_name])
res = [{ct.default_int64_field_name: int_values[i]} for i in range(pos)]
else:
int_values = vectors[0][ct.default_int64_field_name].values.tolist()
res = vectors[0].iloc[0:pos, :1].to_dict('records')
term_expr = f'{ct.default_int64_field_name} in {int_values[:pos]}'
collection_w.query(term_expr, check_task=CheckTasks.check_query_results, check_items={exp_res: res})
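
For concreteness, with the default data and pos = 5 the generated filter reduces to:

    # term_expr == 'int64 in [0, 1, 2, 3, 4]'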
@pytest.mark.tags(CaseLabel.L1)
@@ -337,7 +350,7 @@ class TestQueryParams(TestcaseBase):
res = []
# int8 range [-128, 127] so when nb=1200, there are many repeated int8 values equal to 0
for i in range(0, ct.default_nb, 256):
res.extend(df.iloc[i:i + 1, :-1].to_dict('records'))
res.extend(df.iloc[i:i + 1, :-2].to_dict('records'))
self.collection_wrap.create_index(ct.default_float_vec_field_name, index_params=ct.default_flat_index)
self.collection_wrap.load()
self.collection_wrap.query(term_expr, output_fields=["float", "int64", "int8", "varchar"],
@@ -350,7 +363,7 @@ class TestQueryParams(TestcaseBase):
yield request.param
@pytest.mark.tags(CaseLabel.L1)
def test_query_with_expression(self, get_normal_expr):
def test_query_with_expression(self, get_normal_expr, enable_dynamic_field):
"""
target: test query with different expr
method: query with different boolean expr
@@ -358,7 +371,9 @@ class TestQueryParams(TestcaseBase):
"""
# 1. initialize with data
nb = 1000
collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb)[0:4]
collection_w, _vectors, _, insert_ids = self.init_collection_general(prefix, True, nb,
enable_dynamic_field=enable_dynamic_field)[0:4]
# filter result with expression in collection
_vectors = _vectors[0]
@@ -366,8 +381,12 @@ class TestQueryParams(TestcaseBase):
expression = expr.replace("&&", "and").replace("||", "or")
filter_ids = []
for i, _id in enumerate(insert_ids):
int64 = _vectors.int64[i]
float = _vectors.float[i]
if enable_dynamic_field:
int64 = _vectors[i][ct.default_int64_field_name]
float = _vectors[i][ct.default_float_field_name]
else:
int64 = _vectors.int64[i]
float = _vectors.float[i]
if not expression or eval(expression):
filter_ids.append(_id)
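
To make the eval() trick concrete, a representative fixture expression (the value is hypothetical) applied to one row's scalars:

    expression = "int64 > 500 && int64 < 1000 || float == 1500.0".replace("&&", "and").replace("||", "or")
    int64, float = 600, 600.0  # shadowing builtins mirrors the test's own variable names
    assert eval(expression)    # True, since 500 < 600 < 1000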
@@ -536,30 +555,34 @@ class TestQueryParams(TestcaseBase):
collection_w.query(term_expr, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_query_output_field_none_or_empty(self):
def test_query_output_field_none_or_empty(self, enable_dynamic_field):
"""
target: test query with none and empty output field
method: query with output field=None, field=[]
expected: return primary field
"""
collection_w = self.init_collection_general(prefix, insert_data=True)[0]
collection_w = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=enable_dynamic_field)[0]
for fields in [None, []]:
res, _ = collection_w.query(default_term_expr, output_fields=fields)
assert res[0].keys() == {ct.default_int64_field_name}
@pytest.mark.tags(CaseLabel.L0)
def test_query_output_one_field(self):
def test_query_output_one_field(self, enable_dynamic_field):
"""
target: test query with output one field
method: query with output one field
expected: return one field
"""
collection_w, vectors = self.init_collection_general(prefix, insert_data=True)[0:2]
collection_w, vectors = self.init_collection_general(prefix, insert_data=True,
enable_dynamic_field=enable_dynamic_field)[0:2]
res, _ = collection_w.query(default_term_expr, output_fields=[ct.default_float_field_name])
assert set(res[0].keys()) == {ct.default_int64_field_name, ct.default_float_field_name}
@pytest.mark.tags(CaseLabel.L1)
def test_query_output_all_fields(self):
@pytest.mark.xfail(reason="issue 24637")
def test_query_output_all_fields(self, enable_dynamic_field):
"""
target: test query with none output field
method: query with output field=None
@@ -567,11 +590,18 @@ class TestQueryParams(TestcaseBase):
"""
# 1. initialize with data
collection_w, df, _, insert_ids = self.init_collection_general(prefix, True, nb=10,
is_all_data_type=True)[0:4]
is_all_data_type=True,
enable_dynamic_field=enable_dynamic_field)[0:4]
all_fields = [ct.default_int64_field_name, ct.default_int32_field_name, ct.default_int16_field_name,
ct.default_int8_field_name, ct.default_bool_field_name, ct.default_float_field_name,
ct.default_double_field_name, ct.default_string_field_name, ct.default_float_vec_field_name]
res = df[0].iloc[:2].to_dict('records')
ct.default_double_field_name, ct.default_string_field_name, ct.default_json_field_name,
ct.default_float_vec_field_name]
if enable_dynamic_field:
res = df[0][:2]
else:
res = df[0].iloc[:2].to_dict('records')
log.info(res)
collection_w.load()
actual_res, _ = collection_w.query(default_term_expr, output_fields=all_fields,
check_task=CheckTasks.check_query_results,
@@ -736,6 +766,7 @@ class TestQueryParams(TestcaseBase):
check_items=error)
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.xfail(reason="issue 24637")
def test_query_output_fields_simple_wildcard(self):
"""
target: test query output_fields with simple wildcard (* and %)
@@ -754,6 +785,7 @@ class TestQueryParams(TestcaseBase):
check_items={exp_res: res3, "with_vec": True})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="issue 24637")
def test_query_output_fields_part_scale_wildcard(self):
"""
target: test query output_fields with part wildcard
@@ -1681,6 +1713,7 @@ class TestQueryString(TestcaseBase):
check_items={ct.err_code: 1, ct.err_msg: f' cannot parse expression:{expression}'})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.xfail(reason="issue 24637")
def test_query_after_insert_multi_threading(self):
"""
target: test data consistency after multi threading insert

File diff suppressed because it is too large