test: Add insert and upsert related cases for null and default value support (#36932)

issue: #36129

Signed-off-by: qixuan <673771573@qq.com>
pull/37035/head
qixuan 2024-10-22 10:13:27 +08:00 committed by GitHub
parent e6cb7d6695
commit 36147b10f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 190 additions and 119 deletions

View File

@ -865,19 +865,20 @@ class TestInsertOperation(TestcaseBase):
assert collection_w.num_entities == nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_insert_one_field_using_default_value(self, default_value, auto_id):
@pytest.mark.parametrize("nullable", [True, False])
@pytest.mark.parametrize("default_value", [[], [None for i in range(ct.default_nb)]])
def test_insert_one_field_using_default_value(self, default_value, nullable, auto_id):
"""
target: test insert with one field using default value
method: 1. create a collection with one field using default value
2. insert using []/None to replace the field value
2. insert using default value to replace the field value []/[None]
expected: insert successfully
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
cf.gen_string_field(default_value="abc", nullable=nullable), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
# default value fields, [] or [None]
data = [
[i for i in range(ct.default_nb)],
[np.float32(i) for i in range(ct.default_nb)],
@ -890,63 +891,91 @@ class TestInsertOperation(TestcaseBase):
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_insert_multi_fields_using_default_value(self, default_value, auto_id):
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]])
def test_insert_multi_fields_using_none_data(self, enable_partition_key, default_value, auto_id):
"""
target: test insert with multi fields using default value
method: 1. default value fields before vector, insert [], None, fail
2. default value fields all after vector field, insert empty, succeed
expected: report error and insert successfully
"""
# 1. default value fields before vector, insert [], None, fail
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(1.0)),
cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
data = [[i for i in range(ct.default_nb)], default_value,
# if multi default_value fields before vector field, every field must use []/None
cf.gen_vectors(ct.default_nb, ct.default_dim)]
if auto_id:
del data[0]
collection_w.insert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999,
ct.err_msg: "The data type of field varchar doesn't match"})
# 2. default value fields all after vector field, insert empty, succeed
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(),
cf.gen_float_field(default_value=np.float32(1.0)),
cf.gen_string_field(default_value="abc")]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
data = [[i for i in range(ct.default_nb)], cf.gen_vectors(
ct.default_nb, ct.default_dim)]
data1 = [[i for i in range(ct.default_nb)], cf.gen_vectors(ct.default_nb, ct.default_dim),
[np.float32(i) for i in range(ct.default_nb)]]
if auto_id:
del data[0], data1[0]
collection_w.insert(data)
assert collection_w.num_entities == ct.default_nb
collection_w.insert(data1)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
def test_insert_dataframe_using_default_value(self):
"""
target: test insert with dataframe
method: insert with invalid dataframe
target: test insert with multi fields include array using none value
method: 1. create a collection with multi fields using default value
2. insert using none value to replace the field value
expected: insert successfully
"""
json_embedded_object = "json_embedded_object"
fields = [
cf.gen_int64_field(is_primary=True),
cf.gen_int32_field(default_value=np.int32(1), nullable=True),
cf.gen_float_field(default_value=np.float32(1.0), nullable=True),
cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True),
cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True),
cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True),
cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR, max_length=100, nullable=True),
cf.gen_json_field(name=json_embedded_object, nullable=True),
cf.gen_float_vec_field()
]
schema = cf.gen_collection_schema(fields, auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
# default value fields, [] or [None]
data = [
[i for i in range(ct.default_nb)],
default_value,
default_value,
default_value,
[[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
default_value,
default_value,
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
if auto_id:
del data[0]
collection_w.insert(data=data)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
def test_insert_dataframe_using_default_data(self, enable_partition_key, nullable):
"""
target: test insert with dataframe
method: insert with valid dataframe using default data
expected: insert successfully
"""
if enable_partition_key is True and nullable is True:
pytest.skip("partition key field not support nullable")
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
# None/[] is not allowed when using dataframe
# To use default value, delete the whole item
df = pd.DataFrame({
"int64": pd.Series(data=[i for i in range(ct.default_nb)]),
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)]),
"float_vector": vectors
})
collection_w.insert(df)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L2)
def test_insert_dataframe_using_none_data(self):
"""
target: test insert with dataframe
method: insert with valid dataframe using none data
expected: insert successfully
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value=None, nullable=True), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
df = pd.DataFrame({
"int64": pd.Series(data=[i for i in range(0, ct.default_nb)]),
"float_vector": vectors,
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32")
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)])
})
collection_w.insert(df)
assert collection_w.num_entities == ct.default_nb
@ -1278,8 +1307,7 @@ class TestInsertInvalid(TestcaseBase):
data=data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
@pytest.mark.parametrize("default_value", [[], 123])
def test_insert_array_using_default_value(self, default_value):
"""
target: test insert with array
@ -1293,17 +1321,16 @@ class TestInsertInvalid(TestcaseBase):
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
data = [{"int64": 1, "float_vector": vectors[1],
"varchar": default_value, "float": np.float32(1.0)}]
collection_w.insert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_insert_tuple_using_default_value(self, default_value):
"""
target: test insert with tuple
method: insert with invalid tuple
expected: insert successfully
expected: raise exception
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(),
cf.gen_string_field(), cf.gen_float_field(default_value=np.float32(3.14))]
@ -1313,8 +1340,8 @@ class TestInsertInvalid(TestcaseBase):
int_values = [i for i in range(0, ct.default_nb)]
string_values = ["abc" for i in range(ct.default_nb)]
data = (int_values, vectors, string_values, default_value)
collection_w.insert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_insert_with_nan_value(self):
@ -1846,20 +1873,23 @@ class TestUpsertValid(TestcaseBase):
assert res[0].get('count(*)') == start + upsert_nb + extra_num
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_upsert_one_field_using_default_value(self, default_value):
@pytest.mark.parametrize("nullable", [True, False])
@pytest.mark.parametrize("default_value", [[], [None for i in range(ct.default_nb)]])
def test_upsert_one_field_using_default_value(self, default_value, nullable):
"""
target: test insert with one field using default value
target: test insert/upsert with one field using default value
method: 1. create a collection with one field using default value
2. insert using []/None to replace the field value
expected: insert successfully
2. insert using default value to replace the field value []/[None]
expected: insert/upsert successfully
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
cf.gen_string_field(default_value="abc", nullable=nullable), cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
cf.insert_data(collection_w, with_json=False)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
# default value fields, [] or [None]
data = [
[i for i in range(ct.default_nb)],
[np.float32(i) for i in range(ct.default_nb)],
@ -1867,82 +1897,125 @@ class TestUpsertValid(TestcaseBase):
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
collection_w.upsert(data)
exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[default_float_name])[0]
assert len(res) == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_upsert_multi_fields_using_default_value(self, default_value):
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("default_value", [[], [None for _ in range(ct.default_nb)]])
def test_upsert_multi_fields_using_none_data(self, enable_partition_key, default_value):
"""
target: test insert with multi fields using default value
method: 1. default value fields before vector, insert [], None, fail
2. default value fields all after vector field, insert empty, succeed
expected: report error and insert successfully
target: test insert/upsert with multi fields include array using none value
method: 1. create a collection with multi fields include array using default value
2. insert using none value to replace the field value
expected: insert/upsert successfully
"""
# 1. default value fields before vector, insert [], None, fail
json_embedded_object = "json_embedded_object"
fields = [
cf.gen_int64_field(is_primary=True),
cf.gen_float_field(default_value=np.float32(1.0)),
cf.gen_string_field(default_value="abc"),
cf.gen_int32_field(default_value=np.int32(1), nullable=True),
cf.gen_float_field(default_value=np.float32(1.0), nullable=True),
cf.gen_string_field(default_value="abc", enable_partition_key=enable_partition_key, nullable=True),
cf.gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32, nullable=True),
cf.gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT, nullable=True),
cf.gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR,
max_length=100, nullable=True),
cf.gen_json_field(name=json_embedded_object, nullable=True),
cf.gen_float_vec_field()
]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
cf.insert_data(collection_w, with_json=False)
# insert data and load collection
data = [
[i for i in range(ct.default_nb)],
default_value,
# if multi default_value fields before vector field, every field must use []/None
[np.float32(2.0) for _ in range(ct.default_nb)],
[str(i) for i in range(ct.default_nb)],
[[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[str(j) for j in range(10)] for _ in range(ct.default_nb)],
cf.gen_json_data_for_diff_json_types(nb=ct.default_nb, start=0, json_type=json_embedded_object),
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
collection_w.upsert(data,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 999,
ct.err_msg: "The data type of field varchar doesn't match"})
# 2. default value fields all after vector field, insert empty, succeed
fields = [
cf.gen_int64_field(is_primary=True),
cf.gen_float_vec_field(),
cf.gen_float_field(default_value=np.float32(1.0)),
cf.gen_string_field(default_value="abc")
]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.insert(data=data)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
# default value fields, [] or [None]
data = [
[i for i in range(ct.default_nb)],
default_value,
default_value,
default_value,
[[np.int32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[np.float32(j) for j in range(10)] for _ in range(ct.default_nb)],
[[str(j) for j in range(10)] for _ in range(ct.default_nb)],
default_value,
cf.gen_vectors(ct.default_nb, ct.default_dim)
]
data1 = [
[i for i in range(ct.default_nb)],
cf.gen_vectors(ct.default_nb, ct.default_dim),
[np.float32(i) for i in range(ct.default_nb)]
]
collection_w.upsert(data)
assert collection_w.num_entities == ct.default_nb
collection_w.upsert(data1)
collection_w.upsert(data=data)
exp = f"{ct.default_float_field_name} == {np.float32(1.0)} and {ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[default_float_name, json_embedded_object])[0]
assert len(res) == ct.default_nb
assert res[0][json_embedded_object] is None
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
def test_upsert_dataframe_using_default_value(self):
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("nullable", [True, False])
def test_upsert_dataframe_using_default_data(self, enable_partition_key, nullable):
"""
target: test upsert with dataframe
method: upsert with invalid dataframe
method: upsert with valid dataframe using default data
expected: upsert successfully
"""
if enable_partition_key is True and nullable is True:
pytest.skip("partition key field not support nullable")
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value="abc", is_partition_key=enable_partition_key, nullable=nullable),
cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
df = pd.DataFrame({
"int64": pd.Series(data=[i for i in range(ct.default_nb)]),
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)]),
"float_vector": vectors
})
collection_w.upsert(df)
exp = f"{ct.default_string_field_name} == 'abc'"
res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0]
assert len(res) == ct.default_nb
@pytest.mark.tags(CaseLabel.L2)
def test_upsert_dataframe_using_none_data(self):
"""
target: test upsert with dataframe
method: upsert with valid dataframe using none data
expected: upsert successfully
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
cf.gen_string_field(default_value=None, nullable=True),
cf.gen_float_vec_field()]
schema = cf.gen_collection_schema(fields)
collection_w = self.init_collection_wrap(schema=schema)
collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
collection_w.load()
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
# None/[] is not allowed when using dataframe
# To use default value, delete the whole item
df = pd.DataFrame({
"int64": pd.Series(data=[i for i in range(0, ct.default_nb)]),
"float_vector": vectors,
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32")
"int64": pd.Series(data=[i for i in range(ct.default_nb)]),
"float": pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32"),
"varchar": pd.Series(data=[None for _ in range(ct.default_nb)]),
"float_vector": vectors
})
collection_w.upsert(df)
assert collection_w.num_entities == ct.default_nb
exp = f"{ct.default_string_field_name} == ''"
res = collection_w.query(exp, output_fields=[ct.default_string_field_name])[0]
assert len(res) == ct.default_nb
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index ", ct.all_index_types[9:11])
@ -2136,8 +2209,7 @@ class TestUpsertInvalid(TestcaseBase):
collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
@pytest.mark.parametrize("default_value", [[], 123])
def test_upsert_array_using_default_value(self, default_value):
"""
target: test upsert with array
@ -2151,17 +2223,16 @@ class TestUpsertInvalid(TestcaseBase):
vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
data = [{"int64": 1, "float_vector": vectors[1],
"varchar": default_value, "float": np.float32(1.0)}]
collection_w.upsert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_upsert_tuple_using_default_value(self, default_value):
"""
target: test upsert with tuple
method: upsert with invalid tuple
expected: upsert successfully
expected: raise exception
"""
fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14)),
cf.gen_string_field(), cf.gen_float_vec_field()]
@ -2171,8 +2242,8 @@ class TestUpsertInvalid(TestcaseBase):
int_values = [i for i in range(0, ct.default_nb)]
string_values = ["abc" for i in range(ct.default_nb)]
data = (int_values, default_value, string_values, vectors)
collection_w.upsert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
error = {ct.err_code: 1, ct.err_msg: "The Input data type is inconsistent with defined schema"}
collection_w.upsert(data, check_task=CheckTasks.err_res, check_items=error)
class TestInsertArray(TestcaseBase):