test: add partition key checks to the bulk import tests (#33253)

See https://github.com/milvus-io/milvus/issues/33237

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/33326/head
zhuwenxing 2024-05-23 14:03:40 +08:00 committed by GitHub
parent 32d3e22d7d
commit 229a6b942b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 35 additions and 10 deletions

View File

@ -828,7 +828,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
@pytest.mark.parametrize("dim", [128]) # 128
@pytest.mark.parametrize("entities", [1000]) # 1000
@pytest.mark.parametrize("enable_dynamic_field", [True])
def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field):
@pytest.mark.parametrize("enable_partition_key", [True, False])
def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key):
"""
collection schema 1: [pk, int64, float64, string float_vector]
data file: vectors.npy and uid.npy,
@ -841,7 +842,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
cf.gen_int64_field(name=df.int_field),
cf.gen_float_field(name=df.float_field),
cf.gen_string_field(name=df.string_field),
cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key),
cf.gen_json_field(name=df.json_field),
cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64),
cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT),
@ -945,16 +946,23 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field:
assert "name" in fields_from_search
assert "address" in fields_from_search
# query data
res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
assert len(res) == entities
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("dim", [128]) # 128
@pytest.mark.parametrize("entities", [1000]) # 1000
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("include_meta", [True, False])
def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta):
"""
collection schema 1: [pk, int64, float64, string float_vector]
data file: vectors.npy and uid.npy,
@ -970,7 +978,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
cf.gen_int64_field(name=df.int_field),
cf.gen_float_field(name=df.float_field),
cf.gen_string_field(name=df.string_field),
cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key),
cf.gen_json_field(name=df.json_field),
cf.gen_float_vec_field(name=df.float_vec_field, dim=dim),
# cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim),
@ -1072,14 +1080,25 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field and include_meta:
assert "name" in fields_from_search
assert "address" in fields_from_search
# query data
res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
assert len(res) == entities
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("dim", [128]) # 128
@pytest.mark.parametrize("entities", [1000]) # 1000
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("include_meta", [True, False])
def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta):
"""
collection schema 1: [pk, int64, float64, string float_vector]
data file: vectors.parquet and uid.parquet,
@ -1094,15 +1113,13 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
cf.gen_int64_field(name=df.int_field),
cf.gen_float_field(name=df.float_field),
cf.gen_string_field(name=df.string_field),
cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key),
cf.gen_json_field(name=df.json_field),
cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64),
cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT),
cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100),
cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL),
cf.gen_float_vec_field(name=df.float_vec_field, dim=dim),
# cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim),
# cf.gen_float_vec_field(name=df.text_float_vec_field, dim=dim),
cf.gen_binary_vec_field(name=df.binary_vec_field, dim=dim),
cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim),
cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim)
@ -1199,6 +1216,14 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
if enable_dynamic_field and include_meta:
assert "name" in fields_from_search
assert "address" in fields_from_search
# query data
res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
assert len(res) == entities
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
if enable_partition_key:
assert len(self.collection_wrap.partitions) > 1
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True])