mirror of https://github.com/milvus-io/milvus.git

test: add testcase for import without meta when enabled dynamic (#32706)

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>

Branch: pull/32808/head
Parent: 1f58cda957
Commit: e1eb03ad0d
@@ -674,7 +674,7 @@ def gen_new_json_files(float_vector, rows, dim, data_fields, file_nums=1, array_
     return files
 
 
-def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_nums=1, err_type="", force=False, enable_dynamic_field=False):
+def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_nums=1, err_type="", force=False, enable_dynamic_field=False, include_meta=True):
     # gen numpy files
     files = []
     start_uid = 0
@@ -707,7 +707,7 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
                 file_name = gen_int_or_float_in_numpy_file(dir=data_source, data_field=data_field,
                                                            rows=rows, force=force)
             files.append(file_name)
-        if enable_dynamic_field:
+        if enable_dynamic_field and include_meta:
             file_name = gen_dynamic_field_in_numpy_file(dir=data_source, rows=rows, force=force)
             files.append(file_name)
     if file_size is not None:
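For orientation, a minimal sketch (call site hypothetical; signature and DataField taken from the diff) of what the new flag buys: with a dynamic-field schema, passing include_meta=False yields only the per-field numpy files and no $meta file, which is exactly the import-without-meta scenario the new test exercises.

    # Hypothetical call site; gen_npy_files and DataField come from the helpers above.
    files = gen_npy_files(float_vector=True, rows=1000, dim=128,
                          data_fields=[DataField.vec_field],
                          enable_dynamic_field=True,
                          include_meta=False)  # the $meta numpy file is deliberately omitted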
@@ -756,7 +756,7 @@ def gen_dynamic_field_data_in_parquet_file(rows, start=0):
     return data
 
 
-def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False):
+def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_group_size=None, file_nums=1, array_length=None, err_type="", enable_dynamic_field=False, include_meta=True):
     # gen numpy files
     if err_type == "":
         err_type = "none"
@@ -771,7 +771,7 @@ def gen_parquet_files(float_vector, rows, dim, data_fields, file_size=None, row_
             data = gen_data_by_data_field(data_field=data_field, rows=rows, start=0,
                                           float_vector=float_vector, dim=dim, array_length=array_length)
             all_field_data[data_field] = data
-        if enable_dynamic_field:
+        if enable_dynamic_field and include_meta:
             all_field_data["$meta"] = gen_dynamic_field_data_in_parquet_file(rows=rows, start=0)
         df = pd.DataFrame(all_field_data)
         log.info(f"df: \n{df}")
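The parquet path applies the same gate one level up, at the DataFrame: when include_meta is False the $meta column is never added, so the written parquet file carries no dynamic-field data. A self-contained sketch of the effect (column names and values hypothetical):

    import pandas as pd

    all_field_data = {"uid": [1, 2], "float64": [0.5, 1.5]}
    enable_dynamic_field, include_meta = True, False
    if enable_dynamic_field and include_meta:
        # not reached here: the parquet file is written without a $meta column
        all_field_data["$meta"] = ['{"name": "a"}', '{"name": "b"}']
    df = pd.DataFrame(all_field_data)
    print(list(df.columns))  # ['uid', 'float64'], no '$meta' column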
@@ -903,7 +903,7 @@ def prepare_bulk_insert_new_json_files(minio_endpoint="", bucket_name="milvus-bu
 
 
 def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, enable_dynamic_field=False, file_size=None,
-                                    data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False):
+                                    data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True):
     """
     Generate column based files based on params in numpy format and copy them to the minio
     Note: each field in data_fields would be generated one numpy file.
@@ -935,14 +935,14 @@ def prepare_bulk_insert_numpy_files(minio_endpoint="", bucket_name="milvus-bucke
     """
     files = gen_npy_files(rows=rows, dim=dim, float_vector=float_vector, file_size=file_size,
                           data_fields=data_fields, enable_dynamic_field=enable_dynamic_field,
-                          file_nums=file_nums, force=force)
+                          file_nums=file_nums, force=force, include_meta=include_meta)
 
     copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force)
     return files
 
 
 def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-bucket", rows=100, dim=128, array_length=None, file_size=None, row_group_size=None,
-                                      enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False):
+                                      enable_dynamic_field=False, data_fields=[DataField.vec_field], float_vector=True, file_nums=1, force=False, include_meta=True):
     """
     Generate column based files based on params in parquet format and copy them to the minio
     Note: each field in data_fields would be generated one parquet file.
@@ -974,7 +974,7 @@ def prepare_bulk_insert_parquet_files(minio_endpoint="", bucket_name="milvus-buc
     """
     files = gen_parquet_files(rows=rows, dim=dim, float_vector=float_vector, enable_dynamic_field=enable_dynamic_field,
                               data_fields=data_fields, array_length=array_length, file_size=file_size, row_group_size=row_group_size,
-                              file_nums=file_nums)
+                              file_nums=file_nums, include_meta=include_meta)
     copy_files_to_minio(host=minio_endpoint, r_source=data_source, files=files, bucket_name=bucket_name, force=force)
     return files
 
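Taken together, include_meta threads from the prepare_* entry points down to the file generators, so a test can stage import files in MinIO that lack the meta file. A hedged usage sketch (endpoint and values hypothetical; parameter names as in the diff):

    files = prepare_bulk_insert_numpy_files(
        minio_endpoint="127.0.0.1:9000",   # assumption: local MinIO used by the tests
        bucket_name="milvus-bucket",
        rows=1000, dim=128,
        data_fields=[DataField.vec_field],
        enable_dynamic_field=True,
        include_meta=False,                # new: stage files without $meta
        force=True,
    )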
@@ -953,7 +953,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("dim", [128])  # 128
     @pytest.mark.parametrize("entities", [1000])  # 1000
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
-    def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field):
+    @pytest.mark.parametrize("include_meta", [True, False])
+    def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
         """
         collection schema 1: [pk, int64, float64, string float_vector]
         data file: vectors.npy and uid.npy,
@@ -963,6 +964,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         2. import data
         3. verify
         """
+        if enable_dynamic_field is False and include_meta is True:
+            pytest.skip("include_meta only works with enable_dynamic_field")
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
             cf.gen_int64_field(name=df.int_field),
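With enable_dynamic_field and include_meta each parametrized over [True, False], pytest runs four combinations per test; the guard above prunes the one meaningless cell (static schema but meta requested). A standalone sketch of the same pattern (test name hypothetical):

    import pytest

    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    @pytest.mark.parametrize("include_meta", [True, False])
    def test_matrix(enable_dynamic_field, include_meta):
        # (False, True) is skipped: include_meta only matters when the
        # schema actually has a dynamic field.
        if enable_dynamic_field is False and include_meta is True:
            pytest.skip("include_meta only works with enable_dynamic_field")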
@@ -985,6 +988,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             data_fields=data_fields,
             enable_dynamic_field=enable_dynamic_field,
             force=True,
+            include_meta=include_meta,
+
         )
         self._connect()
         c_name = cf.gen_unique_str("bulk_insert")
@@ -1043,7 +1048,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search
 
@@ -1064,7 +1069,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search
 
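The assertion gate mirrors the generation gate: the dynamic keys carried in $meta ("name", "address") should surface in search results only when the meta file was actually imported. A condensed sketch of the check (result field set hypothetical):

    # Hypothetical field set returned for one search hit.
    fields_from_search = {"pk", "int64", "vectors", "name", "address"}
    if enable_dynamic_field and include_meta:
        # $meta was imported, so its keys must be present in the results
        assert "name" in fields_from_search
        assert "address" in fields_from_search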
@@ -1072,10 +1077,9 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("auto_id", [True, False])
     @pytest.mark.parametrize("dim", [128])  # 128
     @pytest.mark.parametrize("entities", [1000])  # 1000
-    @pytest.mark.parametrize("file_nums", [1])
-    @pytest.mark.parametrize("array_len", [None, 0, 100])
     @pytest.mark.parametrize("enable_dynamic_field", [True, False])
-    def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, file_nums, array_len, enable_dynamic_field):
+    @pytest.mark.parametrize("include_meta", [True, False])
+    def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta):
         """
         collection schema 1: [pk, int64, float64, string float_vector]
         data file: vectors.parquet and uid.parquet,
@@ -1084,6 +1088,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         2. import data
         3. verify
         """
+        if enable_dynamic_field is False and include_meta is True:
+            pytest.skip("include_meta only works with enable_dynamic_field")
         fields = [
             cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
             cf.gen_int64_field(name=df.int_field),
@@ -1110,6 +1116,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             data_fields=data_fields,
             enable_dynamic_field=enable_dynamic_field,
             force=True,
+            include_meta=include_meta,
         )
         self._connect()
         c_name = cf.gen_unique_str("bulk_insert")
@@ -1168,7 +1175,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search
 
@@ -1189,7 +1196,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             fields_from_search = r.fields.keys()
             for f in fields:
                 assert f.name in fields_from_search
-            if enable_dynamic_field:
+            if enable_dynamic_field and include_meta:
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search
 