test: [cherry-pick] use float32 for import data (#32327)

pr: https://github.com/milvus-io/milvus/pull/32276

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/32354/head
zhuwenxing 2024-04-17 10:57:20 +08:00 committed by GitHub
parent 82cf91fd72
commit dff96c323b
1 changed file with 44 additions and 36 deletions
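Background for the change: Python's built-in json module cannot serialize NumPy scalar types, so dumping rows whose vectors hold np.float32 values raises TypeError: Object of type float32 is not JSON serializable. The NumpyEncoder added below converts float32 scalars to plain Python floats during serialization. A minimal standalone sketch of the failure and the fix (illustration only, not part of the diff):

    import json
    import numpy as np

    class NumpyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, np.float32):
                return float(obj)
            return super(NumpyEncoder, self).default(obj)

    vec = [np.float32(0.5), np.float32(0.25)]
    # json.dumps(vec)  # TypeError: Object of type float32 is not JSON serializable
    print(json.dumps(vec, cls=NumpyEncoder))  # [0.5, 0.25]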


@@ -15,6 +15,14 @@ from uuid import uuid4

 IMPORT_TIMEOUT = 360

+
+class NumpyEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.float32):
+            return float(obj)
+        return super(NumpyEncoder, self).default(obj)
+
+
 @pytest.mark.BulkInsert
 class TestCreateImportJob(TestBase):
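Why the test data switches to np.float32 at all: Milvus stores FloatVector fields as 32-bit floats, while Python's random.random() returns 64-bit floats, so generating the vectors as float32 presumably keeps the imported values aligned with the precision the server actually stores. A quick standalone illustration of the difference (not part of the diff):

    import numpy as np

    x = 0.1234567890123456789          # float64 from random.random() has ~16 significant digits
    print(np.float32(x))               # 0.12345679 -- float32 keeps only ~7
    print(float(np.float32(x)) == x)   # False: the round-trip through float32 loses precision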
@@ -49,7 +57,7 @@ class TestCreateImportJob(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -60,7 +68,7 @@ class TestCreateImportJob(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -140,7 +148,7 @@ class TestCreateImportJob(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -151,7 +159,7 @@ class TestCreateImportJob(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -230,7 +238,7 @@ class TestCreateImportJob(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -241,7 +249,7 @@ class TestCreateImportJob(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
         # create partition
@@ -325,7 +333,7 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
             # dump data to file
@@ -334,7 +342,7 @@ class TestCreateImportJob(TestBase):
             # create dir for file path
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, "w") as f:
-                json.dump(data, f)
+                json.dump(data, f, cls=NumpyEncoder)
             # upload file to minio storage
             self.storage_client.upload_file(file_path, file_name)
             file_names.append([file_name])
@@ -405,7 +413,7 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
             # dump data to file
@@ -485,7 +493,7 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
         file_list = []
@@ -572,7 +580,7 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
         file_list = []
@@ -597,7 +605,7 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
             # dump data to file
@@ -616,14 +624,14 @@ class TestCreateImportJob(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(1000*file_num, 1000*(file_num+1))]
             # dump data to file
             file_name = f"bulk_insert_data_{file_num}_{uuid4()}.json"
             file_path = f"/tmp/{file_name}"
             with open(file_path, "w") as f:
-                json.dump(data, f)
+                json.dump(data, f, cls=NumpyEncoder)
             # upload file to minio storage
             self.storage_client.upload_file(file_path, file_name)
             file_names.append([file_name])
@@ -734,9 +742,9 @@ class TestCreateImportJob(TestBase):
                     "int_array": [i],
                     "varchar_array": [f"varchar_{i}"],
                     "bool_array": [random.choice([True, False])],
-                    "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
+                    "text_emb": preprocessing.normalize([np.array([np.float32(random.random()) for _ in range(dim)])])[
                         0].tolist(),
-                    "image_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
+                    "image_emb": preprocessing.normalize([np.array([np.float32(random.random()) for _ in range(dim)])])[
                         0].tolist(),
                 }
             else:
@@ -750,9 +758,9 @@ class TestCreateImportJob(TestBase):
                     "int_array": [i],
                     "varchar_array": [f"varchar_{i}"],
                     "bool_array": [random.choice([True, False])],
-                    "text_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
+                    "text_emb": preprocessing.normalize([np.array([np.float32(random.random()) for _ in range(dim)])])[
                         0].tolist(),
-                    "image_emb": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
+                    "image_emb": preprocessing.normalize([np.array([np.float32(random.random()) for _ in range(dim)])])[
                         0].tolist(),
                 }
             if enable_dynamic_schema:
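A note on the two hunks above: wrapping the generated components in np.float32 makes np.array(...) infer a float32 array, and scikit-learn's preprocessing.normalize preserves that dtype; the trailing .tolist() then converts the normalized components back to plain Python floats, so these particular fields would serialize even without NumpyEncoder. A standalone sketch (assuming scikit-learn's default dtype handling):

    import numpy as np
    from sklearn import preprocessing

    v = np.array([np.float32(0.6), np.float32(0.8)])  # dtype inferred as float32
    unit = preprocessing.normalize([v])[0]            # L2-normalized row; dtype stays float32
    print(unit.dtype)                                 # float32
    print(type(unit.tolist()[0]))                     # <class 'float'> -- JSON-safe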
@@ -845,7 +853,7 @@ class TestImportJobAdvance(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(batch_size*file_num, batch_size*(file_num+1))]
             # dump data to file
@@ -854,7 +862,7 @@ class TestImportJobAdvance(TestBase):
             # create dir for file path
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, "w") as f:
-                json.dump(data, f)
+                json.dump(data, f, cls=NumpyEncoder)
             # upload file to minio storage
             self.storage_client.upload_file(file_path, file_name)
             file_names.append([file_name])
@@ -945,7 +953,7 @@ class TestCreateImportJobAdvance(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(batch_size*file_num, batch_size*(file_num+1))]
             # dump data to file
@@ -954,7 +962,7 @@ class TestCreateImportJobAdvance(TestBase):
             # create dir for file path
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, "w") as f:
-                json.dump(data, f)
+                json.dump(data, f, cls=NumpyEncoder)
             # upload file to minio storage
             self.storage_client.upload_file(file_path, file_name)
             file_names.append([file_name])
@@ -1034,7 +1042,7 @@ class TestCreateImportJobAdvance(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(batch_size*file_num, batch_size*(file_num+1))]
             # dump data to file
@@ -1043,7 +1051,7 @@ class TestCreateImportJobAdvance(TestBase):
             # create dir for file path
             Path(file_path).parent.mkdir(parents=True, exist_ok=True)
             with open(file_path, "w") as f:
-                json.dump(data, f)
+                json.dump(data, f, cls=NumpyEncoder)
             # upload file to minio storage
             self.storage_client.upload_file(file_path, file_name)
             file_names.append([file_name])
@@ -1207,14 +1215,14 @@ class TestCreateImportJobNegative(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(10000)]
         # dump data to file
         file_name = f"bulk_insert_data_{uuid4()}.txt"
         file_path = f"/tmp/{file_name}"
-        json_data = json.dumps(data)
+        json_data = json.dumps(data, cls=NumpyEncoder)
         # save the JSON data to a txt file
         with open(file_path, 'w') as file:
@@ -1253,14 +1261,14 @@ class TestCreateImportJobNegative(TestBase):
                 "book_id": i,
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]}
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]}
                 for i in range(0)]
         # dump data to file
         file_name = "bulk_insert_empty_data.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -1324,7 +1332,7 @@ class TestCreateImportJobNegative(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -1336,7 +1344,7 @@ class TestCreateImportJobNegative(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -1382,7 +1390,7 @@ class TestCreateImportJobNegative(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -1393,7 +1401,7 @@ class TestCreateImportJobNegative(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -1480,7 +1488,7 @@ class TestListImportJob(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
            }
             if not auto_id:
                 tmp["book_id"] = i
@@ -1491,7 +1499,7 @@ class TestListImportJob(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
@@ -1553,7 +1561,7 @@ class TestGetImportJobProgress(TestBase):
             tmp = {
                 "word_count": i,
                 "book_describe": f"book_{i}",
-                "book_intro": [random.random() for _ in range(dim)]
+                "book_intro": [np.float32(random.random()) for _ in range(dim)]
             }
             if not auto_id:
                 tmp["book_id"] = i
@@ -1564,7 +1572,7 @@ class TestGetImportJobProgress(TestBase):
         file_name = f"bulk_insert_data_{uuid4()}.json"
         file_path = f"/tmp/{file_name}"
         with open(file_path, "w") as f:
-            json.dump(data, f)
+            json.dump(data, f, cls=NumpyEncoder)
         # upload file to minio storage
         self.storage_client.upload_file(file_path, file_name)
         job_id_list = []