test: [cherry-pick] add empty sparse in import test (#40683)

pr: https://github.com/milvus-io/milvus/pull/40682

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
pull/40728/head
zhuwenxing 2025-03-18 15:18:13 +08:00 committed by GitHub
parent 0dc4b73c81
commit d9635c6f86
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 20 additions and 9 deletions

View File

@ -505,7 +505,7 @@ def gen_vectors(float_vector, rows, dim):
return vectors
def gen_sparse_vectors(rows, sparse_format="dok"):
def gen_sparse_vectors(rows, sparse_format="dok", empty_percentage=10):
# default sparse format is dok (dictionary of keys)
# the other option is coo (coordinate list)
@ -513,6 +513,11 @@ def gen_sparse_vectors(rows, sparse_format="dok"):
vectors = [{
d: rng.random() for d in random.sample(range(1000), random.randint(20, 30))
} for _ in range(rows)]
if empty_percentage > 0:
empty_nb = int(rows * empty_percentage / 100)
empty_ids = random.sample(range(rows), empty_nb)
for i in empty_ids:
vectors[i] = {}
if sparse_format == "coo":
vectors = [
{"indices": list(x.keys()), "values": list(x.values())} for x in vectors

View File

@ -3185,7 +3185,7 @@ def gen_fp16_vectors(num, dim):
return raw_vectors, fp16_vectors
def gen_sparse_vectors(nb, dim=1000, sparse_format="dok"):
def gen_sparse_vectors(nb, dim=1000, sparse_format="dok", empty_percentage=0):
# default sparse format is dok (dictionary of keys)
# the other option is coo (coordinate list)
@ -3193,6 +3193,11 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok"):
vectors = [{
d: rng.random() for d in list(set(random.sample(range(dim), random.randint(20, 30)) + [0, 1]))
} for _ in range(nb)]
if empty_percentage > 0:
empty_nb = int(nb * empty_percentage / 100)
empty_ids = random.sample(range(nb), empty_nb)
for i in empty_ids:
vectors[i] = {}
if sparse_format == "coo":
vectors = [
{"indices": list(x.keys()), "values": list(x.values())} for x in vectors

View File

@ -47,6 +47,7 @@ def pytest_addoption(parser):
parser.addoption('--field_name', action='store', default="field_name", help="field_name of index")
parser.addoption('--replica_num', action='store', default=ct.default_replica_num, help="memory replica number")
parser.addoption('--minio_host', action='store', default="localhost", help="minio service's ip")
parser.addoption('--minio_bucket', action='store', default="milvus-bucket", help="minio bucket name")
parser.addoption('--uri', action='store', default="", help="uri for high level api")
parser.addoption('--token', action='store', default="", help="token for high level api")
parser.addoption("--request_duration", action="store", default="10m", help="request_duration")
@ -188,6 +189,10 @@ def field_name(request):
def minio_host(request):
return request.config.getoption("--minio_host")
@pytest.fixture
def minio_bucket(request):
    """Provide the MinIO bucket name given by the ``--minio_bucket`` option."""
    bucket_name = request.config.getoption("--minio_bucket")
    return bucket_name
@pytest.fixture
def uri(request):
@ -198,6 +203,7 @@ def uri(request):
def token(request):
return request.config.getoption("--token")
@pytest.fixture
def request_duration(request):
    """Provide the value of the ``--request_duration`` command-line option."""
    duration = request.config.getoption("--request_duration")
    return duration

View File

@ -51,16 +51,11 @@ def entity_suffix(entities):
class TestcaseBaseBulkInsert(TestcaseBase):
@pytest.fixture(scope="function", autouse=True)
def init_minio_client(self, minio_host):
def init_minio_client(self, minio_host, minio_bucket):
Path("/tmp/bulk_insert_data").mkdir(parents=True, exist_ok=True)
self._connect()
self.milvus_sys = MilvusSys(alias='default')
ms = MilvusSys()
minio_port = "9000"
self.minio_endpoint = f"{minio_host}:{minio_port}"
self.bucket_name = ms.index_nodes[0]["infos"]["system_configurations"][
"minio_bucket_name"
]
self.bucket_name = minio_bucket
class TestBulkInsert(TestcaseBaseBulkInsert):