milvus/tests/python_client/testcases/test_issues.py


import random

import pytest

from utils.util_pymilvus import *
from base.client_base import TestcaseBase
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_log import test_log as log


class TestIssues(TestcaseBase):
    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name])
    @pytest.mark.parametrize("use_upsert", [True, False])
    def test_issue_30607(self, par_key_field, use_upsert):
"""
Method
1. create a collection with partition key on collection schema with customized num_partitions
2. randomly check 200 entities
2. verify partition key values are hashed into correct partitions
"""
        self._connect()
        pk_field = cf.gen_string_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          auto_id=False, partition_key_field=par_key_field)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)
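        # With a partition key field on the schema, each entity's key value is expected
        # to be hashed into one of the num_partitions internal partitions; the checks
        # below verify that routing stayed consistent.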
        # insert
        nb = 500
        string_prefix = cf.gen_str_by_length(length=6)
        entities_per_parkey = 20
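        # Each round inserts nb entities whose int64 (partition key) values repeat
        # 0..nb-1, so after all rounds every key value should own entities_per_parkey
        # entities, distinguished by their unique string pks.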
        for n in range(entities_per_parkey):
            pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)]
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [pk_values, int64_values, string_values, float_vec_values]
            if use_upsert:
                collection_w.upsert(data)
            else:
                collection_w.insert(data)
        # flush
        collection_w.flush()
        num_entities = collection_w.num_entities
        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
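        # Run the verification twice: once without and once with a scalar index on the
        # partition key field, so both load paths are covered.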
        for index_on_par_key_field in [False, True]:
            collection_w.release()
            if index_on_par_key_field:
                collection_w.create_index(field_name=par_key_field, index_params={})
            # load
            collection_w.load()
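            # Sample random pks and re-query each (pk, partition key) pair; anything
            # other than exactly one hit is counted as dirty data below, which would
            # suggest an entity landed in the wrong partition.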
            # verify the partition key values are hashed correctly
            seeds = 200
            rand_ids = random.sample(range(0, num_entities), seeds)
            rand_ids = [str(rand_ids[i]) for i in range(len(rand_ids))]
            res, _ = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
            # verify that every random id exists
            assert len(res) == len(rand_ids)
            dirty_count = 0
            for i in range(len(res)):
                pk = res[i].get("pk")
                parkey_value = res[i].get(par_key_field)
                res_parkey, _ = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
                                                   output_fields=["pk", par_key_field])
                if len(res_parkey) != 1:
                    log.info(f"dirty data found: pk {pk} with parkey {parkey_value}")
                    dirty_count += 1
            assert dirty_count == 0
            log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}")

    @pytest.mark.tags(CaseLabel.L2)
    def test_issue_32294(self):
        """
        Method
        1. create a collection with an auto_id int64 pk, a string field and a vector field
        2. insert entities whose string field contains JSON-formatted values
        3. search with a `like` filter on the string field and verify the matching entities are returned
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        string_field = cf.gen_string_field(name="metadata")
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, string_field, vector_field], auto_id=True)
        collection_w = self.init_collection_wrap(schema=schema)
        # insert
        nb = 500
        string_values = [str(i) for i in range(0, nb)]
        float_vec_values = gen_vectors(nb, ct.default_dim)
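        # Overwrite the first two entries with JSON-formatted strings, one multi-line
        # and one single-line, so the `like` filter below has to match values that
        # contain braces, quotes and newlines.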
        string_values[0] = ('{\n'
                            '"Header 1": "Foo1?", \n'
                            '"document_category": "acme", \n'
                            '"type": "passage"\n'
                            '}')
        string_values[1] = '{"Header 1": "Foo1?", "document_category": "acme", "type": "passage"}'
        data = [string_values, float_vec_values]
        collection_w.insert(data)
        collection_w.create_index(field_name=ct.default_float_vec_field_name, index_params=ct.default_index)
        collection_w.load()
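        # Only the two JSON entries contain the substring "passage", so the filtered
        # search should return exactly two hits for each of the two query vectors.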
        expr = "metadata like '%passage%'"
        collection_w.search(float_vec_values[-2:], ct.default_float_vec_field_name, {},
                            ct.default_limit, expr, output_fields=["metadata"],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": 2,
                                         "limit": 2})