mirror of https://github.com/milvus-io/milvus.git
117 lines
5.2 KiB
Python
117 lines
5.2 KiB
Python
from utils.util_pymilvus import *
|
||
from common.common_type import CaseLabel, CheckTasks
|
||
from common import common_type as ct
|
||
from common import common_func as cf
|
||
from utils.util_log import test_log as log
|
||
from base.client_base import TestcaseBase
|
||
import random
|
||
import pytest
|
||
|
||
|
||
class TestIssues(TestcaseBase):
    """Test cases named after issue numbers (presumably regression tests for those reports)."""

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name])
    @pytest.mark.parametrize("use_upsert", [True, False])
    def test_issue_30607(self, par_key_field, use_upsert):
        """
        Method:
        1. create a collection with partition key on collection schema with customized num_partitions
        2. write 20 batches of 500 entities with insert or upsert (per `use_upsert`), then flush
           and build the vector index
        3. once without and once with an index on the partition key field: reload the collection
           and randomly pick 200 entities by primary key
        4. verify each picked entity is returned exactly once when queried by
           partition key value and primary key together
        """
        self._connect()
        pk_field = cf.gen_string_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          auto_id=False, partition_key_field=par_key_field)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)

        # insert
        nb = 500
        string_prefix = cf.gen_str_by_length(length=6)
        entities_per_parkey = 20
        # Every batch reuses the same nb scalar values, so each int64 value ends up
        # on `entities_per_parkey` entities; only the primary keys differ per batch.
        int64_values = list(range(nb))
        string_values = [string_prefix + str(i) for i in range(nb)]
        for n in range(entities_per_parkey):
            pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [pk_values, int64_values, string_values, float_vec_values]
            if use_upsert:
                collection_w.upsert(data)
            else:
                collection_w.insert(data)

        # flush
        collection_w.flush()
        num_entities = collection_w.num_entities
        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)

        seeds = 200
        for index_on_par_key_field in [False, True]:
            # release before (optionally) adding the scalar index, then reload
            collection_w.release()
            if index_on_par_key_field:
                collection_w.create_index(field_name=par_key_field, index_params={})
            # load
            collection_w.load()

            # verify the partition key values are hashed correctly
            rand_ids = [str(i) for i in random.sample(range(0, num_entities), seeds)]
            res, _ = collection_w.query(expr=f"pk in {rand_ids}", output_fields=["pk", par_key_field])
            # verify every random id exists
            assert len(res) == len(rand_ids)

            dirty_count = 0
            for row in res:
                pk = row.get("pk")
                parkey_value = row.get(par_key_field)
                # Filtering on the partition key together with the pk must find the same single row.
                res_parkey, _ = collection_w.query(expr=f"{par_key_field}=={parkey_value} and pk=='{pk}'",
                                                   output_fields=["pk", par_key_field])
                if len(res_parkey) != 1:
                    log.info(f"dirty data found: pk {pk} with parkey {parkey_value}")
                    dirty_count += 1
            # Assert after the loop so every dirty row is logged before the test fails.
            assert dirty_count == 0, f"{dirty_count} of {len(res)} sampled entities are dirty"
            log.info(f"check randomly {seeds}/{num_entities}, dirty count={dirty_count}")

    @pytest.mark.tags(CaseLabel.L2)
    def test_issue_32294(self):
        """
        Method:
        1. create a collection with an auto-id int64 primary key, a "metadata" string field
           and a float vector field
        2. insert 500 rows; rows 0 and 1 hold the same JSON-like text containing "passage",
           once with embedded newlines and once on a single line
        3. build the vector index and load the collection
        4. search with 2 query vectors filtered by metadata like '%passage%' and check the
           results against nq=2 and limit=2
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        string_field = cf.gen_string_field(name="metadata")
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, string_field, vector_field], auto_id=True)
        collection_w = self.init_collection_wrap(schema=schema)

        # insert
        nb = 500
        string_values = [str(i) for i in range(nb)]
        float_vec_values = gen_vectors(nb, ct.default_dim)
        # Only these two rows contain "passage"; the first spans several lines.
        string_values[0] = ('{\n'
                            '"Header 1": "Foo1?", \n'
                            '"document_category": "acme", \n'
                            '"type": "passage"\n'
                            '}')
        string_values[1] = '{"Header 1": "Foo1?", "document_category": "acme", "type": "passage"}'
        # pk is omitted because the schema uses auto_id
        data = [string_values, float_vec_values]
        collection_w.insert(data)
        collection_w.create_index(field_name=ct.default_float_vec_field_name, index_params=ct.default_index)
        collection_w.load()

        expr = "metadata like '%passage%'"
        collection_w.search(float_vec_values[-2:], ct.default_float_vec_field_name, {},
                            ct.default_limit, expr, output_fields=["metadata"],
                            check_task=CheckTasks.check_search_results,
                            check_items={"nq": 2,
                                         "limit": 2})