milvus/tests/python_client/testcases/test_high_level_api.py

338 lines
16 KiB
Python

import pytest
from base.client_v2_base import TestMilvusClientV2Base
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from common.constants import *
prefix = "high_level_api"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestHighLevelApi(TestMilvusClientV2Base):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="pymilvus issue 1554")
def test_high_level_collection_invalid_primary_field(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 1, ct.err_msg: f"Param id_type must be int or string"}
self.create_collection(client, collection_name, default_dim, consistency_level="Strong",
id_type="invalid", check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_high_level_collection_string_auto_id(self):
"""
target: test high level api: client.create_collection
method: create collection with auto id on string primary key
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 65535, ct.err_msg: f"type param(max_length) should be specified for varChar "
f"field of collection {collection_name}"}
self.create_collection(client, collection_name, default_dim, id_type="string", auto_id=True,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_create_same_collection_different_params(self):
"""
target: test high level api: client.create_collection
method: create
expected: 1. Successfully to create collection with same params
2. Report errors for creating collection with same name and different params
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create collection with same params
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 3. create collection with same name and different params
error = {ct.err_code: 1, ct.err_msg: f"create duplicate collection with different parameters, "
f"collection: {collection_name}"}
self.create_collection(client, collection_name, default_dim + 1, consistency_level="Strong",
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_high_level_collection_invalid_metric_type(self):
"""
target: test high level api: client.create_collection
method: create collection with auto id on string primary key
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
error = {ct.err_code: 65535,
ct.err_msg: "float vector index does not support metric type: invalid: invalid parameter"}
self.create_collection(client, collection_name, default_dim, metric_type="invalid",
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("https://github.com/milvus-io/milvus/issues/29880")
def test_high_level_search_not_consistent_metric_type(self, metric_type):
"""
target: test search with inconsistent metric type (default is IP) with that of index
method: create connection, collection, insert and search with not consistent metric type
expected: Raise exception
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, 8))
search_params = {"metric_type": metric_type}
error = {ct.err_code: 1100,
ct.err_msg: f"metric type not match: invalid parameter[expected=IP][actual={metric_type}]"}
self.search(client, collection_name, vectors_to_search, limit=default_limit,
search_params=search_params,
check_task=CheckTasks.err_res, check_items=error)
self.drop_collection(client, collection_name)
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_search_query_default(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = self.list_collections(client)[0]
assert collection_name in collections
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim, "consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
# 4. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_array_insert_search(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = self.list_collections(client)[0]
assert collection_name in collections
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{
default_primary_key_field_name: i,
default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0,
default_int32_array_field_name: [i, i + 1, i + 2],
default_string_array_field_name: [str(i), str(i + 1), str(i + 2)]
} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue 25110")
def test_high_level_search_query_string(self):
"""
target: test search (high level api) for string primary key
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, id_type="string",
max_length=ct.default_length, consistency_level="Strong")
self.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [
{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
# 3. search
vectors_to_search = rng.random((1, default_dim))
self.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
# 4. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_high_level_search_different_metric_types(self, metric_type, auto_id):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search successfully with limit(topK)
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, metric_type=metric_type,
auto_id=auto_id, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
if auto_id:
for row in rows:
row.pop(default_primary_key_field_name)
self.insert(client, collection_name, rows)
# 3. search
vectors_to_search = rng.random((1, default_dim))
search_params = {"metric_type": metric_type}
self.search(client, collection_name, vectors_to_search, limit=default_limit,
search_params=search_params,
output_fields=[default_primary_key_field_name],
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"limit": default_limit})
self.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_delete(self):
"""
target: test delete (high level api)
method: create connection, collection, insert delete, and search
expected: search/query successfully without deleted data
"""
client = self._client()
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
self.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
default_nb = 1000
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
self.insert(client, collection_name, rows)
pks = [i for i in range(default_nb)]
# 3. get first primary key
first_pk_data = self.get(client, collection_name, ids=pks[0:1])
# 4. delete
delete_num = 3
self.delete(client, collection_name, ids=pks[0:delete_num])
# 5. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
for insert_id in pks[0:delete_num]:
if insert_id in insert_ids:
insert_ids.remove(insert_id)
limit = default_nb - delete_num
self.search(client, collection_name, vectors_to_search, limit=default_nb,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": limit})
# 6. query
self.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows[delete_num:],
"with_vec": True,
"primary_field": default_primary_key_field_name})
self.drop_collection(client, collection_name)